├── .actor ├── Dockerfile ├── README.md ├── actor.json ├── bin │ └── actor.sh ├── dataset_schema.json └── input_schema.json ├── .dockerignore ├── .github ├── FUNDING.yml └── workflows │ ├── build_gnu_linux.yml │ ├── build_macos.yml │ ├── build_windows.yml │ ├── cd.yml │ ├── ci-netbsd.yml │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── assets └── icon │ ├── icon.blend │ └── icon.png ├── dist └── run-in-container.sh ├── monolith.nuspec ├── snap └── snapcraft.yaml ├── src ├── cache.rs ├── cookies.rs ├── core.rs ├── css.rs ├── gui.rs ├── html.rs ├── js.rs ├── lib.rs ├── main.rs ├── session.rs └── url.rs └── tests ├── _data_ ├── basic │ ├── local-file.html │ ├── local-script.js │ └── local-style.css ├── css │ ├── index.html │ └── style.css ├── import-css-via-data-url │ ├── index.html │ └── style.css ├── integrity │ ├── index.html │ ├── script.js │ └── style.css ├── noscript │ ├── image.svg │ ├── index.html │ ├── nested.html │ └── script.html ├── svg │ ├── icons.svg │ ├── image.html │ ├── image.svg │ ├── index.html │ └── svg.html └── unusual_encodings │ ├── gb2312.html │ └── iso-8859-1.html ├── cli ├── base_url.rs ├── basic.rs ├── data_url.rs ├── local_files.rs ├── mod.rs ├── noscript.rs └── unusual_encodings.rs ├── cookies ├── cookie │ ├── is_expired.rs │ ├── matches_url.rs │ └── mod.rs ├── mod.rs └── parse_cookie_file_contents.rs ├── core ├── detect_media_type.rs ├── format_output_path.rs ├── mod.rs ├── options.rs └── parse_content_type.rs ├── css ├── embed_css.rs ├── is_image_url_prop.rs └── mod.rs ├── html ├── add_favicon.rs ├── check_integrity.rs ├── compose_csp.rs ├── create_metadata_tag.rs ├── embed_srcset.rs ├── get_base_url.rs ├── get_charset.rs ├── get_node_attr.rs ├── get_node_name.rs ├── has_favicon.rs ├── is_favicon.rs ├── mod.rs ├── parse_link_type.rs ├── parse_srcset.rs ├── serialize_document.rs ├── set_node_attr.rs └── walk.rs ├── js ├── attr_is_event_handler.rs └── mod.rs ├── mod.rs ├── 
session ├── mod.rs └── retrieve_asset.rs └── url ├── clean_url.rs ├── create_data_url.rs ├── domain_is_within_domain.rs ├── get_referer_url.rs ├── is_url_and_has_protocol.rs ├── mod.rs ├── parse_data_url.rs └── resolve_url.rs /.actor/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:alpine 2 | 3 | RUN apk --no-cache add curl bash git monolith jq 4 | RUN npm -g install apify-cli 5 | COPY .actor .actor 6 | CMD ./.actor/bin/actor.sh 7 | -------------------------------------------------------------------------------- /.actor/README.md: -------------------------------------------------------------------------------- 1 | # Monolith Actor on Apify 2 | 3 | [![Monolith Actor](https://apify.com/actor-badge?actor=snshn/monolith)](https://apify.com/snshn/monolith?fpr=snshn) 4 | 5 | This Actor wraps [Monolith](https://crates.io/crates/monolith) to crawl a web page URL and bundle the entire content in a single HTML file, without installing and running the tool locally. 6 | 7 | ## What are Actors? 8 | [Actors](https://docs.apify.com/platform/actors?fpr=snshn) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=snshn). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=snshn) and can be found in the [Apify Store](https://apify.com/store?fpr=snshn). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=snshn). 9 | 10 | ## Usage 11 | 12 | ### Apify Console 13 | 14 | 1. Go to the Apify Actor page 15 | 2. Click "Run" 16 | 3. In the input form, fill in **URL(s)** to crawl and bundle 17 | 4. 
The Actor will run and: 18 | - save the bundled HTML files in the run's default key-value store 19 | - save a record with the original URL, the link to the bundled file in the key-value store, and the monolith process exit status to the dataset 20 | 21 | 22 | ### Apify CLI 23 | 24 | ```bash 25 | apify call snshn/monolith --input='{ 26 | "urls": ["https://news.ycombinator.com/"] 27 | }' 28 | ``` 29 | 30 | ### Using Apify API 31 | 32 | ```bash 33 | curl --request POST \ 34 | --url "https://api.apify.com/v2/acts/snshn~monolith/runs" \ 35 | --header 'Content-Type: application/json' \ 36 | --header 'Authorization: Bearer YOUR_API_TOKEN' \ 37 | --data '{ 38 | "urls": ["https://news.ycombinator.com/"] 39 | }' 40 | ``` 41 | 42 | 43 | ## Input Parameters 44 | 45 | The Actor accepts a JSON input object with the following structure: 46 | 47 | | Field | Type | Required | Default | Description | 48 | |-------|------|----------|---------|-------------| 49 | | `urls` | array | Yes | - | List of URLs to monolith | 50 | | `urls[]` | string | Yes | - | URL to monolith | 51 | 52 | 53 | ### Example Input 54 | 55 | ```json 56 | { 57 | "urls": ["https://news.ycombinator.com/"] 58 | } 59 | ``` 60 | 61 | ## Output 62 | 63 | The Actor saves each bundled HTML file to the key-value store and pushes one record per URL to the dataset: 64 | 65 | ### Dataset Record 66 | 67 | | Field | Type | Required | Description | 68 | |-------|------|----------|-------------| 69 | | `url` | string | Yes | The original start URL for the monolith process | 70 | | `kvsUrl` | string | Yes | A link to the Apify key-value store object where the monolithic HTML is available for download | 71 | | `status` | string | Yes | Exit status of the monolith process | 72 | 73 | ### Example Dataset Item (JSON) 74 | 75 | ```json 76 | { 77 | "url": "https://news.ycombinator.com/", 78 | "kvsUrl": "https://api.apify.com/v2/key-value-stores/JRFLHRy9DOtdKGpdm/records/https___news.ycombinator.com_", 79 | "status": "0" 80 | } 81 | ``` 82 | 83 | ## Performance & Resources 84 | 85 | - **Memory Requirements**: 86 | - Minimum: 4168 MB RAM 87 | - 
**Processing Time**: 88 | - 30s per complex page like [bbc.co.uk](https://bbc.co.uk) 89 | 90 | 91 | For more help, check the [Monolith Project documentation](https://github.com/Y2Z/monolith) or raise an issue on the [Actor detail page](https://apify.com/snshn/monolith?fpr=snshn) on Apify. 92 | 93 | 94 | -------------------------------------------------------------------------------- /.actor/actor.json: -------------------------------------------------------------------------------- 1 | { 2 | "actorSpecification": 1, 3 | "name": "monolith", 4 | "version": "0.0", 5 | "buildTag": "latest", 6 | "environmentVariables": {}, 7 | "dockerFile": "./Dockerfile", 8 | "dockerContext": "../", 9 | "input": "./input_schema.json", 10 | "storages": { 11 | "dataset": "./dataset_schema.json" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /.actor/bin/actor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #pwd 3 | #find ./storage 4 | apify actor:get-input > /dev/null 5 | INPUT=`apify actor:get-input | jq -r .urls[] | xargs echo` 6 | echo "INPUT: $INPUT" 7 | 8 | for url in $INPUT; do 9 | # support for local usage 10 | # sanitize url to a safe *nix filename - replace nonalfanumerical characters 11 | # https://stackoverflow.com/questions/9847288/is-it-possible-to-use-in-a-filename 12 | # https://serverfault.com/questions/348482/how-to-remove-invalid-characters-from-filenames 13 | safe_filename=`echo $url | sed -e 's/[^A-Za-z0-9._-]/_/g'` 14 | echo "Monolith-ing $url to key $safe_filename" 15 | monolith $url | apify actor:set-value "$safe_filename" --contentType=text/html 16 | kvs_url="https://api.apify.com/v2/key-value-stores/${APIFY_DEFAULT_KEY_VALUE_STORE_ID}/records/${safe_filename}" 17 | result=$? 
18 | echo "Pushing result item to the datastore" 19 | echo "{\"url\":\"${url}\",\"status\":\"${result}\", \"kvsUrl\":\"${kvs_url}\"}" | apify actor:push-data 20 | done 21 | 22 | exit 0 23 | -------------------------------------------------------------------------------- /.actor/dataset_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "actorSpecification": 1, 3 | "fields":{ 4 | "title": "Monolith actor dataset", 5 | "description": "This is actor dataset schema", 6 | "type": "object", 7 | "schemaVersion": 1, 8 | "properties": { 9 | "kvsUrl": { 10 | "title": "Object URL", 11 | "type": "string", 12 | "description": "A link to the Apify key-value store object where the monolithic HTML is available" 13 | }, 14 | "status": { 15 | "title": "Exit status", 16 | "type": "string", 17 | "description": "Exit status of the monolith process" 18 | }, 19 | "url": { 20 | "title": "URL", 21 | "type": "string", 22 | "description": "The original start URL for the monolith process" 23 | } 24 | 25 | }, 26 | "required": [ 27 | "kvsUrl", 28 | "status", 29 | "url" 30 | ] 31 | }, 32 | "views": { 33 | "overview": { 34 | "title": "Overview", 35 | "transformation": { 36 | "fields": [ 37 | "url", 38 | "kvsUrl", 39 | "status" 40 | ] 41 | }, 42 | "display": { 43 | "component": "table", 44 | "url": { 45 | "label": "Page URL" 46 | }, 47 | "kvsUrl": { 48 | "label": "KVS URL" 49 | }, 50 | "status": { 51 | "label": "Status" 52 | } 53 | } 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /.actor/input_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Monolith actor input", 3 | "description": "This is actor input schema", 4 | "type": "object", 5 | "schemaVersion": 1, 6 | "properties": { 7 | "urls": { 8 | "title": "Urls", 9 | "type": "array", 10 | "description": "A list of URLs of pages to bundle into a single HTML document", 11 | 
"editor": "stringList", 12 | "prefill": ["http://www.google.com"] 13 | } 14 | }, 15 | "required": [ 16 | "urls" 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: snshn 4 | -------------------------------------------------------------------------------- /.github/workflows/build_gnu_linux.yml: -------------------------------------------------------------------------------- 1 | name: GNU/Linux 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths-ignore: 7 | - 'assets/' 8 | - 'dist/' 9 | - 'snap/' 10 | - 'Dockerfile' 11 | - 'LICENSE' 12 | - 'Makefile' 13 | - 'monolith.nuspec' 14 | - 'README.md' 15 | 16 | jobs: 17 | build: 18 | 19 | strategy: 20 | matrix: 21 | os: 22 | - ubuntu-latest 23 | rust: 24 | - stable 25 | runs-on: ${{ matrix.os }} 26 | 27 | steps: 28 | - run: git config --global core.autocrlf false 29 | 30 | - uses: actions/checkout@v2 31 | 32 | - name: Build 33 | run: cargo build --all --locked --verbose 34 | -------------------------------------------------------------------------------- /.github/workflows/build_macos.yml: -------------------------------------------------------------------------------- 1 | name: macOS 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths-ignore: 7 | - 'assets/' 8 | - 'dist/' 9 | - 'snap/' 10 | - 'Dockerfile' 11 | - 'LICENSE' 12 | - 'Makefile' 13 | - 'monolith.nuspec' 14 | - 'README.md' 15 | 16 | jobs: 17 | build: 18 | 19 | strategy: 20 | matrix: 21 | os: 22 | - macos-latest 23 | rust: 24 | - stable 25 | runs-on: ${{ matrix.os }} 26 | 27 | steps: 28 | - run: git config --global core.autocrlf false 29 | 30 | - 
uses: actions/checkout@v2 31 | 32 | - name: Build 33 | run: cargo build --all --locked --verbose 34 | -------------------------------------------------------------------------------- /.github/workflows/build_windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths-ignore: 7 | - 'assets/' 8 | - 'dist/' 9 | - 'snap/' 10 | - 'Dockerfile' 11 | - 'LICENSE' 12 | - 'Makefile' 13 | - 'monolith.nuspec' 14 | - 'README.md' 15 | 16 | jobs: 17 | build: 18 | 19 | strategy: 20 | matrix: 21 | os: 22 | - windows-latest 23 | rust: 24 | - stable 25 | runs-on: ${{ matrix.os }} 26 | 27 | steps: 28 | - run: git config --global core.autocrlf false 29 | 30 | - uses: actions/checkout@v2 31 | 32 | - name: Build 33 | run: cargo build --all --locked --verbose 34 | -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- 1 | # CD GitHub Actions workflow for monolith 2 | 3 | name: CD 4 | 5 | on: 6 | release: 7 | types: 8 | - created 9 | 10 | jobs: 11 | 12 | gnu_linux_aarch64: 13 | runs-on: ubuntu-20.04 14 | steps: 15 | - name: Checkout the repository 16 | uses: actions/checkout@v4 17 | 18 | - name: Prepare cross-platform environment 19 | run: | 20 | sudo mkdir /cross-build 21 | sudo touch /etc/apt/sources.list.d/arm64.list 22 | echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/arm64.list 23 | sudo apt-get update 24 | sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross 25 | sudo apt-get download libssl1.1:arm64 libssl-dev:arm64 26 | sudo dpkg -x libssl1.1*.deb /cross-build 27 | sudo dpkg -x libssl-dev*.deb /cross-build 28 | rustup target add aarch64-unknown-linux-gnu 29 | echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV 30 | echo 
"OPENSSL_INCLUDE_DIR=/cross-build/usr/include/aarch64-linux-gnu" >> $GITHUB_ENV 31 | echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV 32 | echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV 33 | echo "RUSTFLAGS=-C linker=aarch64-linux-gnu-gcc -L/usr/aarch64-linux-gnu/lib -L/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV 34 | 35 | - name: Build the executable 36 | run: cargo build --release --target=aarch64-unknown-linux-gnu --no-default-features --features cli 37 | 38 | - name: Attach artifact to the release 39 | uses: Shopify/upload-to-release@v2.0.0 40 | with: 41 | name: monolith-gnu-linux-aarch64 42 | path: target/aarch64-unknown-linux-gnu/release/monolith 43 | repo-token: ${{ secrets.GITHUB_TOKEN }} 44 | 45 | gnu_linux_armhf: 46 | runs-on: ubuntu-20.04 47 | steps: 48 | - name: Checkout the repository 49 | uses: actions/checkout@v4 50 | 51 | - name: Prepare cross-platform environment 52 | run: | 53 | sudo mkdir /cross-build 54 | sudo touch /etc/apt/sources.list.d/armhf.list 55 | echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/armhf.list 56 | sudo apt-get update 57 | sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross 58 | sudo apt-get download libssl1.1:armhf libssl-dev:armhf 59 | sudo dpkg -x libssl1.1*.deb /cross-build 60 | sudo dpkg -x libssl-dev*.deb /cross-build 61 | rustup target add arm-unknown-linux-gnueabihf 62 | echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV 63 | echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV 64 | echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV 65 | echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV 66 | echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build/usr/lib/arm-linux-gnueabihf -L/cross-build/lib/arm-linux-gnueabihf" >> $GITHUB_ENV 67 | 68 | - name: Build the executable 69 | run: 
cargo build --release --target=arm-unknown-linux-gnueabihf --no-default-features --features cli 70 | 71 | - name: Attach artifact to the release 72 | uses: Shopify/upload-to-release@v2.0.0 73 | with: 74 | name: monolith-gnu-linux-armhf 75 | path: target/arm-unknown-linux-gnueabihf/release/monolith 76 | repo-token: ${{ secrets.GITHUB_TOKEN }} 77 | 78 | gnu_linux_x86_64: 79 | runs-on: ubuntu-20.04 80 | steps: 81 | - name: Checkout the repository 82 | uses: actions/checkout@v4 83 | 84 | - name: Build the executable 85 | run: cargo build --release 86 | 87 | - uses: Shopify/upload-to-release@v2.0.0 88 | with: 89 | name: monolith-gnu-linux-x86_64 90 | path: target/release/monolith 91 | repo-token: ${{ secrets.GITHUB_TOKEN }} 92 | 93 | windows: 94 | runs-on: windows-2019 95 | steps: 96 | - run: git config --global core.autocrlf false 97 | 98 | - name: Checkout the repository 99 | uses: actions/checkout@v4 100 | 101 | - name: Build the executable 102 | run: cargo build --release 103 | 104 | - uses: Shopify/upload-to-release@v2.0.0 105 | with: 106 | name: monolith.exe 107 | path: target\release\monolith.exe 108 | repo-token: ${{ secrets.GITHUB_TOKEN }} 109 | -------------------------------------------------------------------------------- /.github/workflows/ci-netbsd.yml: -------------------------------------------------------------------------------- 1 | # CI NetBSD GitHub Actions workflow for monolith 2 | 3 | name: CI (NetBSD) 4 | 5 | on: 6 | pull_request: 7 | branches: [ master ] 8 | paths-ignore: 9 | - 'assets/' 10 | - 'dist/' 11 | - 'snap/' 12 | - 'Dockerfile' 13 | - 'LICENSE' 14 | - 'Makefile' 15 | - 'monolith.nuspec' 16 | - 'README.md' 17 | 18 | jobs: 19 | build_and_test: 20 | runs-on: ubuntu-latest 21 | name: Build and test (netbsd) 22 | steps: 23 | - name: "Checkout repository" 24 | uses: actions/checkout@v4 25 | 26 | - name: Test in NetBSD 27 | uses: vmactions/netbsd-vm@v1 28 | with: 29 | usesh: true 30 | prepare: | 31 | /usr/sbin/pkg_add cwrappers gmake mktools 
pkgconf rust 32 | run: | 33 | cargo build --all --locked --verbose --no-default-features --features cli 34 | cargo test --all --locked --verbose --no-default-features --features cli 35 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # CI GitHub Actions workflow for monolith 2 | 3 | name: CI 4 | 5 | on: 6 | pull_request: 7 | branches: [ master ] 8 | paths-ignore: 9 | - 'assets/' 10 | - 'dist/' 11 | - 'snap/' 12 | - 'Dockerfile' 13 | - 'LICENSE' 14 | - 'Makefile' 15 | - 'monolith.nuspec' 16 | - 'README.md' 17 | 18 | jobs: 19 | build_and_test: 20 | name: Build and test 21 | strategy: 22 | matrix: 23 | os: 24 | - ubuntu-latest 25 | - macos-latest 26 | - windows-latest 27 | runs-on: ${{ matrix.os }} 28 | steps: 29 | - run: git config --global core.autocrlf false 30 | 31 | - name: "Checkout repository" 32 | uses: actions/checkout@v4 33 | 34 | - name: Build 35 | run: cargo build --all --locked --verbose 36 | 37 | - name: Run tests 38 | run: cargo test --all --locked --verbose 39 | 40 | - name: Check code formatting 41 | run: | 42 | rustup component add rustfmt 43 | cargo fmt --all -- --check 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # These are backup files generated by rustfmt 6 | **/*.rs.bk 7 | 8 | # Added by Apify CLI 9 | storage 10 | node_modules 11 | .venv 12 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "monolith" 3 | version = "2.11.0" 4 | authors = [ 5 | "Sunshine ", 6 | "Mahdi Robatipoor ", 7 | "Emmanuel Delaborde ", 8 | "Emi Simpson ", 9 
| "rhysd ", 10 | "Andriy Rakhnin ", 11 | ] 12 | edition = "2021" 13 | description = "CLI tool and library for saving web pages as a single HTML file" 14 | homepage = "https://github.com/Y2Z/monolith" 15 | repository = "https://github.com/Y2Z/monolith" 16 | readme = "README.md" 17 | keywords = ["web", "http", "html", "download", "command-line"] 18 | categories = ["command-line-utilities", "web-programming"] 19 | include = ["src/*.rs", "Cargo.toml"] 20 | license = "CC0-1.0" 21 | 22 | [dependencies] 23 | atty = "=0.2.14" # Used for highlighting network errors 24 | base64 = "=0.22.1" # Used for integrity attributes 25 | chrono = "=0.4.41" # Used for formatting timestamps 26 | clap = { version = "=4.5.37", features = [ 27 | "derive", 28 | ], optional = true } # Used for processing CLI arguments 29 | cssparser = "=0.35.0" # Used for dealing with CSS 30 | directories = { version = "=6.0.0", optional = true } # Used for GUI 31 | druid = { version = "=0.8.3", optional = true } # Used for GUI 32 | encoding_rs = "=0.8.35" # Used for parsing and converting document charsets 33 | html5ever = "=0.29.1" # Used for all things DOM 34 | markup5ever_rcdom = "=0.5.0-unofficial" # Used for manipulating DOM 35 | percent-encoding = "=2.3.1" # Used for encoding URLs 36 | sha2 = "=0.10.9" # Used for calculating checksums during integrity checks 37 | redb = "=2.4.0" # Used for on-disk caching of remote assets 38 | tempfile = { version = "=3.19.1", optional = true } # Used for on-disk caching of remote assets 39 | url = "=2.5.4" # Used for parsing URLs 40 | openssl = "=0.10.72" # Used for static linking of the OpenSSL library 41 | 42 | # Used for unwrapping NOSCRIPT 43 | [dependencies.regex] 44 | version = "=1.11.1" 45 | default-features = false 46 | features = ["std", "perf-dfa", "unicode-perl"] 47 | 48 | # Used for making network requests 49 | [dependencies.reqwest] 50 | version = "=0.12.15" 51 | default-features = false 52 | features = ["default-tls", "blocking", "gzip", "brotli", 
"deflate"] 53 | 54 | [dev-dependencies] 55 | assert_cmd = "=2.0.17" 56 | 57 | [features] 58 | default = ["cli", "vendored-openssl"] 59 | cli = ["clap", "tempfile"] # Build a CLI tool that includes main() function 60 | gui = [ 61 | "directories", 62 | "druid", 63 | "tempfile", 64 | ] # Build a GUI executable that includes main() function 65 | vendored-openssl = [ 66 | "openssl/vendored", 67 | ] # Compile and statically link a copy of OpenSSL 68 | 69 | [lib] 70 | name = "monolith" 71 | path = "src/lib.rs" 72 | 73 | [[bin]] 74 | name = "monolith" 75 | path = "src/main.rs" 76 | required-features = ["cli"] 77 | 78 | [[bin]] 79 | name = "monolith-gui" 80 | path = "src/gui.rs" 81 | required-features = ["gui"] 82 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM clux/muslrust:stable as builder 2 | 3 | RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \ 4 | | grep "tarball_url.*\"," \ 5 | | cut -d '"' -f 4) 6 | RUN tar xfz monolith.tar.gz \ 7 | && mv Y2Z-monolith-* monolith \ 8 | && rm monolith.tar.gz 9 | 10 | WORKDIR monolith/ 11 | RUN make install 12 | 13 | 14 | FROM alpine 15 | 16 | RUN apk update && \ 17 | apk add --no-cache openssl && \ 18 | rm -rf "/var/cache/apk/*" 19 | 20 | COPY --from=builder /root/.cargo/bin/monolith /usr/bin/monolith 21 | WORKDIR /tmp 22 | ENTRYPOINT ["/usr/bin/monolith"] 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. 
CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. 
A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. 
In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for monolith 2 | 3 | all: build build-gui 4 | .PHONY: all 5 | 6 | build: 7 | @cargo build --locked 8 | .PHONY: build 9 | 10 | build-gui: 11 | @cargo build --locked --bin monolith-gui --features="gui" 12 | .PHONY: build_gui 13 | 14 | clean: 15 | @cargo clean 16 | .PHONY: clean 17 | 18 | format: 19 | @cargo fmt --all -- 20 | .PHONY: format 21 | 22 | format-check: 23 | @cargo fmt --all -- --check 24 | .PHONY: format 25 | 26 | install: 27 | @cargo install --force --locked --path . 
28 | .PHONY: install 29 | 30 | lint: 31 | @cargo clippy --fix --allow-dirty --allow-staged 32 | # @cargo fix --allow-dirty --allow-staged 33 | .PHONY: lint 34 | 35 | lint-check: 36 | @cargo clippy -- 37 | .PHONY: lint_check 38 | 39 | test: build 40 | @cargo test --locked 41 | .PHONY: test 42 | 43 | uninstall: 44 | @cargo uninstall 45 | .PHONY: uninstall 46 | 47 | update-lock-file: 48 | @cargo update 49 | .PHONY: clean 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![monolith build status on GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux) 2 | [![monolith build status on macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS) 3 | [![monolith build status on Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows) 4 | [![Monolith Actor on Apify](https://apify.com/actor-badge?actor=snshn/monolith)](https://apify.com/snshn/monolith?fpr=snshn) 5 | 6 | 7 | ``` 8 | _____ _____________ __________ ___________________ ___ 9 | | \ / \ | | | | | | 10 | | \/ __ \| __ | | ___ ___ |__| | 11 | | | | | | | | | | | | | 12 | | |\ /| |__| |__| |___| | | | | __ | 13 | | | \__/ | |\ | | | | | | | 14 | |___| |__________| \___________________| |___| |___| |___| 15 | ``` 16 | 17 | A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive. 
18 | 19 | Unlike the conventional “Save page as”, `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share. 20 | 21 | If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available. 22 | 23 | 24 | --------------------------------------------------- 25 | 26 | 27 | ## Installation 28 | 29 | #### Using [Cargo](https://crates.io/crates/monolith) (cross-platform) 30 | 31 | ```console 32 | cargo install monolith 33 | ``` 34 | 35 | #### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux) 36 | 37 | ```console 38 | brew install monolith 39 | ``` 40 | 41 | #### Via [Chocolatey](https://community.chocolatey.org/packages/monolith) (Windows) 42 | 43 | ```console 44 | choco install monolith 45 | ``` 46 | 47 | #### Via [Scoop](https://scoop.sh/#/apps?q=monolith) (Windows) 48 | 49 | ```console 50 | scoop install main/monolith 51 | ``` 52 | 53 | #### Via [Winget](https://winstall.app/apps/Y2Z.Monolith) (Windows) 54 | 55 | ```console 56 | winget install --id=Y2Z.Monolith -e 57 | ``` 58 | 59 | #### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS) 60 | 61 | ```console 62 | sudo port install monolith 63 | ``` 64 | 65 | #### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux) 66 | 67 | ```console 68 | snap install monolith 69 | ``` 70 | 71 | #### Using [Guix](https://packages.guix.gnu.org/packages/monolith) (GNU/Linux) 72 | 73 | ```console 74 | guix install monolith 75 | ``` 76 | 77 | #### Using [NixPkgs](https://search.nixos.org/packages?channel=unstable&show=monolith&query=monolith) 78 | 79 | ```console 80 | nix-env -iA nixpkgs.monolith 81 | ``` 82 | 83 | #### Using [Flox](https://flox.dev) 84 | 85 | ```console 86 | flox install monolith 87 | ``` 88 
| 89 | #### Using [Pacman](https://archlinux.org/packages/extra/x86_64/monolith) (Arch Linux) 90 | 91 | ```console 92 | pacman -S monolith 93 | ``` 94 | 95 | #### Using [aports](https://pkgs.alpinelinux.org/packages?name=monolith) (Alpine Linux) 96 | 97 | ```console 98 | apk add monolith 99 | ``` 100 | 101 | #### Using [XBPS Package Manager](https://voidlinux.org/packages/?q=monolith) (Void Linux) 102 | 103 | ```console 104 | xbps-install -S monolith 105 | ``` 106 | 107 | #### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD) 108 | 109 | ```console 110 | pkg install monolith 111 | ``` 112 | 113 | #### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD) 114 | 115 | ```console 116 | cd /usr/ports/www/monolith/ 117 | make install clean 118 | ``` 119 | 120 | #### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc) 121 | 122 | ```console 123 | cd /usr/pkgsrc/www/monolith 124 | make install clean 125 | ``` 126 | 127 | #### Using [containers](https://www.docker.com/) 128 | 129 | ```console 130 | docker build -t y2z/monolith . 131 | sudo install -b dist/run-in-container.sh /usr/local/bin/monolith 132 | ``` 133 | 134 | #### From [source](https://github.com/Y2Z/monolith) 135 | 136 | Dependencies: `libssl`, `cargo` 137 | 138 |
139 | Install cargo (GNU/Linux) 140 | Check if cargo is installed 141 | 142 | ```console 143 | cargo -v 144 | ``` 145 | 146 | If cargo is not already installed, install and add it to your existing ```$PATH``` (paraphrasing the [official installation instructions](https://doc.rust-lang.org/cargo/getting-started/installation.html)): 147 | 148 | ```console 149 | curl https://sh.rustup.rs -sSf | sh 150 | . "$HOME/.cargo/env" 151 | ``` 152 | 153 | Proceed with installing from source: 154 |
155 | 156 | ```console 157 | git clone https://github.com/Y2Z/monolith.git 158 | cd monolith 159 | make install 160 | ``` 161 | 162 | #### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc) 163 | 164 | Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture. 165 | 166 | 167 | --------------------------------------------------- 168 | 169 | 170 | ## Usage 171 | 172 | ```console 173 | monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o %title%.%timestamp%.html 174 | ``` 175 | 176 | ```console 177 | cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site-page-with-assets.html 178 | ``` 179 | 180 | 181 | --------------------------------------------------- 182 | 183 | 184 | ## Options 185 | 186 | - `-a`: Exclude audio sources 187 | - `-b`: Use `custom base URL` 188 | - `-B`: Forbid retrieving assets from specified domain(s) 189 | - `-c`: Exclude CSS 190 | - `-C`: Read cookies from `file` 191 | - `-d`: Allow retrieving assets only from specified `domain(s)` 192 | - `-e`: Ignore network errors 193 | - `-E`: Save document using `custom encoding` 194 | - `-f`: Omit frames 195 | - `-F`: Exclude web fonts 196 | - `-h`: Print help information 197 | - `-i`: Remove images 198 | - `-I`: Isolate the document 199 | - `-j`: Exclude JavaScript 200 | - `-k`: Accept invalid X.509 (TLS) certificates 201 | - `-m`: Output in MHTML format instead of HTML 202 | - `-M`: Don't add timestamp and URL information 203 | - `-n`: Extract contents of NOSCRIPT elements 204 | - `-o`: Write output to `file` (use “-” for STDOUT) 205 | - `-q`: Be quiet 206 | - `-t`: Adjust `network request timeout` 207 | - `-u`: Provide `custom User-Agent` 208 | - `-v`: Exclude videos 209 | - `-V`: Print version number 210 | 211 | 212 | --------------------------------------------------- 213 | 214 | 215 | ## Whitelisting and blacklisting domains 216 | 217 | Options 
`-d` and `-B` provide control over what domains can be used to retrieve assets from, e.g.: 218 | 219 | ```console 220 | monolith -I -d example.com -d www.example.com https://example.com -o example-only.html 221 | ``` 222 | 223 | ```console 224 | monolith -I -B -d .googleusercontent.com -d googleanalytics.com -d .google.com https://example.com -o example-no-ads.html 225 | ``` 226 | 227 | 228 | --------------------------------------------------- 229 | 230 | 231 | ## Dynamic content 232 | 233 | Monolith doesn't feature a JavaScript engine, hence websites that retrieve and display data after initial load may require usage of additional tools. 234 | 235 | For example, Chromium (Chrome) can be used to act as a pre-processor for such pages: 236 | 237 | ```console 238 | chromium --headless --window-size=1920,1080 --run-all-compositor-stages-before-draw --virtual-time-budget=9000 --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html 239 | ``` 240 | 241 | 242 | --------------------------------------------------- 243 | 244 | 245 | ## Authentication 246 | 247 | ```console 248 | monolith https://username:password@example.com -o example-basic-auth.html 249 | ``` 250 | 251 | 252 | --------------------------------------------------- 253 | 254 | 255 | ## Proxies 256 | 257 | Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables. 258 | 259 | 260 | --------------------------------------------------- 261 | 262 | ### Apify Actor Usage 263 | 264 | Run Monolith Actor on Apify 265 | 266 | You can run Monolith in the cloud without installation using the [Monolith Actor](https://apify.com/snshn/monolith?fpr=snshn) on [Apify](https://apify.com?fpr=snshn) free of charge. 
#!/bin/sh
#
# Runs monolith inside a container, forwarding all command-line arguments.
# Uses podman when it is available on PATH and falls back to docker otherwise.

DOCKER=docker
# `command -v` is the POSIX way to probe for an executable. Redirections are
# applied left to right, so stdout has to be sent to /dev/null *before*
# stderr is duplicated onto it; the reverse order leaves stderr on the
# terminal.
if command -v podman > /dev/null 2>&1; then
    DOCKER=podman
fi
ORG_NAME=y2z
PROG_NAME=monolith

# Quote the expansions so that unusual values cannot be word-split
"$DOCKER" run --rm "$ORG_NAME/$PROG_NAME" "$@"
-------------------------------------------------------------------------------- /monolith.nuspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | monolith 5 | 2.8.1 6 | Monolith 7 | Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd 8 | https://github.com/Y2Z/monolith 9 | https://raw.githubusercontent.com/Y2Z/monolith/master/assets/icon/icon.png 10 | https://raw.githubusercontent.com/Y2Z/monolith/master/LICENSE 11 | false 12 | CLI tool for saving complete web pages as a single HTML file 13 | 14 | A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive. 15 | 16 | Unlike the conventional “Save page as”, monolith not only saves the target document, it embeds CSS, image, and JavaScript assets all at once, producing a single HTML5 document that is a joy to store and share. 17 | 18 | If compared to saving websites using wget, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available. 19 | 20 | Public Domain 21 | en-US 22 | scraping archiving 23 | https://github.com/Y2Z/monolith/blob/master/README.md 24 | 25 | 26 | -------------------------------------------------------------------------------- /snap/snapcraft.yaml: -------------------------------------------------------------------------------- 1 | name: monolith 2 | base: core18 3 | # Version data defined inside the monolith part below 4 | adopt-info: monolith 5 | summary: Monolith - Save HTML pages with ease 6 | description: | 7 | A data hoarder's dream come true: bundle any web page into a single 8 | HTML file. You can finally replace that gazillion of open tabs with 9 | a gazillion of .html files stored somewhere on your precious little 10 | drive. 
11 | Unlike conventional "Save page as…", monolith not only saves the 12 | target document, it embeds CSS, image, and JavaScript assets all 13 | at once, producing a single HTML5 document that is a joy to store 14 | and share. 15 | If compared to saving websites with wget -mpk, monolith embeds 16 | all assets as data URLs and therefore displays the saved page 17 | exactly the same, being completely separated from the Internet. 18 | 19 | confinement: strict 20 | 21 | architectures: 22 | - build-on: amd64 23 | - build-on: arm64 24 | - build-on: armhf 25 | - build-on: i386 26 | - build-on: ppc64el 27 | - build-on: s390x 28 | 29 | parts: 30 | monolith: 31 | plugin: rust 32 | source: . 33 | build-packages: 34 | - libssl-dev 35 | - pkg-config 36 | override-pull: | 37 | snapcraftctl pull 38 | # Determine the current tag 39 | last_committed_tag="$(git describe --tags --abbrev=0)" 40 | last_committed_tag_ver="$(echo ${last_committed_tag} | sed 's/v//')" 41 | # Determine the most recent version in the beta channel in the Snap Store 42 | last_released_tag="$(snap info $SNAPCRAFT_PROJECT_NAME | awk '$1 == "beta:" { print $2 }')" 43 | # If the latest tag from the upstream project has not been released to 44 | # beta, build that tag instead of master. 
45 | if [ "${last_committed_tag_ver}" != "${last_released_tag}" ]; then 46 | git fetch 47 | git checkout "${last_committed_tag}" 48 | fi 49 | # set version number of the snap based on what we did above 50 | snapcraftctl set-version $(git describe --tags --abbrev=0) 51 | 52 | apps: 53 | monolith: 54 | command: monolith 55 | plugs: 56 | - home 57 | - network 58 | - removable-media 59 | -------------------------------------------------------------------------------- /src/cache.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::fs::File; 3 | use std::io::{BufWriter, Write}; 4 | use std::path::Path; 5 | 6 | use redb::{Database, Error, TableDefinition}; 7 | 8 | pub struct CacheMetadataItem { 9 | data: Option>, // Asset's blob; used for caching small files or if on-disk database isn't utilized 10 | media_type: Option, // MIME-type, things like "text/plain", "image/png"... 11 | charset: Option, // "UTF-8", "UTF-16"... 
12 | } 13 | 14 | // #[derive(Debug)] 15 | pub struct Cache { 16 | min_file_size: usize, // Only use database for assets larger than this size (in bytes), otherwise keep them in RAM 17 | metadata: HashMap, // Dictionary of metadata (and occasionally data [mostly for very small files]) 18 | db: Option, // Pointer to database instance; None if not yet initialized or if failed to initialize 19 | db_ok: Option, // None by default, Some(true) if was able to initialize database, Some (false) if an error occured 20 | db_file_path: Option, // Filesystem path to file used for storing database 21 | } 22 | 23 | const FILE_WRITE_BUF_LEN: usize = 1024 * 100; // On-disk cache file write buffer size (in bytes) 24 | const TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("_"); 25 | 26 | impl Cache { 27 | pub fn new(min_file_size: usize, db_file_path: Option) -> Cache { 28 | let mut cache = Cache { 29 | min_file_size, 30 | metadata: HashMap::new(), 31 | db: None, 32 | db_ok: None, 33 | db_file_path: db_file_path.clone(), 34 | }; 35 | 36 | if db_file_path.is_some() { 37 | // Attempt to initialize on-disk database 38 | match Database::create(Path::new(&db_file_path.unwrap())) { 39 | Ok(db) => { 40 | cache.db = Some(db); 41 | cache.db_ok = Some(true); 42 | cache 43 | } 44 | Err(..) 
=> { 45 | cache.db_ok = Some(false); 46 | cache 47 | } 48 | } 49 | } else { 50 | cache.db_ok = Some(false); 51 | cache 52 | } 53 | } 54 | 55 | pub fn set(&mut self, key: &str, data: &Vec, media_type: String, charset: String) { 56 | let mut cache_metadata_item: CacheMetadataItem = CacheMetadataItem { 57 | data: if self.db_ok.is_some() && self.db_ok.unwrap() { 58 | None 59 | } else { 60 | Some(data.to_owned().to_vec()) 61 | }, 62 | media_type: Some(media_type.to_owned()), 63 | charset: Some(charset), 64 | }; 65 | 66 | if (self.db_ok.is_none() || !self.db_ok.unwrap()) || data.len() <= self.min_file_size { 67 | cache_metadata_item.data = Some(data.to_owned().to_vec()); 68 | } else { 69 | match self.db.as_ref().unwrap().begin_write() { 70 | Ok(write_txn) => { 71 | { 72 | let mut table = write_txn.open_table(TABLE).unwrap(); 73 | table.insert(key, &*data.to_owned()).unwrap(); 74 | } 75 | write_txn.commit().unwrap(); 76 | } 77 | Err(..) => { 78 | // Fall back to caching everything in memory 79 | cache_metadata_item.data = Some(data.to_owned().to_vec()); 80 | } 81 | } 82 | } 83 | 84 | self.metadata 85 | .insert((*key).to_string(), cache_metadata_item); 86 | } 87 | 88 | pub fn get(&self, key: &str) -> Result<(Vec, String, String), Error> { 89 | if self.metadata.contains_key(key) { 90 | let metadata_item = self.metadata.get(key).unwrap(); 91 | 92 | if metadata_item.data.is_some() { 93 | return Ok(( 94 | metadata_item.data.as_ref().unwrap().to_vec(), 95 | metadata_item.media_type.as_ref().expect("").to_string(), 96 | metadata_item.charset.as_ref().expect("").to_string(), 97 | )); 98 | } else if self.db_ok.is_some() && self.db_ok.unwrap() { 99 | let read_txn = self.db.as_ref().unwrap().begin_read()?; 100 | let table = read_txn.open_table(TABLE)?; 101 | let data = table.get(key)?; 102 | let bytes = data.unwrap(); 103 | 104 | return Ok(( 105 | bytes.value().to_vec(), 106 | metadata_item.media_type.as_ref().expect("").to_string(), 107 | 
metadata_item.charset.as_ref().expect("").to_string(), 108 | )); 109 | } 110 | } 111 | 112 | Err(Error::TransactionInProgress) // XXX 113 | } 114 | 115 | pub fn contains_key(&self, key: &str) -> bool { 116 | self.metadata.contains_key(key) 117 | } 118 | 119 | pub fn destroy_database_file(&mut self) { 120 | if self.db_ok.is_none() || !self.db_ok.unwrap() { 121 | return; 122 | } 123 | 124 | // Destroy database instance (prevents writes into file) 125 | self.db = None; 126 | self.db_ok = Some(false); 127 | 128 | // Wipe database file 129 | if let Some(db_file_path) = self.db_file_path.to_owned() { 130 | // Overwrite file with zeroes 131 | if let Ok(temp_file) = File::options() 132 | .read(true) 133 | .write(true) 134 | .open(db_file_path.clone()) 135 | { 136 | let mut buffer = [0; FILE_WRITE_BUF_LEN]; 137 | let mut remaining_size: usize = temp_file.metadata().unwrap().len() as usize; 138 | let mut writer = BufWriter::new(temp_file); 139 | 140 | while remaining_size > 0 { 141 | let bytes_to_write: usize = if remaining_size < FILE_WRITE_BUF_LEN { 142 | remaining_size 143 | } else { 144 | FILE_WRITE_BUF_LEN 145 | }; 146 | let buffer = &mut buffer[..bytes_to_write]; 147 | writer.write(buffer).unwrap(); 148 | 149 | remaining_size -= bytes_to_write; 150 | } 151 | } 152 | } 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/cookies.rs: -------------------------------------------------------------------------------- 1 | use std::time::{SystemTime, UNIX_EPOCH}; 2 | 3 | use crate::url::Url; 4 | 5 | pub struct Cookie { 6 | pub domain: String, 7 | pub include_subdomains: bool, 8 | pub path: String, 9 | pub https_only: bool, 10 | pub expires: u64, 11 | pub name: String, 12 | pub value: String, 13 | } 14 | 15 | #[derive(Debug)] 16 | pub enum CookieFileContentsParseError { 17 | InvalidHeader, 18 | } 19 | 20 | impl Cookie { 21 | pub fn is_expired(&self) -> bool { 22 | if self.expires == 0 { 23 | return false; // Session, never 
expires 24 | } 25 | 26 | let start = SystemTime::now(); 27 | let since_the_epoch = start 28 | .duration_since(UNIX_EPOCH) 29 | .expect("Time went backwards"); 30 | 31 | self.expires < since_the_epoch.as_secs() 32 | } 33 | 34 | pub fn matches_url(&self, url: &str) -> bool { 35 | match Url::parse(url) { 36 | Ok(url) => { 37 | // Check protocol scheme 38 | match url.scheme() { 39 | "http" => { 40 | if self.https_only { 41 | return false; 42 | } 43 | } 44 | "https" => {} 45 | _ => { 46 | // Should never match URLs of protocols other than HTTP(S) 47 | return false; 48 | } 49 | } 50 | 51 | // Check host 52 | if let Some(url_host) = url.host_str() { 53 | if self.domain.starts_with(".") && self.include_subdomains { 54 | if !url_host.to_lowercase().ends_with(&self.domain) 55 | && !url_host 56 | .eq_ignore_ascii_case(&self.domain[1..self.domain.len() - 1]) 57 | { 58 | return false; 59 | } 60 | } else if !url_host.eq_ignore_ascii_case(&self.domain) { 61 | return false; 62 | } 63 | } else { 64 | return false; 65 | } 66 | 67 | // Check path 68 | if !url.path().eq_ignore_ascii_case(&self.path) 69 | && !url.path().starts_with(&self.path) 70 | { 71 | return false; 72 | } 73 | } 74 | Err(_) => { 75 | return false; 76 | } 77 | } 78 | 79 | true 80 | } 81 | } 82 | 83 | pub fn parse_cookie_file_contents( 84 | cookie_file_contents: &str, 85 | ) -> Result, CookieFileContentsParseError> { 86 | let mut cookies: Vec = Vec::new(); 87 | 88 | for (i, line) in cookie_file_contents.lines().enumerate() { 89 | if i == 0 { 90 | // Parsing first line 91 | if !line.eq("# HTTP Cookie File") && !line.eq("# Netscape HTTP Cookie File") { 92 | return Err(CookieFileContentsParseError::InvalidHeader); 93 | } 94 | } else { 95 | // Ignore comment lines 96 | if line.starts_with("#") { 97 | continue; 98 | } 99 | 100 | // Attempt to parse values 101 | let mut fields = line.split("\t"); 102 | if fields.clone().count() != 7 { 103 | continue; 104 | } 105 | cookies.push(Cookie { 106 | domain: 
/// Names of the HTML attributes that hold inline JavaScript event handlers.
const JS_DOM_EVENT_ATTRS: &[&str] = &[
    // From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
    //   https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
    //   https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")

    // Global event handlers
    "onabort",
    "onauxclick",
    "onblur",
    "oncancel",
    "oncanplay",
    "oncanplaythrough",
    "onchange",
    "onclick",
    "onclose",
    "oncontextmenu",
    "oncuechange",
    "ondblclick",
    "ondrag",
    "ondragend",
    "ondragenter",
    "ondragexit",
    "ondragleave",
    "ondragover",
    "ondragstart",
    "ondrop",
    "ondurationchange",
    "onemptied",
    "onended",
    "onerror",
    "onfocus",
    "onformdata",
    "oninput",
    "oninvalid",
    "onkeydown",
    "onkeypress",
    "onkeyup",
    "onload",
    "onloadeddata",
    "onloadedmetadata",
    "onloadstart",
    "onmousedown",
    "onmouseenter",
    "onmouseleave",
    "onmousemove",
    "onmouseout",
    "onmouseover",
    "onmouseup",
    "onwheel",
    "onpause",
    "onplay",
    "onplaying",
    "onprogress",
    "onratechange",
    "onreset",
    "onresize",
    "onscroll",
    "onsecuritypolicyviolation",
    "onseeked",
    "onseeking",
    "onselect",
    "onslotchange",
    "onstalled",
    "onsubmit",
    "onsuspend",
    "ontimeupdate",
    "ontoggle",
    "onvolumechange",
    "onwaiting",
    "onwebkitanimationend",
    "onwebkitanimationiteration",
    "onwebkitanimationstart",
    "onwebkittransitionend",
    // Event handlers for <body> and <frameset> elements
    "onafterprint",
    "onbeforeprint",
    "onbeforeunload",
    "onhashchange",
    "onlanguagechange",
    "onmessage",
    "onmessageerror",
    "onoffline",
    "ononline",
    "onpagehide",
    "onpageshow",
    "onpopstate",
    "onrejectionhandled",
    "onstorage",
    "onunhandledrejection",
    "onunload",
    // Clipboard event handlers (cut/copy/paste)
    "oncut",
    "oncopy",
    "onpaste",
];

/// Tells whether `attr_name` is one of the known event handler attributes.
///
/// The comparison ignores ASCII case, so `onclick`, `onClick` and `ONCLICK`
/// are all recognized.
pub fn attr_is_event_handler(attr_name: &str) -> bool {
    for known_attr in JS_DOM_EVENT_ATTRS {
        if known_attr.eq_ignore_ascii_case(attr_name) {
            return true;
        }
    }

    false
}
get_referer_url, parse_data_url, Url}; 14 | 15 | pub struct Session { 16 | cache: Option, 17 | client: Client, 18 | cookies: Option>, 19 | pub options: MonolithOptions, 20 | urls: Vec, 21 | } 22 | 23 | impl Session { 24 | pub fn new( 25 | cache: Option, 26 | cookies: Option>, 27 | options: MonolithOptions, 28 | ) -> Self { 29 | let mut header_map = HeaderMap::new(); 30 | if let Some(user_agent) = &options.user_agent { 31 | header_map.insert( 32 | USER_AGENT, 33 | HeaderValue::from_str(user_agent).expect("Invalid User-Agent header specified"), 34 | ); 35 | } 36 | let client = Client::builder() 37 | .timeout(Duration::from_secs(if options.timeout > 0 { 38 | options.timeout 39 | } else { 40 | // We have to specify something that eventually makes the program fail 41 | // (prevent it from hanging forever) 42 | 600 // 10 minutes in seconds 43 | })) 44 | .danger_accept_invalid_certs(options.insecure) 45 | .default_headers(header_map) 46 | .build() 47 | .expect("Failed to initialize HTTP client"); 48 | 49 | Session { 50 | cache, 51 | cookies, 52 | client, 53 | options, 54 | urls: Vec::new(), 55 | } 56 | } 57 | 58 | pub fn retrieve_asset( 59 | &mut self, 60 | parent_url: &Url, 61 | url: &Url, 62 | ) -> Result<(Vec, Url, String, String), reqwest::Error> { 63 | let cache_key: String = clean_url(url.clone()).as_str().to_string(); 64 | 65 | if !self.urls.contains(&url.as_str().to_string()) { 66 | self.urls.push(url.as_str().to_string()); 67 | } 68 | 69 | if url.scheme() == "data" { 70 | let (media_type, charset, data) = parse_data_url(url); 71 | Ok((data, url.clone(), media_type, charset)) 72 | } else if url.scheme() == "file" { 73 | // Check if parent_url is also a file:// URL (if not, then we don't embed the asset) 74 | if parent_url.scheme() != "file" { 75 | if !self.options.silent { 76 | print_error_message(&format!("{} (security error)", &cache_key)); 77 | } 78 | 79 | // Provoke error 80 | self.client.get("").send()?; 81 | } 82 | 83 | let path_buf: PathBuf = 
url.to_file_path().unwrap().clone(); 84 | let path: &Path = path_buf.as_path(); 85 | if path.exists() { 86 | if path.is_dir() { 87 | if !self.options.silent { 88 | print_error_message(&format!("{} (is a directory)", &cache_key)); 89 | } 90 | 91 | // Provoke error 92 | Err(self.client.get("").send().unwrap_err()) 93 | } else { 94 | if !self.options.silent { 95 | print_info_message(&cache_key.to_string()); 96 | } 97 | 98 | let file_blob: Vec = fs::read(path).expect("unable to read file"); 99 | 100 | Ok(( 101 | file_blob.clone(), 102 | url.clone(), 103 | detect_media_type(&file_blob, url), 104 | "".to_string(), 105 | )) 106 | } 107 | } else { 108 | if !self.options.silent { 109 | print_error_message(&format!("{} (file not found)", &url)); 110 | } 111 | 112 | // Provoke error 113 | Err(self.client.get("").send().unwrap_err()) 114 | } 115 | } else if self.cache.is_some() && self.cache.as_ref().unwrap().contains_key(&cache_key) { 116 | // URL is in cache, we get and return it 117 | if !self.options.silent { 118 | print_info_message(&format!("{} (from cache)", &cache_key)); 119 | } 120 | 121 | Ok(( 122 | self.cache 123 | .as_ref() 124 | .unwrap() 125 | .get(&cache_key) 126 | .unwrap() 127 | .0 128 | .to_vec(), 129 | url.clone(), 130 | self.cache.as_ref().unwrap().get(&cache_key).unwrap().1, 131 | self.cache.as_ref().unwrap().get(&cache_key).unwrap().2, 132 | )) 133 | } else { 134 | if let Some(domains) = &self.options.domains { 135 | let domain_matches = domains 136 | .iter() 137 | .any(|d| domain_is_within_domain(url.host_str().unwrap(), d.trim())); 138 | if (self.options.blacklist_domains && domain_matches) 139 | || (!self.options.blacklist_domains && !domain_matches) 140 | { 141 | return Err(self.client.get("").send().unwrap_err()); 142 | } 143 | } 144 | 145 | // URL not in cache, we retrieve the file 146 | let mut headers = HeaderMap::new(); 147 | if self.cookies.is_some() && !self.cookies.as_ref().unwrap().is_empty() { 148 | for cookie in 
self.cookies.as_ref().unwrap() { 149 | if !cookie.is_expired() && cookie.matches_url(url.as_str()) { 150 | let cookie_header_value: String = cookie.name.clone() + "=" + &cookie.value; 151 | headers 152 | .insert(COOKIE, HeaderValue::from_str(&cookie_header_value).unwrap()); 153 | } 154 | } 155 | } 156 | // Add referer header for page resource requests 157 | if ["https", "http"].contains(&parent_url.scheme()) && parent_url != url { 158 | headers.insert( 159 | REFERER, 160 | HeaderValue::from_str(get_referer_url(parent_url.clone()).as_str()).unwrap(), 161 | ); 162 | } 163 | match self.client.get(url.as_str()).headers(headers).send() { 164 | Ok(response) => { 165 | if !self.options.ignore_errors && response.status() != reqwest::StatusCode::OK { 166 | if !self.options.silent { 167 | print_error_message(&format!("{} ({})", &cache_key, response.status())); 168 | } 169 | 170 | // Provoke error 171 | return Err(self.client.get("").send().unwrap_err()); 172 | } 173 | 174 | let response_url: Url = response.url().clone(); 175 | 176 | if !self.options.silent { 177 | if url.as_str() == response_url.as_str() { 178 | print_info_message(&cache_key.to_string()); 179 | } else { 180 | print_info_message(&format!("{} -> {}", &cache_key, &response_url)); 181 | } 182 | } 183 | 184 | // Attempt to obtain media type and charset by reading Content-Type header 185 | let content_type: &str = response 186 | .headers() 187 | .get(CONTENT_TYPE) 188 | .and_then(|header| header.to_str().ok()) 189 | .unwrap_or(""); 190 | 191 | let (media_type, charset, _is_base64) = parse_content_type(content_type); 192 | 193 | // Convert response into a byte array 194 | let mut data: Vec = vec![]; 195 | match response.bytes() { 196 | Ok(b) => { 197 | data = b.to_vec(); 198 | } 199 | Err(error) => { 200 | if !self.options.silent { 201 | print_error_message(&format!("{}", error)); 202 | } 203 | } 204 | } 205 | 206 | // Add retrieved resource to cache 207 | if self.cache.is_some() { 208 | let new_cache_key: String = 
clean_url(response_url.clone()).to_string(); 209 | 210 | self.cache.as_mut().unwrap().set( 211 | &new_cache_key, 212 | &data, 213 | media_type.clone(), 214 | charset.clone(), 215 | ); 216 | } 217 | 218 | // Return 219 | Ok((data, response_url, media_type, charset)) 220 | } 221 | Err(error) => { 222 | if !self.options.silent { 223 | print_error_message(&format!("{} ({})", &cache_key, error)); 224 | } 225 | 226 | Err(self.client.get("").send().unwrap_err()) 227 | } 228 | } 229 | } 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /src/url.rs: -------------------------------------------------------------------------------- 1 | use base64::{prelude::BASE64_STANDARD, Engine}; 2 | use percent_encoding::percent_decode_str; 3 | pub use url::Url; 4 | 5 | use crate::core::{detect_media_type, parse_content_type}; 6 | 7 | pub const EMPTY_IMAGE_DATA_URL: &str = "data:image/png,\ 8 | %89PNG%0D%0A%1A%0A%00%00%00%0DIHDR%00%00%00%0D%00%00%00%0D%08%04%00%00%00%D8%E2%2C%F7%00%00%00%11IDATx%DAcd%C0%09%18G%A5%28%96%02%00%0A%F8%00%0E%CB%8A%EB%16%00%00%00%00IEND%AEB%60%82"; 9 | 10 | pub fn clean_url(url: Url) -> Url { 11 | let mut url = url.clone(); 12 | 13 | // Clear fragment (if any) 14 | url.set_fragment(None); 15 | 16 | url 17 | } 18 | 19 | pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset_url: &Url) -> Url { 20 | // TODO: move this block out of this function 21 | let media_type: String = if media_type.is_empty() { 22 | detect_media_type(data, final_asset_url) 23 | } else { 24 | media_type.to_string() 25 | }; 26 | 27 | let mut data_url: Url = Url::parse("data:,").unwrap(); 28 | 29 | let c: String = 30 | if !charset.trim().is_empty() && !charset.trim().eq_ignore_ascii_case("US-ASCII") { 31 | format!(";charset={}", charset.trim()) 32 | } else { 33 | "".to_string() 34 | }; 35 | 36 | data_url.set_path( 37 | format!( 38 | "{}{};base64,{}", 39 | media_type, 40 | c, 41 | BASE64_STANDARD.encode(data) 42 | ) 
/// Checks whether `domain` falls within `domain_to_match_against`.
///
/// Both names are split into dot-separated labels and compared from the
/// right-most label towards the left, ignoring ASCII case. Trailing dots on
/// either name are ignored.
///
/// * An empty `domain_to_match_against` never matches.
/// * A `domain_to_match_against` of exactly `"."` matches every domain.
/// * Without a leading dot, `domain` may not have more labels than
///   `domain_to_match_against` (sub-domains are rejected).
/// * With a leading dot (e.g. `".example.com"`), both the domain itself and
///   any of its sub-domains match.
/// * An empty label inside `domain_to_match_against` places no constraint on
///   the label at the same position in `domain`.
pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> bool {
    if domain_to_match_against.is_empty() {
        return false;
    }

    if domain_to_match_against == "." {
        return true;
    }

    // Walk both names label by label, right to left, without allocating
    let mut domain_labels = domain.trim_end_matches('.').rsplit('.');
    let pattern_labels = domain_to_match_against.trim_end_matches('.').rsplit('.');

    for pattern_label in pattern_labels {
        // A domain that ran out of labels is compared as an empty label
        let domain_label = domain_labels.next().unwrap_or("");

        if !pattern_label.is_empty() && !pattern_label.eq_ignore_ascii_case(domain_label) {
            return false;
        }
    }

    // Labels left over in `domain` mean it is a sub-domain, which is only
    // acceptable when the domain to match against started with a dot
    domain_to_match_against.starts_with('.') || domain_labels.next().is_none()
}
path.find(',').unwrap_or(path.len()); 115 | 116 | // Split data URL into meta data and raw data 117 | let content_type: String = path.chars().take(comma_loc).collect(); 118 | let data: String = path.chars().skip(comma_loc + 1).collect(); 119 | 120 | // Parse meta data 121 | let (media_type, charset, is_base64) = parse_content_type(&content_type); 122 | 123 | // Parse raw data into vector of bytes 124 | let text: String = percent_decode_str(&data).decode_utf8_lossy().to_string(); 125 | let blob: Vec = if is_base64 { 126 | BASE64_STANDARD.decode(&text).unwrap_or_default() 127 | } else { 128 | text.as_bytes().to_vec() 129 | }; 130 | 131 | (media_type, charset, blob) 132 | } 133 | 134 | pub fn get_referer_url(url: Url) -> Url { 135 | let mut url = url.clone(); 136 | // Spec: https://httpwg.org/specs/rfc9110.html#field.referer 137 | // Must not include the fragment and userinfo components of the URI 138 | url.set_fragment(None); 139 | url.set_username("").unwrap(); 140 | url.set_password(None).unwrap(); 141 | 142 | url 143 | } 144 | 145 | pub fn resolve_url(from: &Url, to: &str) -> Url { 146 | match Url::parse(to) { 147 | Ok(parsed_url) => parsed_url, 148 | Err(_) => match from.join(to) { 149 | Ok(joined) => joined, 150 | Err(_) => Url::parse("data:,").unwrap(), 151 | }, 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /tests/_data_/basic/local-file.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Local HTML file 8 | 9 | 10 | 11 | 12 | 13 | 14 | Tricky href 15 | Remote URL 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/_data_/basic/local-script.js: -------------------------------------------------------------------------------- 1 | document.body.style.backgroundColor = "green"; 2 | document.body.style.color = "red"; 3 | 
-------------------------------------------------------------------------------- /tests/_data_/basic/local-style.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #000; 3 | color: #fff; 4 | } 5 | -------------------------------------------------------------------------------- /tests/_data_/css/index.html: -------------------------------------------------------------------------------- 1 | 12 | -------------------------------------------------------------------------------- /tests/_data_/css/style.css: -------------------------------------------------------------------------------- 1 | body{background-color:#000;color:#fff} 2 | -------------------------------------------------------------------------------- /tests/_data_/import-css-via-data-url/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Attempt to import CSS via data URL asset 8 | 16 | 17 | 18 | 19 | 20 |

If you see pink background with white foreground then we’re in trouble

21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /tests/_data_/import-css-via-data-url/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: pink; 3 | color: white; 4 | } 5 | -------------------------------------------------------------------------------- /tests/_data_/integrity/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Local HTML file 6 | 13 | 20 | 21 | 22 | 23 |

24 | This page should have black background and white foreground, but 25 | only when served via http: (not via file:) 26 |

27 | 31 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /tests/_data_/integrity/script.js: -------------------------------------------------------------------------------- 1 | function noop() { 2 | console.log(""); 3 | } 4 | -------------------------------------------------------------------------------- /tests/_data_/integrity/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #000; 3 | color: #FFF; 4 | } 5 | -------------------------------------------------------------------------------- /tests/_data_/noscript/image.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | SVG 5 | 6 | -------------------------------------------------------------------------------- /tests/_data_/noscript/index.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/_data_/noscript/nested.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/_data_/noscript/script.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/_data_/svg/icons.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /tests/_data_/svg/image.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/_data_/svg/image.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | SVG 5 | 6 | -------------------------------------------------------------------------------- /tests/_data_/svg/index.html: -------------------------------------------------------------------------------- 1 |
2 | -------------------------------------------------------------------------------- /tests/_data_/svg/svg.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /tests/_data_/unusual_encodings/gb2312.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Y2Z/monolith/7fed227bb00a15d94f2cca8fb87a7018b40d7dc0/tests/_data_/unusual_encodings/gb2312.html -------------------------------------------------------------------------------- /tests/_data_/unusual_encodings/iso-8859-1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | © Some Company 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/cli/base_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use assert_cmd::prelude::*; 11 | use std::env; 12 | use std::process::Command; 13 | 14 | #[test] 15 | fn add_new_when_provided() { 16 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 17 | let out = cmd 18 | .arg("-M") 19 | .arg("-b") 20 | .arg("http://localhost:30701/") 21 | .arg("data:text/html,Hello%2C%20World!") 22 | .output() 23 | .unwrap(); 24 | 25 | // STDERR should be empty 26 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 27 | 28 | // STDOUT should contain newly added base URL 29 | assert_eq!( 30 | String::from_utf8_lossy(&out.stdout), 31 | r#"Hello, 
World! 32 | "# 33 | ); 34 | 35 | // Exit code should be 0 36 | out.assert().code(0); 37 | } 38 | 39 | #[test] 40 | fn keep_existing_when_none_provided() { 41 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 42 | let out = cmd 43 | .arg("-M") 44 | .arg("data:text/html,Hello%2C%20World!") 45 | .output() 46 | .unwrap(); 47 | 48 | // STDERR should be empty 49 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 50 | 51 | // STDOUT should contain newly added base URL 52 | assert_eq!( 53 | String::from_utf8_lossy(&out.stdout), 54 | r#"Hello, World! 55 | "# 56 | ); 57 | 58 | // Exit code should be 0 59 | out.assert().code(0); 60 | } 61 | 62 | #[test] 63 | fn override_existing_when_provided() { 64 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 65 | let out = cmd 66 | .arg("-M") 67 | .arg("-b") 68 | .arg("http://localhost/") 69 | .arg("data:text/html,Hello%2C%20World!") 70 | .output() 71 | .unwrap(); 72 | 73 | // STDERR should be empty 74 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 75 | 76 | // STDOUT should contain newly added base URL 77 | assert_eq!( 78 | String::from_utf8_lossy(&out.stdout), 79 | r#"Hello, World! 80 | "# 81 | ); 82 | 83 | // Exit code should be 0 84 | out.assert().code(0); 85 | } 86 | 87 | #[test] 88 | fn set_existing_to_empty_when_empty_provided() { 89 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 90 | let out = cmd 91 | .arg("-M") 92 | .arg("-b") 93 | .arg("") 94 | .arg("data:text/html,Hello%2C%20World!") 95 | .output() 96 | .unwrap(); 97 | 98 | // STDERR should be empty 99 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 100 | 101 | // STDOUT should contain newly added base URL 102 | assert_eq!( 103 | String::from_utf8_lossy(&out.stdout), 104 | r#"Hello, World! 
105 | "# 106 | ); 107 | 108 | // Exit code should be 0 109 | out.assert().code(0); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /tests/cli/basic.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use assert_cmd::prelude::*; 11 | use std::env; 12 | use std::fs; 13 | use std::path::Path; 14 | use std::process::{Command, Stdio}; 15 | use url::Url; 16 | 17 | #[test] 18 | fn print_help_information() { 19 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 20 | let out = cmd.arg("-h").output().unwrap(); 21 | 22 | // STDERR should be empty 23 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 24 | 25 | // STDOUT should contain program name, version, and usage information 26 | // TODO 27 | 28 | // Exit code should be 0 29 | out.assert().code(0); 30 | } 31 | 32 | #[test] 33 | fn print_version() { 34 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 35 | let out = cmd.arg("-V").output().unwrap(); 36 | 37 | // STDERR should be empty 38 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 39 | 40 | // STDOUT should contain program name and version 41 | assert_eq!( 42 | String::from_utf8_lossy(&out.stdout), 43 | format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")) 44 | ); 45 | 46 | // Exit code should be 0 47 | out.assert().code(0); 48 | } 49 | 50 | #[test] 51 | fn stdin_target_input() { 52 | let mut echo = Command::new("echo") 53 | .arg("Hello from STDIN") 54 | .stdout(Stdio::piped()) 55 | .spawn() 56 | .unwrap(); 57 | let echo_out = 
echo.stdout.take().unwrap(); 58 | echo.wait().unwrap(); 59 | 60 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 61 | cmd.stdin(echo_out); 62 | let out = cmd.arg("-M").arg("-").output().unwrap(); 63 | 64 | // STDERR should be empty 65 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 66 | 67 | // STDOUT should contain HTML created out of STDIN 68 | assert_eq!( 69 | String::from_utf8_lossy(&out.stdout), 70 | r#"Hello from STDIN 71 | 72 | "# 73 | ); 74 | 75 | // Exit code should be 0 76 | out.assert().code(0); 77 | } 78 | 79 | #[test] 80 | fn css_import_string() { 81 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 82 | let path_html: &Path = Path::new("tests/_data_/css/index.html"); 83 | let path_css: &Path = Path::new("tests/_data_/css/style.css"); 84 | 85 | assert!(path_html.is_file()); 86 | assert!(path_css.is_file()); 87 | 88 | let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap(); 89 | 90 | // STDERR should list files that got retrieved 91 | assert_eq!( 92 | String::from_utf8_lossy(&out.stderr), 93 | format!( 94 | "\ 95 | {file_url_html}\n\ 96 | {file_url_css}\n\ 97 | {file_url_css}\n\ 98 | {file_url_css}\n\ 99 | ", 100 | file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(), 101 | file_url_css = Url::from_file_path(fs::canonicalize(path_css).unwrap()).unwrap(), 102 | ) 103 | ); 104 | 105 | // STDOUT should contain embedded CSS url()'s 106 | assert_eq!( 107 | String::from_utf8_lossy(&out.stdout), 108 | r##" 119 | 120 | "## 121 | ); 122 | 123 | // Exit code should be 0 124 | out.assert().code(0); 125 | } 126 | } 127 | 128 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 129 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 130 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 131 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 132 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 133 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 134 | 135 | #[cfg(test)] 136 | mod failing { 
137 | use assert_cmd::prelude::*; 138 | use std::env; 139 | use std::process::Command; 140 | 141 | #[test] 142 | fn bad_input_empty_target() { 143 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 144 | let out = cmd.arg("").output().unwrap(); 145 | 146 | // STDERR should contain error description 147 | assert_eq!( 148 | String::from_utf8_lossy(&out.stderr), 149 | "Error: no target specified\n" 150 | ); 151 | 152 | // STDOUT should be empty 153 | assert_eq!(String::from_utf8_lossy(&out.stdout), ""); 154 | 155 | // Exit code should be 1 156 | out.assert().code(1); 157 | } 158 | 159 | #[test] 160 | fn unsupported_scheme() { 161 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 162 | let out = cmd.arg("mailto:snshn@tutanota.com").output().unwrap(); 163 | 164 | // STDERR should contain error description 165 | assert_eq!( 166 | String::from_utf8_lossy(&out.stderr), 167 | "Error: unsupported target URL scheme \"mailto\"\n" 168 | ); 169 | 170 | // STDOUT should be empty 171 | assert_eq!(String::from_utf8_lossy(&out.stdout), ""); 172 | 173 | // Exit code should be 1 174 | out.assert().code(1); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /tests/cli/data_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use assert_cmd::prelude::*; 11 | use std::env; 12 | use std::process::Command; 13 | 14 | use monolith::url::EMPTY_IMAGE_DATA_URL; 15 | 16 | #[test] 17 | fn isolate_data_url() { 18 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 19 | 
let out = cmd 20 | .arg("-M") 21 | .arg("-I") 22 | .arg("data:text/html,Hello%2C%20World!") 23 | .output() 24 | .unwrap(); 25 | 26 | // STDERR should be empty 27 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 28 | 29 | // STDOUT should contain isolated HTML 30 | assert_eq!( 31 | String::from_utf8_lossy(&out.stdout), 32 | r#"Hello, World! 33 | "# 34 | ); 35 | 36 | // Exit code should be 0 37 | out.assert().code(0); 38 | } 39 | 40 | #[test] 41 | fn remove_css_from_data_url() { 42 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 43 | let out = cmd 44 | .arg("-M") 45 | .arg("-c") 46 | .arg("data:text/html,Hello") 47 | .output() 48 | .unwrap(); 49 | 50 | // STDERR should be empty 51 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 52 | 53 | // STDOUT should contain HTML with no CSS 54 | assert_eq!( 55 | String::from_utf8_lossy(&out.stdout), 56 | r#"Hello 57 | "# 58 | ); 59 | 60 | // Exit code should be 0 61 | out.assert().code(0); 62 | } 63 | 64 | #[test] 65 | fn remove_fonts_from_data_url() { 66 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 67 | let out = cmd 68 | .arg("-M") 69 | .arg("-F") 70 | .arg("data:text/html,Hi") 71 | .output() 72 | .unwrap(); 73 | 74 | // STDERR should be empty 75 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 76 | 77 | // STDOUT should contain HTML with no web fonts 78 | assert_eq!( 79 | String::from_utf8_lossy(&out.stdout), 80 | r#"Hi 81 | "# 82 | ); 83 | 84 | // Exit code should be 0 85 | out.assert().code(0); 86 | } 87 | 88 | #[test] 89 | fn remove_frames_from_data_url() { 90 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 91 | let out = cmd 92 | .arg("-M") 93 | .arg("-f") 94 | .arg(r#"data:text/html,Hi"#) 95 | .output() 96 | .unwrap(); 97 | 98 | // STDERR should be empty 99 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 100 | 101 | // STDOUT should contain HTML with no iframes 102 | assert_eq!( 103 | String::from_utf8_lossy(&out.stdout), 104 
| r#"Hi 105 | "# 106 | ); 107 | 108 | // Exit code should be 0 109 | out.assert().code(0); 110 | } 111 | 112 | #[test] 113 | fn remove_images_from_data_url() { 114 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 115 | let out = cmd 116 | .arg("-M") 117 | .arg("-i") 118 | .arg("data:text/html,Hi") 119 | .output() 120 | .unwrap(); 121 | 122 | // STDERR should be empty 123 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 124 | 125 | // STDOUT should contain HTML with no images 126 | assert_eq!( 127 | String::from_utf8_lossy(&out.stdout), 128 | format!( 129 | r#"Hi 130 | "#, 131 | empty_image = EMPTY_IMAGE_DATA_URL, 132 | ) 133 | ); 134 | 135 | // Exit code should be 0 136 | out.assert().code(0); 137 | } 138 | 139 | #[test] 140 | fn remove_js_from_data_url() { 141 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 142 | let out = cmd 143 | .arg("-M") 144 | .arg("-j") 145 | .arg("data:text/html,Hi") 146 | .output() 147 | .unwrap(); 148 | 149 | // STDERR should be empty 150 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 151 | 152 | // STDOUT should contain HTML with no JS 153 | assert_eq!( 154 | String::from_utf8_lossy(&out.stdout), 155 | r#"Hi 156 | "# 157 | ); 158 | 159 | // Exit code should be 0 160 | out.assert().code(0); 161 | } 162 | } 163 | 164 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 165 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 166 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 167 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 168 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 169 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 170 | 171 | #[cfg(test)] 172 | mod failing { 173 | use assert_cmd::prelude::*; 174 | use std::env; 175 | use std::process::Command; 176 | 177 | #[test] 178 | fn bad_input_data_url() { 179 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 180 | let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap(); 181 | 182 | // STDERR should be empty 
183 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 184 | 185 | // STDOUT should contain text 186 | assert_eq!(String::from_utf8_lossy(&out.stdout), "Hello, World!"); 187 | 188 | // Exit code should be 0 189 | out.assert().code(0); 190 | } 191 | 192 | #[test] 193 | fn security_disallow_local_assets_within_data_url_targets() { 194 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 195 | let out = cmd 196 | .arg("-M") 197 | .arg(r#"data:text/html,%3Cscript%20src="src/tests/data/basic/local-script.js"%3E%3C/script%3E"#) 198 | .output() 199 | .unwrap(); 200 | 201 | // STDERR should be empty 202 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 203 | 204 | // STDOUT should contain HTML without contents of local JS file 205 | assert_eq!( 206 | String::from_utf8_lossy(&out.stdout), 207 | r#" 208 | "# 209 | ); 210 | 211 | // Exit code should be 0 212 | out.assert().code(0); 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /tests/cli/mod.rs: -------------------------------------------------------------------------------- 1 | mod base_url; 2 | mod basic; 3 | mod data_url; 4 | mod local_files; 5 | mod noscript; 6 | mod unusual_encodings; 7 | -------------------------------------------------------------------------------- /tests/cli/noscript.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use assert_cmd::prelude::*; 11 | use std::env; 12 | use std::fs; 13 | use std::path::Path; 14 | use std::process::Command; 15 | use url::Url; 16 | 17 | #[test] 18 | fn 
parse_noscript_contents() { 19 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 20 | let path_html: &Path = Path::new("tests/_data_/noscript/index.html"); 21 | let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg"); 22 | 23 | let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap(); 24 | 25 | // STDERR should contain target HTML and embedded SVG files 26 | assert_eq!( 27 | String::from_utf8_lossy(&out.stderr), 28 | format!( 29 | "\ 30 | {file_url_html}\n\ 31 | {file_url_svg}\n\ 32 | ", 33 | file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(), 34 | file_url_svg = Url::from_file_path(fs::canonicalize(path_svg).unwrap()).unwrap(), 35 | ) 36 | ); 37 | 38 | // STDOUT should contain HTML with no CSS 39 | assert_eq!( 40 | String::from_utf8_lossy(&out.stdout), 41 | "\n\n" 42 | ); 43 | 44 | // Exit code should be 0 45 | out.assert().code(0); 46 | } 47 | 48 | #[test] 49 | fn unwrap_noscript_contents() { 50 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 51 | let path_html: &Path = Path::new("tests/_data_/noscript/index.html"); 52 | let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg"); 53 | 54 | let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap(); 55 | 56 | // STDERR should contain target HTML and embedded SVG files 57 | assert_eq!( 58 | String::from_utf8_lossy(&out.stderr), 59 | format!( 60 | "\ 61 | {file_url_html}\n\ 62 | {file_url_svg}\n\ 63 | ", 64 | file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(), 65 | file_url_svg = Url::from_file_path(fs::canonicalize(path_svg).unwrap()).unwrap(), 66 | ) 67 | ); 68 | 69 | // STDOUT should contain HTML with no CSS 70 | assert_eq!( 71 | String::from_utf8_lossy(&out.stdout), 72 | "\n\n" 73 | ); 74 | 75 | // Exit code should be 0 76 | out.assert().code(0); 77 | } 78 | 79 | #[test] 80 | fn unwrap_noscript_contents_nested() { 81 | let mut cmd = 
Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 82 | let path_html: &Path = Path::new("tests/_data_/noscript/nested.html"); 83 | let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg"); 84 | 85 | let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap(); 86 | 87 | // STDERR should contain target HTML and embedded SVG files 88 | assert_eq!( 89 | String::from_utf8_lossy(&out.stderr), 90 | format!( 91 | "\ 92 | {file_url_html}\n\ 93 | {file_url_svg}\n\ 94 | ", 95 | file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(), 96 | file_url_svg = Url::from_file_path(fs::canonicalize(path_svg).unwrap()).unwrap(), 97 | ) 98 | ); 99 | 100 | // STDOUT should contain HTML with no CSS 101 | assert_eq!( 102 | String::from_utf8_lossy(&out.stdout), 103 | "

JS is not active

\n\n" 104 | ); 105 | 106 | // Exit code should be 0 107 | out.assert().code(0); 108 | } 109 | 110 | #[test] 111 | fn unwrap_noscript_contents_with_script() { 112 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 113 | let path_html: &Path = Path::new("tests/_data_/noscript/script.html"); 114 | let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg"); 115 | 116 | let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap(); 117 | 118 | // STDERR should contain target HTML and embedded SVG files 119 | assert_eq!( 120 | String::from_utf8_lossy(&out.stderr), 121 | format!( 122 | "\ 123 | {file_url_html}\n\ 124 | {file_url_svg}\n\ 125 | ", 126 | file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(), 127 | file_url_svg = Url::from_file_path(fs::canonicalize(path_svg).unwrap()).unwrap(), 128 | ) 129 | ); 130 | 131 | // STDOUT should contain HTML with no CSS 132 | assert_eq!( 133 | String::from_utf8_lossy(&out.stdout), 134 | r#" 135 | 136 | "# 137 | ); 138 | 139 | // Exit code should be 0 140 | out.assert().code(0); 141 | } 142 | 143 | #[test] 144 | fn unwrap_noscript_contents_attr_data_url() { 145 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 146 | let out = cmd 147 | .arg("-M") 148 | .arg("-n") 149 | .arg("data:text/html,") 150 | .output() 151 | .unwrap(); 152 | 153 | // STDERR should be empty 154 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 155 | 156 | // STDOUT should contain unwrapped contents of NOSCRIPT element 157 | assert_eq!( 158 | String::from_utf8_lossy(&out.stdout), 159 | r#"test 160 | "# 161 | ); 162 | 163 | // Exit code should be 0 164 | out.assert().code(0); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /tests/cli/unusual_encodings.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // 
██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use assert_cmd::prelude::*; 11 | use encoding_rs::Encoding; 12 | use std::env; 13 | use std::path::MAIN_SEPARATOR; 14 | use std::process::{Command, Stdio}; 15 | 16 | #[test] 17 | fn properly_save_document_with_gb2312() { 18 | let cwd = env::current_dir().unwrap(); 19 | let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/"); 20 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 21 | let out = cmd 22 | .arg("-M") 23 | .arg(format!( 24 | "tests{s}_data_{s}unusual_encodings{s}gb2312.html", 25 | s = MAIN_SEPARATOR 26 | )) 27 | .output() 28 | .unwrap(); 29 | let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; 30 | 31 | // STDERR should contain only the target file 32 | assert_eq!( 33 | String::from_utf8_lossy(&out.stderr), 34 | format!( 35 | "{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n", 36 | file = file_url_protocol, 37 | cwd = cwd_normalized, 38 | ) 39 | ); 40 | 41 | // STDOUT should contain original document without any modifications 42 | let s: String; 43 | if let Some(encoding) = Encoding::for_label(b"gb2312") { 44 | let (string, _, _) = encoding.decode(&out.stdout); 45 | s = string.to_string(); 46 | } else { 47 | s = String::from_utf8_lossy(&out.stdout).to_string(); 48 | } 49 | assert_eq!( 50 | s, 51 | r##" 52 | 53 | 近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 54 | 55 | 56 |

近七成人减少线下需求 银行数字化转型提速

57 | 58 | 59 | 60 | "## 61 | ); 62 | 63 | // Exit code should be 0 64 | out.assert().code(0); 65 | } 66 | 67 | #[test] 68 | fn properly_save_document_with_gb2312_from_stdin() { 69 | let mut echo = Command::new("cat") 70 | .arg(format!( 71 | "tests{s}_data_{s}unusual_encodings{s}gb2312.html", 72 | s = MAIN_SEPARATOR 73 | )) 74 | .stdout(Stdio::piped()) 75 | .spawn() 76 | .unwrap(); 77 | let echo_out = echo.stdout.take().unwrap(); 78 | echo.wait().unwrap(); 79 | 80 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 81 | cmd.stdin(echo_out); 82 | let out = cmd.arg("-M").arg("-").output().unwrap(); 83 | 84 | // STDERR should be empty 85 | assert_eq!(String::from_utf8_lossy(&out.stderr), ""); 86 | 87 | // STDOUT should contain HTML created out of STDIN 88 | let s: String; 89 | if let Some(encoding) = Encoding::for_label(b"gb2312") { 90 | let (string, _, _) = encoding.decode(&out.stdout); 91 | s = string.to_string(); 92 | } else { 93 | s = String::from_utf8_lossy(&out.stdout).to_string(); 94 | } 95 | assert_eq!( 96 | s, 97 | r##" 98 | 99 | 近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 100 | 101 | 102 |

近七成人减少线下需求 银行数字化转型提速

103 | 104 | 105 | 106 | "## 107 | ); 108 | 109 | // Exit code should be 0 110 | out.assert().code(0); 111 | } 112 | 113 | #[test] 114 | fn properly_save_document_with_gb2312_custom_charset() { 115 | let cwd = env::current_dir().unwrap(); 116 | let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/"); 117 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 118 | let out = cmd 119 | .arg("-M") 120 | .arg("-E") 121 | .arg("utf8") 122 | .arg(format!( 123 | "tests{s}_data_{s}unusual_encodings{s}gb2312.html", 124 | s = MAIN_SEPARATOR 125 | )) 126 | .output() 127 | .unwrap(); 128 | let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; 129 | 130 | // STDERR should contain only the target file 131 | assert_eq!( 132 | String::from_utf8_lossy(&out.stderr), 133 | format!( 134 | "{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n", 135 | file = file_url_protocol, 136 | cwd = cwd_normalized, 137 | ) 138 | ); 139 | 140 | // STDOUT should contain original document without any modifications 141 | assert_eq!( 142 | String::from_utf8_lossy(&out.stdout).to_string(), 143 | r#" 144 | 145 | 近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 146 | 147 | 148 |

近七成人减少线下需求 银行数字化转型提速

149 | 150 | 151 | 152 | "# 153 | ); 154 | 155 | // Exit code should be 0 156 | out.assert().code(0); 157 | } 158 | 159 | #[test] 160 | fn properly_save_document_with_gb2312_custom_charset_bad() { 161 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 162 | let out = cmd 163 | .arg("-M") 164 | .arg("-E") 165 | .arg("utf0") 166 | .arg(format!( 167 | "tests{s}_data_{s}unusual_encodings{s}gb2312.html", 168 | s = MAIN_SEPARATOR 169 | )) 170 | .output() 171 | .unwrap(); 172 | 173 | // STDERR should contain error message 174 | assert_eq!( 175 | String::from_utf8_lossy(&out.stderr), 176 | "Error: unknown encoding \"utf0\"\n" 177 | ); 178 | 179 | // STDOUT should be empty 180 | assert_eq!(String::from_utf8_lossy(&out.stdout).to_string(), ""); 181 | 182 | // Exit code should be 1 183 | out.assert().code(1); 184 | } 185 | } 186 | 187 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 188 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 189 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 190 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 191 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 192 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 193 | 194 | #[cfg(test)] 195 | mod failing { 196 | use assert_cmd::prelude::*; 197 | use std::env; 198 | use std::path::MAIN_SEPARATOR; 199 | use std::process::Command; 200 | 201 | #[test] 202 | fn change_iso88591_to_utf8_to_properly_display_html_entities() { 203 | let cwd = env::current_dir().unwrap(); 204 | let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/"); 205 | let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 206 | let out = cmd 207 | .arg("-M") 208 | .arg(format!( 209 | "tests{s}_data_{s}unusual_encodings{s}iso-8859-1.html", 210 | s = MAIN_SEPARATOR 211 | )) 212 | .output() 213 | .unwrap(); 214 | let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; 215 | 216 | // STDERR should contain only the target file 217 | assert_eq!( 218 | 
String::from_utf8_lossy(&out.stderr), 219 | format!( 220 | "{file}{cwd}/tests/_data_/unusual_encodings/iso-8859-1.html\n", 221 | file = file_url_protocol, 222 | cwd = cwd_normalized, 223 | ) 224 | ); 225 | 226 | // STDOUT should contain original document but with UTF-8 charset 227 | assert_eq!( 228 | String::from_utf8_lossy(&out.stdout), 229 | r##" 230 | 231 | 232 | 233 | � Some Company 234 | 235 | 236 | 237 | "## 238 | ); 239 | 240 | // Exit code should be 0 241 | out.assert().code(0); 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /tests/cookies/cookie/is_expired.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::cookies; 11 | 12 | #[test] 13 | fn never_expires() { 14 | let cookie = cookies::Cookie { 15 | domain: String::from("127.0.0.1"), 16 | include_subdomains: true, 17 | path: String::from("/"), 18 | https_only: false, 19 | expires: 0, 20 | name: String::from(""), 21 | value: String::from(""), 22 | }; 23 | 24 | assert!(!cookie.is_expired()); 25 | } 26 | 27 | #[test] 28 | fn expires_long_from_now() { 29 | let cookie = cookies::Cookie { 30 | domain: String::from("127.0.0.1"), 31 | include_subdomains: true, 32 | path: String::from("/"), 33 | https_only: false, 34 | expires: 9999999999, 35 | name: String::from(""), 36 | value: String::from(""), 37 | }; 38 | 39 | assert!(!cookie.is_expired()); 40 | } 41 | } 42 | 43 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 44 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 45 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 46 | // 
██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 47 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 48 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 49 | 50 | #[cfg(test)] 51 | mod failing { 52 | use monolith::cookies; 53 | 54 | #[test] 55 | fn expired() { 56 | let cookie = cookies::Cookie { 57 | domain: String::from("127.0.0.1"), 58 | include_subdomains: true, 59 | path: String::from("/"), 60 | https_only: false, 61 | expires: 1, 62 | name: String::from(""), 63 | value: String::from(""), 64 | }; 65 | 66 | assert!(cookie.is_expired()); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /tests/cookies/cookie/matches_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::cookies; 11 | 12 | #[test] 13 | fn secure_url() { 14 | let cookie = cookies::Cookie { 15 | domain: String::from("127.0.0.1"), 16 | include_subdomains: true, 17 | path: String::from("/"), 18 | https_only: true, 19 | expires: 0, 20 | name: String::from(""), 21 | value: String::from(""), 22 | }; 23 | assert!(cookie.matches_url("https://127.0.0.1/something")); 24 | } 25 | 26 | #[test] 27 | fn non_secure_url() { 28 | let cookie = cookies::Cookie { 29 | domain: String::from("127.0.0.1"), 30 | include_subdomains: true, 31 | path: String::from("/"), 32 | https_only: false, 33 | expires: 0, 34 | name: String::from(""), 35 | value: String::from(""), 36 | }; 37 | assert!(cookie.matches_url("http://127.0.0.1/something")); 38 | } 39 | 40 | #[test] 41 | fn subdomain() { 42 | let cookie = cookies::Cookie { 43 | domain: 
String::from(".somethingsomething.com"), 44 | include_subdomains: true, 45 | path: String::from("/"), 46 | https_only: true, 47 | expires: 0, 48 | name: String::from(""), 49 | value: String::from(""), 50 | }; 51 | assert!(cookie.matches_url("https://cdn.somethingsomething.com/something")); 52 | } 53 | } 54 | 55 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 56 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 57 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 58 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 59 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 60 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 61 | 62 | #[cfg(test)] 63 | mod failing { 64 | use monolith::cookies; 65 | 66 | #[test] 67 | fn empty_url() { 68 | let cookie = cookies::Cookie { 69 | domain: String::from("127.0.0.1"), 70 | include_subdomains: true, 71 | path: String::from("/"), 72 | https_only: false, 73 | expires: 0, 74 | name: String::from(""), 75 | value: String::from(""), 76 | }; 77 | assert!(!cookie.matches_url("")); 78 | } 79 | 80 | #[test] 81 | fn wrong_hostname() { 82 | let cookie = cookies::Cookie { 83 | domain: String::from("127.0.0.1"), 84 | include_subdomains: true, 85 | path: String::from("/"), 86 | https_only: false, 87 | expires: 0, 88 | name: String::from(""), 89 | value: String::from(""), 90 | }; 91 | assert!(!cookie.matches_url("http://0.0.0.0/")); 92 | } 93 | 94 | #[test] 95 | fn wrong_path() { 96 | let cookie = cookies::Cookie { 97 | domain: String::from("127.0.0.1"), 98 | include_subdomains: false, 99 | path: String::from("/"), 100 | https_only: false, 101 | expires: 0, 102 | name: String::from(""), 103 | value: String::from(""), 104 | }; 105 | assert!(!cookie.matches_url("http://0.0.0.0/path")); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /tests/cookies/cookie/mod.rs: -------------------------------------------------------------------------------- 1 | mod is_expired; 2 | mod matches_url; 3 | 
-------------------------------------------------------------------------------- /tests/cookies/mod.rs: -------------------------------------------------------------------------------- 1 | mod cookie; 2 | mod parse_cookie_file_contents; 3 | -------------------------------------------------------------------------------- /tests/cookies/parse_cookie_file_contents.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::cookies; 11 | 12 | #[test] 13 | fn parse_file() { 14 | let file_contents = r#"# Netscape HTTP Cookie File 15 | 127.0.0.1 FALSE / FALSE 0 USER_TOKEN in"#; 16 | let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); 17 | assert_eq!(result.len(), 1); 18 | assert_eq!(result[0].domain, "127.0.0.1"); 19 | assert!(!result[0].include_subdomains); 20 | assert_eq!(result[0].path, "/"); 21 | assert!(!result[0].https_only); 22 | assert_eq!(result[0].expires, 0); 23 | assert_eq!(result[0].name, "USER_TOKEN"); 24 | assert_eq!(result[0].value, "in"); 25 | } 26 | 27 | #[test] 28 | fn parse_multiline_file() { 29 | let file_contents = r#"# HTTP Cookie File 30 | 127.0.0.1 FALSE / FALSE 0 USER_TOKEN in 31 | 127.0.0.1 TRUE / TRUE 9 USER_TOKEN out 32 | 33 | "#; 34 | let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); 35 | assert_eq!(result.len(), 2); 36 | assert_eq!(result[0].domain, "127.0.0.1"); 37 | assert!(!result[0].include_subdomains); 38 | assert_eq!(result[0].path, "/"); 39 | assert!(!result[0].https_only); 40 | assert_eq!(result[0].expires, 0); 41 | assert_eq!(result[0].name, "USER_TOKEN"); 42 | 
assert_eq!(result[0].value, "in"); 43 | assert_eq!(result[1].domain, "127.0.0.1"); 44 | assert!(result[1].include_subdomains); 45 | assert_eq!(result[1].path, "/"); 46 | assert!(result[1].https_only); 47 | assert_eq!(result[1].expires, 9); 48 | assert_eq!(result[1].name, "USER_TOKEN"); 49 | assert_eq!(result[1].value, "out"); 50 | } 51 | } 52 | 53 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 54 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 55 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 56 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 57 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 58 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 59 | 60 | #[cfg(test)] 61 | mod failing { 62 | use monolith::cookies; 63 | 64 | #[test] 65 | fn empty() { 66 | let file_contents = ""; 67 | let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); 68 | assert_eq!(result.len(), 0); 69 | } 70 | 71 | #[test] 72 | fn no_header() { 73 | let file_contents = "127.0.0.1 FALSE / FALSE 0 USER_TOKEN in"; 74 | match cookies::parse_cookie_file_contents(file_contents) { 75 | Ok(_result) => { 76 | assert!(false); 77 | } 78 | Err(_e) => { 79 | assert!(true); 80 | } 81 | } 82 | } 83 | 84 | #[test] 85 | fn spaces_instead_of_tabs() { 86 | let file_contents = 87 | "# HTTP Cookie File\n127.0.0.1 FALSE / FALSE 0 USER_TOKEN in"; 88 | let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); 89 | assert_eq!(result.len(), 0); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /tests/core/detect_media_type.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ 
╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | 12 | use monolith::core::detect_media_type; 13 | 14 | #[test] 15 | fn image_gif87() { 16 | let dummy_url: Url = Url::parse("data:,").unwrap(); 17 | assert_eq!(detect_media_type(b"GIF87a", &dummy_url), "image/gif"); 18 | } 19 | 20 | #[test] 21 | fn image_gif89() { 22 | let dummy_url: Url = Url::parse("data:,").unwrap(); 23 | assert_eq!(detect_media_type(b"GIF89a", &dummy_url), "image/gif"); 24 | } 25 | 26 | #[test] 27 | fn image_jpeg() { 28 | let dummy_url: Url = Url::parse("data:,").unwrap(); 29 | assert_eq!(detect_media_type(b"\xFF\xD8\xFF", &dummy_url), "image/jpeg"); 30 | } 31 | 32 | #[test] 33 | fn image_png() { 34 | let dummy_url: Url = Url::parse("data:,").unwrap(); 35 | assert_eq!( 36 | detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", &dummy_url), 37 | "image/png" 38 | ); 39 | } 40 | 41 | #[test] 42 | fn image_svg() { 43 | let dummy_url: Url = Url::parse("data:,").unwrap(); 44 | assert_eq!(detect_media_type(b":"|?/%title%.html"#, 55 | r#"/\<>:"|?"#, 56 | MonolithOutputFormat::HTML, 57 | ); 58 | 59 | assert_eq!( 60 | final_destination, 61 | r#"/home/username/Downloads/<>:"|?/__[] - -.html"# 62 | ); 63 | } 64 | 65 | #[test] 66 | fn level_up() { 67 | let final_destination = 68 | format_output_path("../%title%.html", ".Title", MonolithOutputFormat::HTML); 69 | 70 | assert_eq!(final_destination, r#"../Title.html"#); 71 | } 72 | 73 | #[test] 74 | fn file_name_extension() { 75 | let final_destination = 76 | format_output_path("%title%.%extension%", "Title", MonolithOutputFormat::HTML); 77 | 78 | assert_eq!(final_destination, r#"Title.html"#); 79 | } 80 | 81 | #[test] 82 | fn file_name_extension_mhtml() { 83 | let final_destination = 84 | format_output_path("%title%.%extension%", "Title", MonolithOutputFormat::MHTML); 85 | 86 | assert_eq!(final_destination, r#"Title.mhtml"#); 87 | } 88 | 89 | #[test] 90 | fn file_name_extension_short() { 91 | let final_destination 
= 92 | format_output_path("%title%.%ext%", "Title", MonolithOutputFormat::HTML); 93 | 94 | assert_eq!(final_destination, r#"Title.htm"#); 95 | } 96 | 97 | #[test] 98 | fn file_name_extension_short_mhtml() { 99 | let final_destination = 100 | format_output_path("%title%.%ext%", "Title", MonolithOutputFormat::MHTML); 101 | 102 | assert_eq!(final_destination, r#"Title.mht"#); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /tests/core/mod.rs: -------------------------------------------------------------------------------- 1 | mod detect_media_type; 2 | mod format_output_path; 3 | mod options; 4 | mod parse_content_type; 5 | -------------------------------------------------------------------------------- /tests/core/options.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::core::{MonolithOptions, MonolithOutputFormat}; 11 | 12 | #[test] 13 | fn defaults() { 14 | let options: MonolithOptions = MonolithOptions::default(); 15 | 16 | assert!(!options.no_audio); 17 | assert_eq!(options.base_url, None); 18 | assert!(!options.no_css); 19 | assert_eq!(options.encoding, None); 20 | assert!(!options.no_frames); 21 | assert!(!options.no_fonts); 22 | assert!(!options.no_images); 23 | assert!(!options.isolate); 24 | assert!(!options.no_js); 25 | assert!(!options.insecure); 26 | assert!(!options.no_metadata); 27 | assert_eq!(options.output_format, MonolithOutputFormat::HTML); 28 | assert!(!options.silent); 29 | assert_eq!(options.timeout, 0); 30 | assert_eq!(options.user_agent, None); 31 | 
assert!(!options.no_video); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tests/core/parse_content_type.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::core::parse_content_type; 11 | 12 | #[test] 13 | fn text_plain_utf8() { 14 | let (media_type, charset, is_base64) = parse_content_type("text/plain;charset=utf8"); 15 | assert_eq!(media_type, "text/plain"); 16 | assert_eq!(charset, "utf8"); 17 | assert!(!is_base64); 18 | } 19 | 20 | #[test] 21 | fn text_plain_utf8_spaces() { 22 | let (media_type, charset, is_base64) = parse_content_type(" text/plain ; charset=utf8 "); 23 | assert_eq!(media_type, "text/plain"); 24 | assert_eq!(charset, "utf8"); 25 | assert!(!is_base64); 26 | } 27 | 28 | #[test] 29 | fn empty() { 30 | let (media_type, charset, is_base64) = parse_content_type(""); 31 | assert_eq!(media_type, "text/plain"); 32 | assert_eq!(charset, "US-ASCII"); 33 | assert!(!is_base64); 34 | } 35 | 36 | #[test] 37 | fn base64() { 38 | let (media_type, charset, is_base64) = parse_content_type(";base64"); 39 | assert_eq!(media_type, "text/plain"); 40 | assert_eq!(charset, "US-ASCII"); 41 | assert!(is_base64); 42 | } 43 | 44 | #[test] 45 | fn text_html_base64() { 46 | let (media_type, charset, is_base64) = parse_content_type("text/html;base64"); 47 | assert_eq!(media_type, "text/html"); 48 | assert_eq!(charset, "US-ASCII"); 49 | assert!(is_base64); 50 | } 51 | 52 | #[test] 53 | fn only_media_type() { 54 | let (media_type, charset, is_base64) = parse_content_type("text/html"); 55 | 
assert_eq!(media_type, "text/html"); 56 | assert_eq!(charset, "US-ASCII"); 57 | assert!(!is_base64); 58 | } 59 | 60 | #[test] 61 | fn only_media_type_colon() { 62 | let (media_type, charset, is_base64) = parse_content_type("text/html;"); 63 | assert_eq!(media_type, "text/html"); 64 | assert_eq!(charset, "US-ASCII"); 65 | assert!(!is_base64); 66 | } 67 | 68 | #[test] 69 | fn media_type_gb2312_filename() { 70 | let (media_type, charset, is_base64) = 71 | parse_content_type("text/html;charset=GB2312;filename=index.html"); 72 | assert_eq!(media_type, "text/html"); 73 | assert_eq!(charset, "GB2312"); 74 | assert!(!is_base64); 75 | } 76 | 77 | #[test] 78 | fn media_type_filename_gb2312() { 79 | let (media_type, charset, is_base64) = 80 | parse_content_type("text/html;filename=index.html;charset=GB2312"); 81 | assert_eq!(media_type, "text/html"); 82 | assert_eq!(charset, "GB2312"); 83 | assert!(!is_base64); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /tests/css/is_image_url_prop.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::css; 11 | 12 | #[test] 13 | fn background() { 14 | assert!(css::is_image_url_prop("background")); 15 | } 16 | 17 | #[test] 18 | fn background_image() { 19 | assert!(css::is_image_url_prop("background-image")); 20 | } 21 | 22 | #[test] 23 | fn background_image_uppercase() { 24 | assert!(css::is_image_url_prop("BACKGROUND-IMAGE")); 25 | } 26 | 27 | #[test] 28 | fn border_image() { 29 | assert!(css::is_image_url_prop("border-image")); 30 | } 31 | 32 | #[test] 
33 | fn content() { 34 | assert!(css::is_image_url_prop("content")); 35 | } 36 | 37 | #[test] 38 | fn cursor() { 39 | assert!(css::is_image_url_prop("cursor")); 40 | } 41 | 42 | #[test] 43 | fn list_style() { 44 | assert!(css::is_image_url_prop("list-style")); 45 | } 46 | 47 | #[test] 48 | fn list_style_image() { 49 | assert!(css::is_image_url_prop("list-style-image")); 50 | } 51 | 52 | #[test] 53 | fn mask_image() { 54 | assert!(css::is_image_url_prop("mask-image")); 55 | } 56 | } 57 | 58 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 59 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 60 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 61 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 62 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 63 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 64 | 65 | #[cfg(test)] 66 | mod failing { 67 | use monolith::css; 68 | 69 | #[test] 70 | fn empty() { 71 | assert!(!css::is_image_url_prop("")); 72 | } 73 | 74 | #[test] 75 | fn width() { 76 | assert!(!css::is_image_url_prop("width")); 77 | } 78 | 79 | #[test] 80 | fn color() { 81 | assert!(!css::is_image_url_prop("color")); 82 | } 83 | 84 | #[test] 85 | fn z_index() { 86 | assert!(!css::is_image_url_prop("z-index")); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /tests/css/mod.rs: -------------------------------------------------------------------------------- 1 | mod embed_css; 2 | mod is_image_url_prop; 3 | -------------------------------------------------------------------------------- /tests/html/add_favicon.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ 
╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use html5ever::serialize::{serialize, SerializeOpts}; 11 | use markup5ever_rcdom::SerializableHandle; 12 | 13 | use monolith::html; 14 | 15 | #[test] 16 | fn basic() { 17 | let html = "
text
"; 18 | let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 19 | 20 | dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string()); 21 | 22 | let mut buf: Vec = Vec::new(); 23 | serialize( 24 | &mut buf, 25 | &SerializableHandle::from(dom.document.clone()), 26 | SerializeOpts::default(), 27 | ) 28 | .unwrap(); 29 | 30 | assert_eq!( 31 | buf.iter().map(|&c| c as char).collect::(), 32 | "
text
" 33 | ); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /tests/html/check_integrity.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::html; 11 | 12 | #[test] 13 | fn empty_input_sha256() { 14 | assert!(html::check_integrity( 15 | "".as_bytes(), 16 | "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=" 17 | )); 18 | } 19 | 20 | #[test] 21 | fn sha256() { 22 | assert!(html::check_integrity( 23 | "abcdef0123456789".as_bytes(), 24 | "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=" 25 | )); 26 | } 27 | 28 | #[test] 29 | fn sha384() { 30 | assert!(html::check_integrity( 31 | "abcdef0123456789".as_bytes(), 32 | "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw" 33 | )); 34 | } 35 | 36 | #[test] 37 | fn sha512() { 38 | assert!(html::check_integrity( 39 | "abcdef0123456789".as_bytes(), 40 | "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==" 41 | )); 42 | } 43 | } 44 | 45 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 46 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 47 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 48 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 49 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 50 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 51 | 52 | #[cfg(test)] 53 | mod failing { 54 | use monolith::html; 55 | 56 | #[test] 57 | fn empty_hash() { 58 | assert!(!html::check_integrity("abcdef0123456789".as_bytes(), "")); 59 | } 60 | 61 | #[test] 62 | fn empty_input_empty_hash() { 63 
| assert!(!html::check_integrity("".as_bytes(), "")); 64 | } 65 | 66 | #[test] 67 | fn sha256() { 68 | assert!(!html::check_integrity( 69 | "abcdef0123456789".as_bytes(), 70 | "sha256-badhash" 71 | )); 72 | } 73 | 74 | #[test] 75 | fn sha384() { 76 | assert!(!html::check_integrity( 77 | "abcdef0123456789".as_bytes(), 78 | "sha384-badhash" 79 | )); 80 | } 81 | 82 | #[test] 83 | fn sha512() { 84 | assert!(!html::check_integrity( 85 | "abcdef0123456789".as_bytes(), 86 | "sha512-badhash" 87 | )); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /tests/html/compose_csp.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::core::MonolithOptions; 11 | use monolith::html; 12 | 13 | #[test] 14 | fn isolated() { 15 | let mut options = MonolithOptions::default(); 16 | options.isolate = true; 17 | let csp_content = html::compose_csp(&options); 18 | 19 | assert_eq!( 20 | csp_content, 21 | "default-src 'unsafe-eval' 'unsafe-inline' data:;" 22 | ); 23 | } 24 | 25 | #[test] 26 | fn no_css() { 27 | let mut options = MonolithOptions::default(); 28 | options.no_css = true; 29 | let csp_content = html::compose_csp(&options); 30 | 31 | assert_eq!(csp_content, "style-src 'none';"); 32 | } 33 | 34 | #[test] 35 | fn no_fonts() { 36 | let mut options = MonolithOptions::default(); 37 | options.no_fonts = true; 38 | let csp_content = html::compose_csp(&options); 39 | 40 | assert_eq!(csp_content, "font-src 'none';"); 41 | } 42 | 43 | #[test] 44 | fn no_frames() { 45 | let mut options = 
MonolithOptions::default(); 46 | options.no_frames = true; 47 | let csp_content = html::compose_csp(&options); 48 | 49 | assert_eq!(csp_content, "frame-src 'none'; child-src 'none';"); 50 | } 51 | 52 | #[test] 53 | fn no_js() { 54 | let mut options = MonolithOptions::default(); 55 | options.no_js = true; 56 | let csp_content = html::compose_csp(&options); 57 | 58 | assert_eq!(csp_content, "script-src 'none';"); 59 | } 60 | 61 | #[test] 62 | fn no_images() { 63 | let mut options = MonolithOptions::default(); 64 | options.no_images = true; 65 | let csp_content = html::compose_csp(&options); 66 | 67 | assert_eq!(csp_content, "img-src data:;"); 68 | } 69 | 70 | #[test] 71 | fn all() { 72 | let mut options = MonolithOptions::default(); 73 | options.isolate = true; 74 | options.no_css = true; 75 | options.no_fonts = true; 76 | options.no_frames = true; 77 | options.no_js = true; 78 | options.no_images = true; 79 | let csp_content = html::compose_csp(&options); 80 | 81 | assert_eq!( 82 | csp_content, 83 | "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;" 84 | ); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /tests/html/create_metadata_tag.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use chrono::prelude::*; 11 | use reqwest::Url; 12 | 13 | use monolith::html; 14 | 15 | #[test] 16 | fn http_url() { 17 | let url: Url = Url::parse("http://192.168.1.1/").unwrap(); 18 | let timestamp = 
Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); 19 | let metadata_comment: String = html::create_metadata_tag(&url); 20 | 21 | assert_eq!( 22 | metadata_comment, 23 | format!( 24 | "", 25 | &url, 26 | timestamp, 27 | env!("CARGO_PKG_NAME"), 28 | env!("CARGO_PKG_VERSION"), 29 | ) 30 | ); 31 | } 32 | 33 | #[test] 34 | fn file_url() { 35 | let url: Url = Url::parse("file:///home/monolith/index.html").unwrap(); 36 | let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); 37 | let metadata_comment: String = html::create_metadata_tag(&url); 38 | 39 | assert_eq!( 40 | metadata_comment, 41 | format!( 42 | "", 43 | timestamp, 44 | env!("CARGO_PKG_NAME"), 45 | env!("CARGO_PKG_VERSION"), 46 | ) 47 | ); 48 | } 49 | 50 | #[test] 51 | fn data_url() { 52 | let url: Url = Url::parse("data:text/html,Hello%2C%20World!").unwrap(); 53 | let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); 54 | let metadata_comment: String = html::create_metadata_tag(&url); 55 | 56 | assert_eq!( 57 | metadata_comment, 58 | format!( 59 | "", 60 | timestamp, 61 | env!("CARGO_PKG_NAME"), 62 | env!("CARGO_PKG_VERSION"), 63 | ) 64 | ); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /tests/html/embed_srcset.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | 12 | use monolith::core::MonolithOptions; 13 | use monolith::html; 14 | use monolith::session::Session; 15 | use monolith::url::EMPTY_IMAGE_DATA_URL; 16 | 17 | #[test] 18 | fn small_medium_large() { 19 | let srcset_value 
= "small.png 1x, medium.png 1.5x, large.png 2x"; 20 | let mut options = MonolithOptions::default(); 21 | options.no_images = true; 22 | options.silent = true; 23 | let mut session: Session = Session::new(None, None, options); 24 | let embedded_css = 25 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 26 | 27 | assert_eq!( 28 | embedded_css, 29 | format!( 30 | "{dataurl} 1x, {dataurl} 1.5x, {dataurl} 2x", 31 | dataurl = EMPTY_IMAGE_DATA_URL, 32 | ), 33 | ); 34 | } 35 | 36 | #[test] 37 | fn small_medium_only_medium_has_scale() { 38 | let srcset_value = "small.png, medium.png 1.5x"; 39 | let mut options = MonolithOptions::default(); 40 | options.no_images = true; 41 | options.silent = true; 42 | let mut session: Session = Session::new(None, None, options); 43 | let embedded_css = 44 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 45 | 46 | assert_eq!( 47 | embedded_css, 48 | format!("{dataurl}, {dataurl} 1.5x", dataurl = EMPTY_IMAGE_DATA_URL), 49 | ); 50 | } 51 | 52 | #[test] 53 | fn commas_within_file_names() { 54 | let srcset_value = "small,s.png 1x, large,l.png 2x"; 55 | let mut options = MonolithOptions::default(); 56 | options.no_images = true; 57 | options.silent = true; 58 | let mut session: Session = Session::new(None, None, options); 59 | let embedded_css = 60 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 61 | 62 | assert_eq!( 63 | embedded_css, 64 | format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL), 65 | ); 66 | } 67 | 68 | #[test] 69 | fn narrow_whitespaces_within_file_names() { 70 | let srcset_value = "small\u{202f}s.png 1x, large\u{202f}l.png 2x"; 71 | let mut options = MonolithOptions::default(); 72 | options.no_images = true; 73 | options.silent = true; 74 | let mut session: Session = Session::new(None, None, options); 75 | let embedded_css = 76 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 77 | 
78 | assert_eq!( 79 | embedded_css, 80 | format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL), 81 | ); 82 | } 83 | 84 | #[test] 85 | fn tabs_and_newlines_after_commas() { 86 | let srcset_value = "small-s.png 1x,\tmedium,m.png 2x,\nlarge-l.png 3x"; 87 | let mut options = MonolithOptions::default(); 88 | options.no_images = true; 89 | options.silent = true; 90 | let mut session: Session = Session::new(None, None, options); 91 | let embedded_css = 92 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 93 | 94 | assert_eq!( 95 | embedded_css, 96 | format!( 97 | "{dataurl} 1x, {dataurl} 2x, {dataurl} 3x", 98 | dataurl = EMPTY_IMAGE_DATA_URL 99 | ), 100 | ); 101 | } 102 | 103 | #[test] 104 | fn no_whitespace_after_commas() { 105 | let srcset_value = "small-s.png 1x,medium-m.png 2x,large-l.png 3x"; 106 | let mut options = MonolithOptions::default(); 107 | options.no_images = true; 108 | options.silent = true; 109 | let mut session: Session = Session::new(None, None, options); 110 | let embedded_css = 111 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 112 | 113 | assert_eq!( 114 | embedded_css, 115 | format!( 116 | "{dataurl} 1x, {dataurl} 2x, {dataurl} 3x", 117 | dataurl = EMPTY_IMAGE_DATA_URL 118 | ), 119 | ); 120 | } 121 | 122 | #[test] 123 | fn last_without_descriptor() { 124 | let srcset_value = "small-s.png 400w, medium-m.png 800w, large-l.png"; 125 | let mut options = MonolithOptions::default(); 126 | options.no_images = true; 127 | options.silent = true; 128 | let mut session: Session = Session::new(None, None, options); 129 | let embedded_css = 130 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 131 | 132 | assert_eq!( 133 | embedded_css, 134 | format!( 135 | "{dataurl} 400w, {dataurl} 800w, {dataurl}", 136 | dataurl = EMPTY_IMAGE_DATA_URL 137 | ), 138 | ); 139 | } 140 | } 141 | 142 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 143 | // 
██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 144 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 145 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 146 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 147 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 148 | 149 | #[cfg(test)] 150 | mod failing { 151 | use reqwest::Url; 152 | 153 | use monolith::core::MonolithOptions; 154 | use monolith::html; 155 | use monolith::session::Session; 156 | use monolith::url::EMPTY_IMAGE_DATA_URL; 157 | 158 | #[test] 159 | fn trailing_comma() { 160 | let srcset_value = "small.png 1x, large.png 2x,"; 161 | let mut options = MonolithOptions::default(); 162 | options.no_images = true; 163 | options.silent = true; 164 | let mut session: Session = Session::new(None, None, options); 165 | let embedded_css = 166 | html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value); 167 | 168 | assert_eq!( 169 | embedded_css, 170 | format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL), 171 | ); 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /tests/html/get_base_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::html; 11 | 12 | #[test] 13 | fn present() { 14 | let html = " 15 | 16 | 17 | 18 | 19 | 20 | 21 | "; 22 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 23 | 24 | assert_eq!( 25 | html::get_base_url(&dom.document), 26 | Some("https://musicbrainz.org".to_string()) 27 | ); 28 | } 29 | 30 | #[test] 31 | fn multiple_tags() { 32 | let html = " 33 
| 34 | 35 | 36 | 37 | 38 | 39 | 40 | "; 41 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 42 | 43 | assert_eq!( 44 | html::get_base_url(&dom.document), 45 | Some("https://www.discogs.com/".to_string()) 46 | ); 47 | } 48 | } 49 | 50 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 51 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 52 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 53 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 54 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 55 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 56 | 57 | #[cfg(test)] 58 | mod failing { 59 | use monolith::html; 60 | 61 | #[test] 62 | fn absent() { 63 | let html = " 64 | 65 | 66 | 67 | 68 | 69 | "; 70 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 71 | 72 | assert_eq!(html::get_base_url(&dom.document), None); 73 | } 74 | 75 | #[test] 76 | fn no_href() { 77 | let html = " 78 | 79 | 80 | 81 | 82 | 83 | 84 | "; 85 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 86 | 87 | assert_eq!(html::get_base_url(&dom.document), None); 88 | } 89 | 90 | #[test] 91 | fn empty_href() { 92 | let html = " 93 | 94 | 95 | 96 | 97 | 98 | 99 | "; 100 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 101 | 102 | assert_eq!(html::get_base_url(&dom.document), Some("".to_string())); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /tests/html/get_charset.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::html; 11 | 12 
| #[test] 13 | fn meta_content_type() { 14 | let html = " 15 | 16 | 17 | 18 | 19 | 20 | 21 | "; 22 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 23 | 24 | assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string())); 25 | } 26 | 27 | #[test] 28 | fn meta_charset() { 29 | let html = " 30 | 31 | 32 | 33 | 34 | 35 | 36 | "; 37 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 38 | 39 | assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string())); 40 | } 41 | 42 | #[test] 43 | fn multiple_conflicting_meta_charset_first() { 44 | let html = " 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | "; 53 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 54 | 55 | assert_eq!(html::get_charset(&dom.document), Some("utf-8".to_string())); 56 | } 57 | #[test] 58 | fn multiple_conflicting_meta_content_type_first() { 59 | let html = " 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | "; 68 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 69 | 70 | assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string())); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /tests/html/get_node_attr.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use markup5ever_rcdom::{Handle, NodeData}; 11 | 12 | use monolith::html; 13 | 14 | #[test] 15 | fn div_two_style_attributes() { 16 | let html = "
"; 17 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 18 | let mut count = 0; 19 | 20 | fn test_walk(node: &Handle, i: &mut i8) { 21 | *i += 1; 22 | 23 | match &node.data { 24 | NodeData::Document => { 25 | // Dig deeper 26 | for child in node.children.borrow().iter() { 27 | test_walk(child, &mut *i); 28 | } 29 | } 30 | NodeData::Element { name, .. } => { 31 | let node_name = name.local.as_ref().to_string(); 32 | 33 | if node_name == "body" { 34 | assert_eq!(html::get_node_attr(node, "class"), None); 35 | } else if node_name == "div" { 36 | assert_eq!( 37 | html::get_node_attr(node, "style"), 38 | Some("color: blue;".to_string()) 39 | ); 40 | } 41 | 42 | for child in node.children.borrow().iter() { 43 | test_walk(child, &mut *i); 44 | } 45 | } 46 | _ => (), 47 | }; 48 | } 49 | 50 | test_walk(&dom.document, &mut count); 51 | 52 | assert_eq!(count, 6); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /tests/html/get_node_name.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use markup5ever_rcdom::{Handle, NodeData}; 11 | 12 | use monolith::html; 13 | 14 | #[test] 15 | fn parent_node_names() { 16 | let html = "

"; 17 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 18 | let mut count = 0; 19 | 20 | fn test_walk(node: &Handle, i: &mut i8) { 21 | *i += 1; 22 | 23 | match &node.data { 24 | NodeData::Document => { 25 | for child in node.children.borrow().iter() { 26 | test_walk(child, &mut *i); 27 | } 28 | } 29 | NodeData::Element { name, .. } => { 30 | let node_name = name.local.as_ref().to_string(); 31 | let parent = html::get_parent_node(node); 32 | let parent_node_name = html::get_node_name(&parent); 33 | if node_name == "head" || node_name == "body" { 34 | assert_eq!(parent_node_name, Some("html")); 35 | } else if node_name == "div" { 36 | assert_eq!(parent_node_name, Some("body")); 37 | } else if node_name == "p" { 38 | assert_eq!(parent_node_name, Some("div")); 39 | } 40 | 41 | for child in node.children.borrow().iter() { 42 | test_walk(child, &mut *i); 43 | } 44 | } 45 | _ => (), 46 | }; 47 | } 48 | 49 | test_walk(&dom.document, &mut count); 50 | 51 | assert_eq!(count, 7); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/html/has_favicon.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::html; 11 | 12 | #[test] 13 | fn icon() { 14 | let html = r#"
text
"#; 15 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 16 | let res: bool = html::has_favicon(&dom.document); 17 | 18 | assert!(res); 19 | } 20 | 21 | #[test] 22 | fn shortcut_icon() { 23 | let html = r#"
text
"#; 24 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 25 | let res: bool = html::has_favicon(&dom.document); 26 | 27 | assert!(res); 28 | } 29 | } 30 | 31 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 32 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 33 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 34 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 35 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 36 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 37 | 38 | #[cfg(test)] 39 | mod failing { 40 | use monolith::html; 41 | 42 | #[test] 43 | fn absent() { 44 | let html = "
text
"; 45 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 46 | let res: bool = html::has_favicon(&dom.document); 47 | 48 | assert!(!res); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /tests/html/is_favicon.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::html::is_favicon; 11 | 12 | #[test] 13 | fn icon() { 14 | assert!(is_favicon("icon")); 15 | } 16 | 17 | #[test] 18 | fn shortcut_icon_capitalized() { 19 | assert!(is_favicon("Shortcut Icon")); 20 | } 21 | 22 | #[test] 23 | fn icon_uppercase() { 24 | assert!(is_favicon("ICON")); 25 | } 26 | } 27 | 28 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 29 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 30 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 31 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 32 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 33 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 34 | 35 | #[cfg(test)] 36 | mod failing { 37 | use monolith::html::is_favicon; 38 | 39 | #[test] 40 | fn apple_touch_icon() { 41 | assert!(!is_favicon("apple-touch-icon")); 42 | } 43 | 44 | #[test] 45 | fn mask_icon() { 46 | assert!(!is_favicon("mask-icon")); 47 | } 48 | 49 | #[test] 50 | fn fluid_icon() { 51 | assert!(!is_favicon("fluid-icon")); 52 | } 53 | 54 | #[test] 55 | fn stylesheet() { 56 | assert!(!is_favicon("stylesheet")); 57 | } 58 | 59 | #[test] 60 | fn empty_string() { 61 | assert!(!is_favicon("")); 62 | } 63 | } 64 | 
-------------------------------------------------------------------------------- /tests/html/mod.rs: -------------------------------------------------------------------------------- 1 | mod add_favicon; 2 | mod check_integrity; 3 | mod compose_csp; 4 | mod create_metadata_tag; 5 | mod embed_srcset; 6 | mod get_base_url; 7 | mod get_charset; 8 | mod get_node_attr; 9 | mod get_node_name; 10 | mod has_favicon; 11 | mod is_favicon; 12 | mod parse_link_type; 13 | mod parse_srcset; 14 | mod serialize_document; 15 | mod set_node_attr; 16 | mod walk; 17 | -------------------------------------------------------------------------------- /tests/html/parse_link_type.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::html; 11 | 12 | #[test] 13 | fn icon() { 14 | assert!(html::parse_link_type("icon").contains(&html::LinkType::Favicon)); 15 | } 16 | 17 | #[test] 18 | fn shortcut_icon_capitalized() { 19 | assert!(html::parse_link_type("Shortcut Icon").contains(&html::LinkType::Favicon)); 20 | } 21 | 22 | #[test] 23 | fn stylesheet() { 24 | assert!(html::parse_link_type("stylesheet").contains(&html::LinkType::Stylesheet)); 25 | } 26 | 27 | #[test] 28 | fn preload_stylesheet() { 29 | assert!(html::parse_link_type("preload stylesheet").contains(&html::LinkType::Stylesheet)); 30 | } 31 | 32 | #[test] 33 | fn apple_touch_icon() { 34 | assert!(html::parse_link_type("apple-touch-icon").contains(&html::LinkType::AppleTouchIcon)); 35 | } 36 | } 37 | 38 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 39 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 40 | 
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 41 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 42 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 43 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 44 | 45 | #[cfg(test)] 46 | mod failing { 47 | use monolith::html; 48 | 49 | #[test] 50 | fn mask_icon() { 51 | assert!(html::parse_link_type("mask-icon").is_empty()); 52 | } 53 | 54 | #[test] 55 | fn fluid_icon() { 56 | assert!(html::parse_link_type("fluid-icon").is_empty()); 57 | } 58 | 59 | #[test] 60 | fn empty_string() { 61 | assert!(html::parse_link_type("").is_empty()); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /tests/html/parse_srcset.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::html::{parse_srcset, SrcSetItem}; 11 | 12 | #[test] 13 | fn three_items_with_width_descriptors_and_newlines() { 14 | let srcset = r#"https://some-site.com/width/600/https://media2.some-site.com/2021/07/some-image-073362.jpg 600w, 15 | https://some-site.com/width/960/https://media2.some-site.com/2021/07/some-image-073362.jpg 960w, 16 | https://some-site.com/width/1200/https://media2.some-site.com/2021/07/some-image-073362.jpg 1200w"#; 17 | let srcset_items: Vec = parse_srcset(srcset); 18 | 19 | assert_eq!(srcset_items.len(), 3); 20 | assert_eq!(srcset_items[0].path, "https://some-site.com/width/600/https://media2.some-site.com/2021/07/some-image-073362.jpg"); 21 | assert_eq!(srcset_items[0].descriptor, "600w"); 22 | assert_eq!(srcset_items[1].path, 
"https://some-site.com/width/960/https://media2.some-site.com/2021/07/some-image-073362.jpg"); 23 | assert_eq!(srcset_items[1].descriptor, "960w"); 24 | assert_eq!(srcset_items[2].path, "https://some-site.com/width/1200/https://media2.some-site.com/2021/07/some-image-073362.jpg"); 25 | assert_eq!(srcset_items[2].descriptor, "1200w"); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /tests/html/serialize_document.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::core::MonolithOptions; 11 | use monolith::html; 12 | 13 | #[test] 14 | fn div_as_root_element() { 15 | let html = "
"; 16 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 17 | let options = MonolithOptions::default(); 18 | 19 | assert_eq!( 20 | String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), 21 | "
" 22 | ); 23 | } 24 | 25 | #[test] 26 | fn full_page_with_no_html_head_or_body() { 27 | let html = "Isolated document\ 28 | \ 29 | \ 30 |
"; 31 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 32 | let mut options = MonolithOptions::default(); 33 | options.isolate = true; 34 | 35 | assert_eq!( 36 | String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), 37 | "\ 38 | \ 39 | \ 40 | Isolated document\ 41 | \ 42 | \ 43 | \ 44 | \ 45 |
\ 46 | \ 47 |
\ 48 | \ 49 | " 50 | ); 51 | } 52 | 53 | #[test] 54 | fn doctype_and_the_rest_no_html_head_or_body() { 55 | let html = "\ 56 | Unstyled document\ 57 | \ 58 |
"; 59 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 60 | let mut options = MonolithOptions::default(); 61 | options.no_css = true; 62 | 63 | assert_eq!( 64 | String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), 65 | "\ 66 | \ 67 | \ 68 | \ 69 | Unstyled document\ 70 | \ 71 | \ 72 |
\ 73 | " 74 | ); 75 | } 76 | 77 | #[test] 78 | fn doctype_and_the_rest_no_html_head_or_body_forbid_frames() { 79 | let html = "\ 80 | Frameless document\ 81 | \ 82 |
"; 83 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 84 | let mut options = MonolithOptions::default(); 85 | options.no_frames = true; 86 | 87 | assert_eq!( 88 | String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), 89 | "\ 90 | \ 91 | \ 92 | \ 93 | Frameless document\ 94 | \ 95 | \ 96 |
\ 97 | " 98 | ); 99 | } 100 | 101 | #[test] 102 | fn doctype_and_the_rest_all_forbidden() { 103 | let html = "\ 104 | no-frame no-css no-js no-image isolated document\ 105 | \ 106 | \ 107 |
\ 108 | \ 109 | \ 110 | \ 111 |
"; 112 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 113 | let mut options = MonolithOptions::default(); 114 | options.isolate = true; 115 | options.no_css = true; 116 | options.no_fonts = true; 117 | options.no_frames = true; 118 | options.no_js = true; 119 | options.no_images = true; 120 | 121 | assert_eq!( 122 | String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), 123 | "\ 124 | \ 125 | \ 126 | \ 127 | no-frame no-css no-js no-image isolated document\ 128 | \ 129 | \ 130 | \ 131 | \ 132 |
\ 133 | \ 134 | \ 135 | \ 136 |
\ 137 | \ 138 | " 139 | ); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /tests/html/set_node_attr.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use markup5ever_rcdom::{Handle, NodeData}; 11 | 12 | use monolith::html; 13 | 14 | #[test] 15 | fn html_lang_and_body_style() { 16 | let html = ""; 17 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 18 | let mut count = 0; 19 | 20 | fn test_walk(node: &Handle, i: &mut i8) { 21 | *i += 1; 22 | 23 | match &node.data { 24 | NodeData::Document => { 25 | // Dig deeper 26 | for child in node.children.borrow().iter() { 27 | test_walk(child, &mut *i); 28 | } 29 | } 30 | NodeData::Element { name, .. 
} => { 31 | let node_name = name.local.as_ref().to_string(); 32 | 33 | if node_name == "html" { 34 | assert_eq!(html::get_node_attr(node, "lang"), Some("en".to_string())); 35 | 36 | html::set_node_attr(node, "lang", Some("de".to_string())); 37 | assert_eq!(html::get_node_attr(node, "lang"), Some("de".to_string())); 38 | 39 | html::set_node_attr(node, "lang", None); 40 | assert_eq!(html::get_node_attr(node, "lang"), None); 41 | 42 | html::set_node_attr(node, "lang", Some("".to_string())); 43 | assert_eq!(html::get_node_attr(node, "lang"), Some("".to_string())); 44 | } else if node_name == "body" { 45 | assert_eq!(html::get_node_attr(node, "style"), None); 46 | 47 | html::set_node_attr(node, "style", Some("display: none;".to_string())); 48 | assert_eq!( 49 | html::get_node_attr(node, "style"), 50 | Some("display: none;".to_string()) 51 | ); 52 | } 53 | 54 | for child in node.children.borrow().iter() { 55 | test_walk(child, &mut *i); 56 | } 57 | } 58 | _ => (), 59 | }; 60 | } 61 | 62 | test_walk(&dom.document, &mut count); 63 | 64 | assert_eq!(count, 5); 65 | } 66 | 67 | #[test] 68 | fn body_background() { 69 | let html = ""; 70 | let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); 71 | let mut count = 0; 72 | 73 | fn test_walk(node: &Handle, i: &mut i8) { 74 | *i += 1; 75 | 76 | match &node.data { 77 | NodeData::Document => { 78 | // Dig deeper 79 | for child in node.children.borrow().iter() { 80 | test_walk(child, &mut *i); 81 | } 82 | } 83 | NodeData::Element { name, .. 
} => { 84 | let node_name = name.local.as_ref().to_string(); 85 | 86 | if node_name == "body" { 87 | assert_eq!( 88 | html::get_node_attr(node, "background"), 89 | Some("1".to_string()) 90 | ); 91 | 92 | html::set_node_attr(node, "background", None); 93 | assert_eq!(html::get_node_attr(node, "background"), None); 94 | } 95 | 96 | for child in node.children.borrow().iter() { 97 | test_walk(child, &mut *i); 98 | } 99 | } 100 | _ => (), 101 | }; 102 | } 103 | 104 | test_walk(&dom.document, &mut count); 105 | 106 | assert_eq!(count, 5); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /tests/js/attr_is_event_handler.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::js; 11 | 12 | #[test] 13 | fn onblur_camelcase() { 14 | assert!(js::attr_is_event_handler("onBlur")); 15 | } 16 | 17 | #[test] 18 | fn onclick_lowercase() { 19 | assert!(js::attr_is_event_handler("onclick")); 20 | } 21 | 22 | #[test] 23 | fn onclick_camelcase() { 24 | assert!(js::attr_is_event_handler("onClick")); 25 | } 26 | } 27 | 28 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 29 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 30 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 31 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 32 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 33 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 34 | 35 | #[cfg(test)] 36 | mod failing { 37 | use monolith::js; 38 | 39 | #[test] 40 | fn href() { 41 | assert!(!js::attr_is_event_handler("href")); 42 | } 43 | 44 | #[test] 45 | fn 
empty_string() { 46 | assert!(!js::attr_is_event_handler("")); 47 | } 48 | 49 | #[test] 50 | fn class() { 51 | assert!(!js::attr_is_event_handler("class")); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/js/mod.rs: -------------------------------------------------------------------------------- 1 | mod attr_is_event_handler; 2 | -------------------------------------------------------------------------------- /tests/mod.rs: -------------------------------------------------------------------------------- 1 | mod cli; 2 | mod cookies; 3 | mod core; 4 | mod css; 5 | mod html; 6 | mod js; 7 | mod session; 8 | mod url; 9 | -------------------------------------------------------------------------------- /tests/session/mod.rs: -------------------------------------------------------------------------------- 1 | mod retrieve_asset; 2 | -------------------------------------------------------------------------------- /tests/session/retrieve_asset.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | use std::env; 12 | 13 | use monolith::core::MonolithOptions; 14 | use monolith::session::Session; 15 | use monolith::url; 16 | 17 | #[test] 18 | fn read_data_url() { 19 | let mut options = MonolithOptions::default(); 20 | options.silent = true; 21 | 22 | let mut session: Session = Session::new(None, None, options); 23 | 24 | // If both source and target are data URLs, 25 | // ensure the result contains target data URL 26 | let (data, final_url, media_type, charset) = session 27 | 
.retrieve_asset( 28 | &Url::parse("data:text/html;base64,c291cmNl").unwrap(), 29 | &Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(), 30 | ) 31 | .unwrap(); 32 | assert_eq!(&media_type, "text/html"); 33 | assert_eq!(&charset, "US-ASCII"); 34 | assert_eq!( 35 | url::create_data_url(&media_type, &charset, &data, &final_url), 36 | Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(), 37 | ); 38 | assert_eq!( 39 | final_url, 40 | Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(), 41 | ); 42 | } 43 | 44 | #[test] 45 | fn read_local_file_with_file_url_parent() { 46 | let mut options = MonolithOptions::default(); 47 | options.silent = true; 48 | 49 | let mut session: Session = Session::new(None, None, options); 50 | 51 | let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; 52 | 53 | // Inclusion of local assets from local sources should be allowed 54 | let cwd = env::current_dir().unwrap(); 55 | let (data, final_url, media_type, charset) = session 56 | .retrieve_asset( 57 | &Url::parse(&format!( 58 | "{file}{cwd}/tests/_data_/basic/local-file.html", 59 | file = file_url_protocol, 60 | cwd = cwd.to_str().unwrap() 61 | )) 62 | .unwrap(), 63 | &Url::parse(&format!( 64 | "{file}{cwd}/tests/_data_/basic/local-script.js", 65 | file = file_url_protocol, 66 | cwd = cwd.to_str().unwrap() 67 | )) 68 | .unwrap(), 69 | ) 70 | .unwrap(); 71 | assert_eq!(&media_type, "text/javascript"); 72 | assert_eq!(&charset, ""); 73 | let data_url = "data:text/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="; 74 | assert_eq!( 75 | url::create_data_url(&media_type, &charset, &data, &final_url), 76 | Url::parse(data_url).unwrap() 77 | ); 78 | assert_eq!( 79 | final_url, 80 | Url::parse(&format!( 81 | "{file}{cwd}/tests/_data_/basic/local-script.js", 82 | file = file_url_protocol, 83 | cwd = cwd.to_str().unwrap() 84 | )) 85 | .unwrap() 86 | ); 87 | } 88 | } 89 | 90 | // ███████╗ 
// █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Negative cases for `Session::retrieve_asset`: requests for a `file://`
// asset made on behalf of a non-local parent document must return `Err`.
#[cfg(test)]
mod failing {
    use reqwest::Url;

    use monolith::core::MonolithOptions;
    use monolith::session::Session;

    #[test]
    fn read_local_file_with_data_url_parent() {
        let mut options = MonolithOptions::default();
        options.silent = true;

        let mut session: Session = Session::new(None, None, options);

        // Inclusion of local assets from data URL sources should not be allowed
        let result = session.retrieve_asset(
            &Url::parse("data:text/html;base64,SoUrCe").unwrap(),
            &Url::parse("file:///etc/passwd").unwrap(),
        );

        // Assert on the outcome itself rather than on constants, so a
        // regression reports what went wrong instead of a bare `assert!(false)`
        assert!(
            result.is_err(),
            "local file must not be retrievable from a data URL parent"
        );
    }

    #[test]
    fn read_local_file_with_https_parent() {
        let mut options = MonolithOptions::default();
        options.silent = true;

        let mut session: Session = Session::new(None, None, options);

        // Inclusion of local assets from remote sources should not be allowed
        let result = session.retrieve_asset(
            &Url::parse("https://kernel.org/").unwrap(),
            &Url::parse("file:///etc/passwd").unwrap(),
        );

        assert!(
            result.is_err(),
            "local file must not be retrievable from a remote (https) parent"
        );
    }
}

// --------------------------------------------------------------------------------
// /tests/url/clean_url.rs:
// --------------------------------------------------------------------------------
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝
██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | 12 | use monolith::url; 13 | 14 | #[test] 15 | fn preserve_original() { 16 | let u: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap(); 17 | 18 | let clean_u: Url = url::clean_url(u.clone()); 19 | 20 | assert_eq!(clean_u.as_str(), "https://somewhere.com/font.eot"); 21 | assert_eq!(u.as_str(), "https://somewhere.com/font.eot#iefix"); 22 | } 23 | 24 | #[test] 25 | fn removes_fragment() { 26 | assert_eq!( 27 | url::clean_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap()).as_str(), 28 | "https://somewhere.com/font.eot" 29 | ); 30 | } 31 | 32 | #[test] 33 | fn removes_empty_fragment() { 34 | assert_eq!( 35 | url::clean_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(), 36 | "https://somewhere.com/font.eot" 37 | ); 38 | } 39 | 40 | #[test] 41 | fn removes_empty_fragment_and_keeps_empty_query() { 42 | assert_eq!( 43 | url::clean_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(), 44 | "https://somewhere.com/font.eot?" 
45 | ); 46 | } 47 | 48 | #[test] 49 | fn removes_empty_fragment_and_keeps_query() { 50 | assert_eq!( 51 | url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(), 52 | "https://somewhere.com/font.eot?a=b&" 53 | ); 54 | } 55 | 56 | #[test] 57 | fn keeps_credentials() { 58 | assert_eq!( 59 | url::clean_url(Url::parse("https://cookie:monster@gibson.internet/").unwrap()).as_str(), 60 | "https://cookie:monster@gibson.internet/" 61 | ); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /tests/url/create_data_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | 12 | use monolith::url; 13 | 14 | #[test] 15 | fn encode_string_with_specific_media_type() { 16 | let media_type = "application/javascript"; 17 | let data = "var word = 'hello';\nalert(word);\n"; 18 | let data_url = url::create_data_url( 19 | media_type, 20 | "", 21 | data.as_bytes(), 22 | &Url::parse("data:,").unwrap(), 23 | ); 24 | 25 | assert_eq!( 26 | data_url.as_str(), 27 | "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK" 28 | ); 29 | } 30 | 31 | #[test] 32 | fn encode_append_fragment() { 33 | let data = "\n"; 34 | let data_url = url::create_data_url( 35 | "image/svg+xml", 36 | "", 37 | data.as_bytes(), 38 | &Url::parse("data:,").unwrap(), 39 | ); 40 | 41 | assert_eq!( 42 | data_url.as_str(), 43 | "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K" 44 | ); 45 | } 46 | 47 | #[test] 48 | fn encode_string_with_specific_media_type_and_charset() { 49 | let media_type 
= "application/javascript"; 50 | let charset = "utf8"; 51 | let data = "var word = 'hello';\nalert(word);\n"; 52 | let data_url = url::create_data_url( 53 | media_type, 54 | charset, 55 | data.as_bytes(), 56 | &Url::parse("data:,").unwrap(), 57 | ); 58 | 59 | assert_eq!( 60 | data_url.as_str(), 61 | "data:application/javascript;charset=utf8;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK" 62 | ); 63 | } 64 | 65 | #[test] 66 | fn create_data_url_with_us_ascii_charset() { 67 | let media_type = ""; 68 | let charset = "us-ascii"; 69 | let data = ""; 70 | let data_url = url::create_data_url( 71 | media_type, 72 | charset, 73 | data.as_bytes(), 74 | &Url::parse("data:,").unwrap(), 75 | ); 76 | 77 | assert_eq!(data_url.as_str(), "data:;base64,"); 78 | } 79 | 80 | #[test] 81 | fn create_data_url_with_utf8_charset() { 82 | let media_type = ""; 83 | let charset = "utf8"; 84 | let data = ""; 85 | let data_url = url::create_data_url( 86 | media_type, 87 | charset, 88 | data.as_bytes(), 89 | &Url::parse("data:,").unwrap(), 90 | ); 91 | 92 | assert_eq!(data_url.as_str(), "data:;charset=utf8;base64,"); 93 | } 94 | 95 | #[test] 96 | fn create_data_url_with_media_type_text_plain_and_utf8_charset() { 97 | let media_type = "text/plain"; 98 | let charset = "utf8"; 99 | let data = ""; 100 | let data_url = url::create_data_url( 101 | media_type, 102 | charset, 103 | data.as_bytes(), 104 | &Url::parse("data:,").unwrap(), 105 | ); 106 | 107 | assert_eq!(data_url.as_str(), "data:text/plain;charset=utf8;base64,"); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /tests/url/domain_is_within_domain.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ 
// ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Pairs of (domain, scope) for which `domain_is_within_domain` must answer
// `true`.
#[cfg(test)]
mod passing {
    use monolith::url::domain_is_within_domain;

    #[test]
    fn sub_domain_is_within_dotted_sub_domain() {
        let within = domain_is_within_domain("news.ycombinator.com", ".news.ycombinator.com");
        assert!(within);
    }

    #[test]
    fn domain_is_within_dotted_domain() {
        let within = domain_is_within_domain("ycombinator.com", ".ycombinator.com");
        assert!(within);
    }

    #[test]
    fn sub_domain_is_within_dotted_domain() {
        let within = domain_is_within_domain("news.ycombinator.com", ".ycombinator.com");
        assert!(within);
    }

    #[test]
    fn sub_domain_is_within_dotted_top_level_domain() {
        let within = domain_is_within_domain("news.ycombinator.com", ".com");
        assert!(within);
    }

    #[test]
    fn domain_is_within_itself() {
        let within = domain_is_within_domain("ycombinator.com", "ycombinator.com");
        assert!(within);
    }

    #[test]
    fn domain_with_trailing_dot_is_within_itself() {
        let within = domain_is_within_domain("ycombinator.com.", "ycombinator.com");
        assert!(within);
    }

    #[test]
    fn domain_with_trailing_dot_is_within_single_dot() {
        let within = domain_is_within_domain("ycombinator.com.", ".");
        assert!(within);
    }

    #[test]
    fn domain_matches_single_dot() {
        let within = domain_is_within_domain("ycombinator.com", ".");
        assert!(within);
    }

    #[test]
    fn dotted_domain_must_be_within_dotted_domain() {
        let within = domain_is_within_domain(".ycombinator.com", ".ycombinator.com");
        assert!(within);
    }

    #[test]
    fn empty_is_within_dot() {
        let within = domain_is_within_domain("", ".");
        assert!(within);
    }

    #[test]
    fn both_dots() {
        let within = domain_is_within_domain(".", ".");
        assert!(within);
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
89 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 90 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 91 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 92 | 93 | #[cfg(test)] 94 | mod failing { 95 | use monolith::url::domain_is_within_domain; 96 | 97 | #[test] 98 | fn sub_domain_must_not_be_within_domain() { 99 | assert!(!domain_is_within_domain( 100 | "news.ycombinator.com", 101 | "ycombinator.com" 102 | )); 103 | } 104 | 105 | #[test] 106 | fn domain_must_not_be_within_top_level_domain() { 107 | assert!(!domain_is_within_domain("ycombinator.com", "com")); 108 | } 109 | 110 | #[test] 111 | fn different_domains_must_not_be_within_one_another() { 112 | assert!(!domain_is_within_domain( 113 | "news.ycombinator.com", 114 | "kernel.org" 115 | )); 116 | } 117 | 118 | #[test] 119 | fn sub_domain_is_not_within_wrong_top_level_domain() { 120 | assert!(!domain_is_within_domain("news.ycombinator.com", "org")); 121 | } 122 | 123 | #[test] 124 | fn dotted_domain_is_not_within_domain() { 125 | assert!(!domain_is_within_domain( 126 | ".ycombinator.com", 127 | "ycombinator.com" 128 | )); 129 | } 130 | 131 | #[test] 132 | fn different_domain_is_not_within_dotted_domain() { 133 | assert!(!domain_is_within_domain( 134 | "www.doodleoptimize.com", 135 | ".ycombinator.com" 136 | )); 137 | } 138 | 139 | #[test] 140 | fn no_domain_can_be_within_empty_domain() { 141 | assert!(!domain_is_within_domain("ycombinator.com", "")); 142 | } 143 | 144 | #[test] 145 | fn both_can_not_be_empty() { 146 | assert!(!domain_is_within_domain("", "")); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /tests/url/get_referer_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | 
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | 12 | use monolith::url; 13 | 14 | #[test] 15 | fn preserve_original() { 16 | let original_url: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap(); 17 | let referer_url: Url = url::get_referer_url(original_url.clone()); 18 | assert_eq!(referer_url.as_str(), "https://somewhere.com/font.eot"); 19 | assert_eq!( 20 | original_url.as_str(), 21 | "https://somewhere.com/font.eot#iefix" 22 | ); 23 | } 24 | 25 | #[test] 26 | fn removes_fragment() { 27 | assert_eq!( 28 | url::get_referer_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap()) 29 | .as_str(), 30 | "https://somewhere.com/font.eot" 31 | ); 32 | } 33 | 34 | #[test] 35 | fn removes_empty_fragment() { 36 | assert_eq!( 37 | url::get_referer_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(), 38 | "https://somewhere.com/font.eot" 39 | ); 40 | } 41 | 42 | #[test] 43 | fn removes_empty_fragment_and_keeps_empty_query() { 44 | assert_eq!( 45 | url::get_referer_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(), 46 | "https://somewhere.com/font.eot?" 
47 | ); 48 | } 49 | 50 | #[test] 51 | fn removes_empty_fragment_and_keeps_query() { 52 | assert_eq!( 53 | url::get_referer_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()) 54 | .as_str(), 55 | "https://somewhere.com/font.eot?a=b&" 56 | ); 57 | } 58 | 59 | #[test] 60 | fn removes_credentials() { 61 | assert_eq!( 62 | url::get_referer_url(Url::parse("https://cookie:monster@gibson.lan/path").unwrap()) 63 | .as_str(), 64 | "https://gibson.lan/path" 65 | ); 66 | } 67 | 68 | #[test] 69 | fn removes_empty_credentials() { 70 | assert_eq!( 71 | url::get_referer_url(Url::parse("https://@gibson.lan/path").unwrap()).as_str(), 72 | "https://gibson.lan/path" 73 | ); 74 | } 75 | 76 | #[test] 77 | fn removes_empty_username_credentials() { 78 | assert_eq!( 79 | url::get_referer_url(Url::parse("https://:monster@gibson.lan/path").unwrap()).as_str(), 80 | "https://gibson.lan/path" 81 | ); 82 | } 83 | 84 | #[test] 85 | fn removes_empty_password_credentials() { 86 | assert_eq!( 87 | url::get_referer_url(Url::parse("https://cookie@gibson.lan/path").unwrap()).as_str(), 88 | "https://gibson.lan/path" 89 | ); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /tests/url/is_url_and_has_protocol.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use monolith::url; 11 | 12 | #[test] 13 | fn mailto() { 14 | assert!(url::is_url_and_has_protocol( 15 | "mailto:somebody@somewhere.com?subject=hello" 16 | )); 17 | } 18 | 19 | #[test] 20 | fn tel() { 21 | assert!(url::is_url_and_has_protocol("tel:5551234567")); 22 | 
} 23 | 24 | #[test] 25 | fn ftp_no_slashes() { 26 | assert!(url::is_url_and_has_protocol("ftp:some-ftp-server.com")); 27 | } 28 | 29 | #[test] 30 | fn ftp_with_credentials() { 31 | assert!(url::is_url_and_has_protocol( 32 | "ftp://user:password@some-ftp-server.com" 33 | )); 34 | } 35 | 36 | #[test] 37 | fn javascript() { 38 | assert!(url::is_url_and_has_protocol("javascript:void(0)")); 39 | } 40 | 41 | #[test] 42 | fn http() { 43 | assert!(url::is_url_and_has_protocol("http://news.ycombinator.com")); 44 | } 45 | 46 | #[test] 47 | fn https() { 48 | assert!(url::is_url_and_has_protocol("https://github.com")); 49 | } 50 | 51 | #[test] 52 | fn file() { 53 | assert!(url::is_url_and_has_protocol("file:///tmp/image.png")); 54 | } 55 | 56 | #[test] 57 | fn mailto_uppercase() { 58 | assert!(url::is_url_and_has_protocol( 59 | "MAILTO:somebody@somewhere.com?subject=hello" 60 | )); 61 | } 62 | 63 | #[test] 64 | fn empty_data_url() { 65 | assert!(url::is_url_and_has_protocol("data:text/html,")); 66 | } 67 | 68 | #[test] 69 | fn empty_data_url_surrounded_by_spaces() { 70 | assert!(url::is_url_and_has_protocol(" data:text/html, ")); 71 | } 72 | } 73 | 74 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 75 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 76 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 77 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 78 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 79 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 80 | 81 | #[cfg(test)] 82 | mod failing { 83 | use monolith::url; 84 | 85 | #[test] 86 | fn url_with_no_protocol() { 87 | assert!(!url::is_url_and_has_protocol( 88 | "//some-hostname.com/some-file.html" 89 | )); 90 | } 91 | 92 | #[test] 93 | fn relative_path() { 94 | assert!(!url::is_url_and_has_protocol( 95 | "some-hostname.com/some-file.html" 96 | )); 97 | } 98 | 99 | #[test] 100 | fn relative_to_root_path() { 101 | assert!(!url::is_url_and_has_protocol("/some-file.html")); 102 | } 103 | 104 | #[test] 105 | fn empty_string() { 
106 | assert!(!url::is_url_and_has_protocol("")); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /tests/url/mod.rs: -------------------------------------------------------------------------------- 1 | mod clean_url; 2 | mod create_data_url; 3 | mod domain_is_within_domain; 4 | mod get_referer_url; 5 | mod is_url_and_has_protocol; 6 | mod parse_data_url; 7 | mod resolve_url; 8 | -------------------------------------------------------------------------------- /tests/url/parse_data_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | 12 | use monolith::url; 13 | 14 | #[test] 15 | fn parse_text_html_base64() { 16 | let (media_type, charset, data) = url::parse_data_url(&Url::parse("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==").unwrap()); 17 | 18 | assert_eq!(media_type, "text/html"); 19 | assert_eq!(charset, "US-ASCII"); 20 | assert_eq!( 21 | String::from_utf8_lossy(&data), 22 | "Work expands so as to fill the time available for its completion" 23 | ); 24 | } 25 | 26 | #[test] 27 | fn parse_text_html_utf8() { 28 | let (media_type, charset, data) = url::parse_data_url( 29 | &Url::parse("data:text/html;charset=utf8,Work expands so as to fill the time available for its completion").unwrap(), 30 | ); 31 | 32 | assert_eq!(media_type, "text/html"); 33 | assert_eq!(charset, "utf8"); 34 | assert_eq!( 35 | String::from_utf8_lossy(&data), 36 | "Work expands so as to fill the time available for its completion" 37 | 
); 38 | } 39 | 40 | #[test] 41 | fn parse_text_html_plaintext() { 42 | let (media_type, charset, data) = url::parse_data_url( 43 | &Url::parse( 44 | "data:text/html,Work expands so as to fill the time available for its completion", 45 | ) 46 | .unwrap(), 47 | ); 48 | 49 | assert_eq!(media_type, "text/html"); 50 | assert_eq!(charset, "US-ASCII"); 51 | assert_eq!( 52 | String::from_utf8_lossy(&data), 53 | "Work expands so as to fill the time available for its completion" 54 | ); 55 | } 56 | 57 | #[test] 58 | fn parse_text_css_url_encoded() { 59 | let (media_type, charset, data) = 60 | url::parse_data_url(&Url::parse("data:text/css,div{background-color:%23000}").unwrap()); 61 | 62 | assert_eq!(media_type, "text/css"); 63 | assert_eq!(charset, "US-ASCII"); 64 | assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}"); 65 | } 66 | 67 | #[test] 68 | fn parse_no_media_type_base64() { 69 | let (media_type, charset, data) = 70 | url::parse_data_url(&Url::parse("data:;base64,dGVzdA==").unwrap()); 71 | 72 | assert_eq!(media_type, "text/plain"); 73 | assert_eq!(charset, "US-ASCII"); 74 | assert_eq!(String::from_utf8_lossy(&data), "test"); 75 | } 76 | 77 | #[test] 78 | fn parse_no_media_type_no_encoding() { 79 | let (media_type, charset, data) = 80 | url::parse_data_url(&Url::parse("data:;,test%20test").unwrap()); 81 | 82 | assert_eq!(media_type, "text/plain"); 83 | assert_eq!(charset, "US-ASCII"); 84 | assert_eq!(String::from_utf8_lossy(&data), "test test"); 85 | } 86 | } 87 | 88 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 89 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 90 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 91 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 92 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 93 | // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 94 | 95 | #[cfg(test)] 96 | mod failing { 97 | use reqwest::Url; 98 | 99 | use monolith::url; 100 | 101 | #[test] 102 | fn empty_data_url() { 103 | let (media_type, charset, data) = 
url::parse_data_url(&Url::parse("data:,").unwrap()); 104 | 105 | assert_eq!(media_type, "text/plain"); 106 | assert_eq!(charset, "US-ASCII"); 107 | assert_eq!(String::from_utf8_lossy(&data), ""); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /tests/url/resolve_url.rs: -------------------------------------------------------------------------------- 1 | // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ 2 | // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ 3 | // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ 4 | // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ 5 | // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ 6 | // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 7 | 8 | #[cfg(test)] 9 | mod passing { 10 | use reqwest::Url; 11 | 12 | use monolith::url; 13 | 14 | #[test] 15 | fn basic_httsp_relative() { 16 | assert_eq!( 17 | url::resolve_url( 18 | &Url::parse("https://www.kernel.org").unwrap(), 19 | "category/signatures.html" 20 | ) 21 | .as_str(), 22 | Url::parse("https://www.kernel.org/category/signatures.html") 23 | .unwrap() 24 | .as_str() 25 | ); 26 | } 27 | 28 | #[test] 29 | fn basic_httsp_absolute() { 30 | assert_eq!( 31 | url::resolve_url( 32 | &Url::parse("https://www.kernel.org").unwrap(), 33 | "/category/signatures.html" 34 | ) 35 | .as_str(), 36 | Url::parse("https://www.kernel.org/category/signatures.html") 37 | .unwrap() 38 | .as_str() 39 | ); 40 | } 41 | 42 | #[test] 43 | fn from_https_to_level_up_relative() { 44 | assert_eq!( 45 | url::resolve_url( 46 | &Url::parse("https://www.kernel.org").unwrap(), 47 | "../category/signatures.html" 48 | ) 49 | .as_str(), 50 | Url::parse("https://www.kernel.org/category/signatures.html") 51 | .unwrap() 52 | .as_str() 53 | ); 54 | } 55 | 56 | #[test] 57 | fn from_https_url_to_url_with_no_protocol() { 58 | assert_eq!( 59 | url::resolve_url( 60 | &Url::parse("https://www.kernel.org").unwrap(), 61 | 
"//www.kernel.org/theme/images/logos/tux.png", 62 | ) 63 | .as_str(), 64 | "https://www.kernel.org/theme/images/logos/tux.png" 65 | ); 66 | } 67 | 68 | #[test] 69 | fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() { 70 | assert_eq!( 71 | url::resolve_url( 72 | &Url::parse("https://www.kernel.org").unwrap(), 73 | "//another-host.org/theme/images/logos/tux.png", 74 | ) 75 | .as_str(), 76 | "https://another-host.org/theme/images/logos/tux.png" 77 | ); 78 | } 79 | 80 | #[test] 81 | fn from_https_url_to_absolute_path() { 82 | assert_eq!( 83 | url::resolve_url( 84 | &Url::parse("https://www.kernel.org/category/signatures.html").unwrap(), 85 | "/theme/images/logos/tux.png", 86 | ) 87 | .as_str(), 88 | "https://www.kernel.org/theme/images/logos/tux.png" 89 | ); 90 | } 91 | 92 | #[test] 93 | fn from_https_to_just_filename() { 94 | assert_eq!( 95 | url::resolve_url( 96 | &Url::parse("https://www.w3schools.com/html/html_iframe.asp").unwrap(), 97 | "default.asp", 98 | ) 99 | .as_str(), 100 | "https://www.w3schools.com/html/default.asp" 101 | ); 102 | } 103 | 104 | #[test] 105 | fn from_data_url_to_https() { 106 | assert_eq!( 107 | url::resolve_url( 108 | &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") 109 | .unwrap(), 110 | "https://www.kernel.org/category/signatures.html", 111 | ) 112 | .as_str(), 113 | "https://www.kernel.org/category/signatures.html" 114 | ); 115 | } 116 | 117 | #[test] 118 | fn from_data_url_to_data_url() { 119 | assert_eq!( 120 | url::resolve_url( 121 | &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") 122 | .unwrap(), 123 | "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K", 124 | ) 125 | .as_str(), 126 | "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K" 127 | ); 128 | } 129 | 130 | #[test] 131 | fn from_file_url_to_relative_path() { 132 | assert_eq!( 133 | url::resolve_url( 134 | 
&Url::parse("file:///home/user/Websites/my-website/index.html").unwrap(), 135 | "assets/images/logo.png", 136 | ) 137 | .as_str(), 138 | "file:///home/user/Websites/my-website/assets/images/logo.png" 139 | ); 140 | } 141 | 142 | #[test] 143 | fn from_file_url_to_relative_path_with_backslashes() { 144 | assert_eq!( 145 | url::resolve_url( 146 | &Url::parse("file:\\\\\\home\\user\\Websites\\my-website\\index.html").unwrap(), 147 | "assets\\images\\logo.png", 148 | ) 149 | .as_str(), 150 | "file:///home/user/Websites/my-website/assets/images/logo.png" 151 | ); 152 | } 153 | 154 | #[test] 155 | fn from_data_url_to_file_url() { 156 | assert_eq!( 157 | url::resolve_url( 158 | &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") 159 | .unwrap(), 160 | "file:///etc/passwd", 161 | ) 162 | .as_str(), 163 | "file:///etc/passwd" 164 | ); 165 | } 166 | 167 | #[test] 168 | fn preserve_fragment() { 169 | assert_eq!( 170 | url::resolve_url( 171 | &Url::parse("http://doesnt-matter.local/").unwrap(), 172 | "css/fonts/fontmarvelous.svg#fontmarvelous", 173 | ) 174 | .as_str(), 175 | "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous" 176 | ); 177 | } 178 | 179 | #[test] 180 | fn resolve_from_file_url_to_file_url() { 181 | if cfg!(windows) { 182 | assert_eq!( 183 | url::resolve_url( 184 | &Url::parse("file:///c:/index.html").unwrap(), 185 | "file:///c:/image.png" 186 | ) 187 | .as_str(), 188 | "file:///c:/image.png" 189 | ); 190 | } else { 191 | assert_eq!( 192 | url::resolve_url( 193 | &Url::parse("file:///tmp/index.html").unwrap(), 194 | "file:///tmp/image.png" 195 | ) 196 | .as_str(), 197 | "file:///tmp/image.png" 198 | ); 199 | } 200 | } 201 | } 202 | 203 | // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ 204 | // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ 205 | // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ 206 | // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ 207 | // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ 208 | // ╚═╝ ╚═╝ 
╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 209 | 210 | #[cfg(test)] 211 | mod failing { 212 | use reqwest::Url; 213 | 214 | use monolith::url; 215 | 216 | #[test] 217 | fn from_data_url_to_url_with_no_protocol() { 218 | assert_eq!( 219 | url::resolve_url( 220 | &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") 221 | .unwrap(), 222 | "//www.w3schools.com/html/html_iframe.asp", 223 | ) 224 | .as_str(), 225 | "data:," 226 | ); 227 | } 228 | } 229 | --------------------------------------------------------------------------------