├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── adblock ├── __init__.py ├── adblock.pyi └── py.typed ├── pyproject.toml ├── src └── lib.rs ├── tests ├── test_engine.py ├── test_exceptions.py ├── test_imports.py ├── test_metadata.py ├── test_redirect.py ├── test_repr.py └── test_typestubs.py └── web ├── create_site.bash └── static └── index.html /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | release: 5 | types: [created] 6 | push: 7 | pull_request: 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v1 15 | 16 | - name: Install latest nightly 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | toolchain: nightly 20 | override: true 21 | components: rustfmt, clippy 22 | 23 | - name: Lint with rustfmt 24 | uses: actions-rs/cargo@v1 25 | with: 26 | command: fmt 27 | args: -- --check 28 | 29 | # Disabled because of https://github.com/rust-lang/rust-clippy/issues/8971 30 | # - name: Lint with clippy 31 | # uses: actions-rs/cargo@v1 32 | # with: 33 | # command: clippy 34 | # args: --all-targets --all-features -- -D clippy::all 35 | 36 | - name: Lint with Black 37 | run: pip install black && black --check . 38 | 39 | - name: Lint with mypy 40 | run: pip install mypy && mypy --non-interactive --install-types --ignore-missing-imports . 
41 | 42 | macos: 43 | runs-on: macos-latest 44 | needs: lint 45 | steps: 46 | - uses: actions/checkout@v2 47 | 48 | - uses: actions/setup-python@v2 49 | with: 50 | python-version: 3.7 51 | 52 | - name: Install Rust toolchain 53 | uses: actions-rs/toolchain@v1 54 | with: 55 | toolchain: stable 56 | target: aarch64-apple-darwin 57 | profile: minimal 58 | default: true 59 | 60 | - name: Install maturin 61 | run: pip install maturin 62 | 63 | - name: Build wheels - x86_64 64 | run: | 65 | maturin build -i python --target x86_64-apple-darwin --release --out dist 66 | pip install adblock --no-index --find-links dist --force-reinstall 67 | 68 | - name: Build wheels - universal2 69 | env: 70 | DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer 71 | MACOSX_DEPLOYMENT_TARGET: "10.9" 72 | PYO3_CROSS_LIB_DIR: /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib 73 | run: | 74 | # Build wheels 75 | maturin build -i python --release --universal2 --out dist 76 | pip install adblock --no-index --find-links dist --force-reinstall 77 | 78 | - name: Run PyTest 79 | run: | 80 | pip install pytest toml 81 | pytest -vv --color=yes 82 | 83 | - name: Check wheels with Twine 84 | run: pip install twine && twine check dist/* 85 | shell: bash 86 | 87 | - name: Upload wheels 88 | uses: actions/upload-artifact@v2 89 | with: 90 | name: wheels 91 | path: dist 92 | 93 | windows: 94 | runs-on: windows-latest 95 | needs: lint 96 | strategy: 97 | matrix: 98 | platform: 99 | [ 100 | { python-architecture: "x64", target: "x86_64-pc-windows-msvc" }, 101 | { python-architecture: "x86", target: "i686-pc-windows-msvc" }, 102 | ] 103 | steps: 104 | - uses: actions/checkout@v2 105 | 106 | - uses: actions/setup-python@v2 107 | with: 108 | python-version: 3.7 109 | architecture: ${{ matrix.platform.python-architecture }} 110 | 111 | - name: Install Rust toolchain 112 | uses: actions-rs/toolchain@v1 113 | with: 114 | toolchain: stable 115 | target: ${{ 
matrix.platform.target }} 116 | profile: minimal 117 | default: true 118 | 119 | - name: Install maturin 120 | run: pip install maturin 121 | 122 | - name: Build wheels 123 | run: | 124 | maturin build -i python --release --out dist --target ${{ matrix.platform.target }} 125 | pip install adblock --no-index --find-links dist --force-reinstall 126 | 127 | - name: Run PyTest 128 | run: | 129 | pip install pytest toml 130 | pytest -vv --color=yes 131 | 132 | - name: Check wheels with Twine 133 | run: pip install twine && twine check dist/* 134 | shell: bash 135 | 136 | - name: Upload wheels 137 | uses: actions/upload-artifact@v2 138 | with: 139 | name: wheels 140 | path: dist 141 | 142 | linux: 143 | runs-on: ubuntu-latest 144 | needs: lint 145 | strategy: 146 | matrix: 147 | platform: 148 | [ 149 | { 150 | toolchain: "1.53", 151 | manylinux: "2014", 152 | target: "x86_64-unknown-linux-gnu", 153 | arch: "x86_64", 154 | python-version: "3.7" 155 | }, 156 | { 157 | toolchain: "nightly", 158 | manylinux: "2014", 159 | target: "x86_64-unknown-linux-gnu", 160 | arch: "x86_64", 161 | python-version: "3.7" 162 | }, 163 | { 164 | toolchain: "stable", 165 | manylinux: "2014", 166 | target: "x86_64-unknown-linux-gnu", 167 | arch: "x86_64", 168 | python-version: "3.7" 169 | }, 170 | { 171 | toolchain: "stable", 172 | manylinux: "2014", 173 | target: "x86_64-unknown-linux-gnu", 174 | arch: "x86_64", 175 | python-version: "3.11" 176 | } 177 | ] 178 | steps: 179 | - uses: actions/checkout@v2 180 | 181 | - uses: actions/setup-python@v2 182 | with: 183 | python-version: ${{ matrix.platform.python-version }} 184 | 185 | - name: Build Wheels 186 | run: | 187 | echo 'curl -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${{ matrix.platform.toolchain }} 188 | source ~/.cargo/env 189 | export PATH=/opt/python/cp38-cp38/bin:$PATH 190 | pip install maturin 191 | maturin build -i python --release --out dist --target ${{ matrix.platform.target }} --manylinux ${{ 
matrix.platform.manylinux }} 192 | ' > build-wheel.sh 193 | chmod +x build-wheel.sh 194 | docker run --rm -v "$PWD":/io -w /io quay.io/pypa/manylinux${{ matrix.platform.manylinux }}_${{ matrix.platform.arch }} bash build-wheel.sh 195 | 196 | - name: Run PyTest 197 | run: | 198 | pip install adblock --no-index --find-links dist --force-reinstall 199 | pip install pytest toml 200 | pytest -vv --color=yes 201 | 202 | - name: Auditwheel Symbols 203 | run: | 204 | pip install auditwheel-symbols 205 | auditwheel-symbols dist/*.whl 206 | 207 | - name: Check wheels with Twine 208 | run: pip install twine && twine check dist/* 209 | shell: bash 210 | 211 | - name: Upload wheels 212 | if: matrix.platform.toolchain == 'stable' 213 | uses: actions/upload-artifact@v2 214 | with: 215 | name: wheels 216 | path: dist 217 | 218 | linux-cross: 219 | runs-on: ubuntu-latest 220 | needs: lint 221 | strategy: 222 | matrix: 223 | platform: 224 | [ 225 | { 226 | manylinux: "2014", 227 | target: "aarch64-unknown-linux-gnu", 228 | arch: "aarch64", 229 | }, 230 | { 231 | manylinux: "2014", 232 | target: "armv7-unknown-linux-gnueabihf", 233 | arch: "armv7", 234 | }, 235 | ] 236 | steps: 237 | - uses: actions/checkout@v2 238 | 239 | - uses: actions/setup-python@v2 240 | with: 241 | python-version: 3.7 242 | 243 | - name: Build Wheels 244 | run: | 245 | echo 'curl -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable 246 | source ~/.cargo/env 247 | rustup target add ${{ matrix.platform.target }} 248 | pip install maturin 249 | maturin build -i python --release --out dist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} 250 | ' > build-wheel.sh 251 | chmod +x build-wheel.sh 252 | docker run --rm -v "$PWD":/io -w /io messense/manylinux2014-cross:${{ matrix.platform.arch }} bash build-wheel.sh 253 | 254 | - uses: uraimo/run-on-arch-action@v2.2.0 255 | name: Install built wheel 256 | with: 257 | arch: ${{ matrix.platform.arch }} 258 | distro: 
ubuntu22.04 259 | # Mount the dist directory as /artifacts in the container 260 | dockerRunArgs: | 261 | --volume "${PWD}/dist:/artifacts" 262 | install: | 263 | apt-get update 264 | apt-get install -y --no-install-recommends python3 python3-pip 265 | pip3 install -U pip 266 | run: | 267 | ls -lrth /artifacts 268 | pip3 install adblock --no-index --find-links /artifacts --force-reinstall 269 | cd ~ && python3 -c "import adblock" 270 | 271 | - name: Auditwheel Symbols 272 | run: | 273 | pip install auditwheel-symbols 274 | auditwheel-symbols dist/*.whl 275 | 276 | - name: Check wheels with Twine 277 | run: pip install twine && twine check dist/* 278 | shell: bash 279 | 280 | - name: Upload wheels 281 | uses: actions/upload-artifact@v2 282 | with: 283 | name: wheels 284 | path: dist 285 | 286 | python-publish: 287 | runs-on: ubuntu-latest 288 | needs: [ macos, windows, linux, linux-cross ] 289 | steps: 290 | - uses: actions/download-artifact@v2 291 | with: 292 | name: wheels 293 | 294 | - uses: actions/setup-python@v2 295 | with: 296 | python-version: 3.9 297 | 298 | - name: Wheel filename sanity checks 299 | run: | 300 | ls -lah 301 | num_abi3_whl=$(find | grep "\./adblock.*-abi3.*\.whl" | wc -l) 302 | num_whl=$(find | grep "\./adblock.*\.whl" | wc -l) 303 | test $num_abi3_whl -eq $num_whl 304 | test $num_whl -ge 1 305 | 306 | - name: PyPi publish 307 | if: github.event_name == 'release' && github.event.action == 'created' 308 | env: 309 | TWINE_PASSWORD: ${{ secrets.PYPI }} 310 | run: | 311 | pip install --upgrade wheel pip setuptools twine 312 | twine upload --non-interactive --skip-existing --username __token__ ./* 313 | 314 | - name: GitHub release 315 | uses: softprops/action-gh-release@v1 316 | if: startsWith(github.ref, 'refs/tags/') 317 | with: 318 | files: ./* 319 | env: 320 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 321 | 322 | docs-publish: 323 | runs-on: ubuntu-latest 324 | if: github.ref == 'refs/heads/master' && github.event_name == 'push' 325 | 
steps: 326 | - uses: actions/checkout@v2 327 | with: 328 | submodules: true 329 | fetch-depth: 0 330 | 331 | - name: Install latest nightly 332 | uses: actions-rs/toolchain@v1 333 | with: 334 | toolchain: nightly 335 | override: true 336 | 337 | - name: Build Github Pages 338 | run: bash web/create_site.bash 339 | 340 | - name: Deploy Github Pages 341 | uses: peaceiris/actions-gh-pages@v3 342 | with: 343 | github_token: ${{ secrets.GITHUB_TOKEN }} 344 | publish_dir: ./target/github-pages 345 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # -- Rust -- 2 | /target 3 | 4 | # -- Editors -- 5 | # Created by https://www.gitignore.io/api/vim,emacs,visualstudiocode 6 | # Edit at https://www.gitignore.io/?templates=vim,emacs,visualstudiocode 7 | 8 | ### Emacs ### 9 | # -*- mode: gitignore; -*- 10 | *~ 11 | \#*\# 12 | /.emacs.desktop 13 | /.emacs.desktop.lock 14 | *.elc 15 | auto-save-list 16 | tramp 17 | .\#* 18 | 19 | # Org-mode 20 | .org-id-locations 21 | *_archive 22 | 23 | # flymake-mode 24 | *_flymake.* 25 | 26 | # eshell files 27 | /eshell/history 28 | /eshell/lastdir 29 | 30 | # elpa packages 31 | /elpa/ 32 | 33 | # reftex files 34 | *.rel 35 | 36 | # AUCTeX auto folder 37 | /auto/ 38 | 39 | # cask packages 40 | .cask/ 41 | dist/ 42 | 43 | # Flycheck 44 | flycheck_*.el 45 | 46 | # server auth directory 47 | /server/ 48 | 49 | # projectiles files 50 | .projectile 51 | 52 | # directory configuration 53 | .dir-locals.el 54 | 55 | # network security 56 | /network-security.data 57 | 58 | 59 | ### Vim ### 60 | # Swap 61 | [._]*.s[a-v][a-z] 62 | [._]*.sw[a-p] 63 | [._]s[a-rt-v][a-z] 64 | [._]ss[a-gi-z] 65 | [._]sw[a-p] 66 | 67 | # Session 68 | Session.vim 69 | Sessionx.vim 70 | 71 | # Temporary 72 | .netrwhist 73 | 74 | # Auto-generated tag files 75 | tags 76 | 77 | # Persistent undo 78 | [._]*.un~ 79 | 80 | # Coc configuration directory 81 | 
.vim 82 | 83 | ### VisualStudioCode ### 84 | .vscode 85 | 86 | ### VisualStudioCode Patch ### 87 | # Ignore all local history of files 88 | .history 89 | 90 | # End of https://www.gitignore.io/api/vim,emacs,visualstudiocode 91 | 92 | # -- Python -- 93 | 94 | # Byte-compiled / optimized / DLL files 95 | __pycache__/ 96 | *.py[cod] 97 | *$py.class 98 | 99 | # Distribution / packaging 100 | .Python 101 | build/ 102 | develop-eggs/ 103 | dist/ 104 | downloads/ 105 | eggs/ 106 | .eggs/ 107 | lib/ 108 | lib64/ 109 | parts/ 110 | sdist/ 111 | var/ 112 | wheels/ 113 | pip-wheel-metadata/ 114 | share/python-wheels/ 115 | *.egg-info/ 116 | .installed.cfg 117 | *.egg 118 | MANIFEST 119 | *.so 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 122 | __pypackages__/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # pycharm project settings 132 | .idea 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # poetry 140 | poetry.lock 141 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | All notable changes to this project will be documented in this file. 4 | This project adheres to [Semantic Versioning](http://semver.org/) and [Keep a Changelog](http://keepachangelog.com/). 5 | 6 | ## Unreleased 7 | --- 8 | 9 | ## 0.6.0 - (2022-07-17) 10 | --- 11 | ### Added 12 | * Added `aliases` optional argument to `Engine.add_resource` 13 | 14 | ### Changes 15 | * Update PyO3 dependency to `0.16`. 16 | * Update upstream dependency to `0.5.6`. 17 | 18 | ### Breaks 19 | * Minimum Rust version is now `1.53`. 20 | * Minimum Python version is now `3.7`. 21 | 22 | ## 0.5.2 - (2022-03-01) 23 | --- 24 | * Include complete redirect rule feature ([#59](https://github.com/ArniDagur/python-adblock/pull/59)). 
Thanks @x0day! 25 | 26 | ## 0.5.1 - (2021-06-26) 27 | --- 28 | ### Fixes 29 | * Fix test suite for Python `3.10`. 30 | 31 | ## 0.5.0 - (2021-06-26) 32 | --- 33 | ### Breaks 34 | * Library now throws the custom `adblock.AdblockException` exception, instead of `ValueError`. 35 | 36 | ## 0.4.4 - (2021-04-13) 37 | --- 38 | ### Changes 39 | * PyO3 is now configured to use [`abi3`](https://pyo3.rs/v0.13.2/building_and_distribution.html#py_limited_apiabi3). 40 | 41 | ## 0.4.3 - (2021-03-20) 42 | --- 43 | ### Changes 44 | * Update `adblock` dependency to `0.3.10` 45 | 46 | ## 0.4.2 - (2021-02-01) 47 | --- 48 | ### Fixes 49 | * Remove relative import which caused problems in [#17](https://github.com/ArniDagur/python-adblock/issues/17). 50 | 51 | 52 | ## 0.4.1 - (2021-01-27) 53 | --- 54 | 55 | ### New 56 | * Windows 32-bit prebuilt wheels. 57 | 58 | ### Changes 59 | * Updated PyO3 to version `0.13`. 60 | * Changed `__repr__` methods of classes to be more idiomatic. 61 | 62 | ### Breaks 63 | * Dropped Python `3.5` support. 64 | 65 | 66 | ## 0.4.0 - (2020-12-16) 67 | --- 68 | 69 | ### New 70 | * Maintain a `CHANGELOG.md` file. 71 | * Include `generichide` field in `UrlSpecificResources`. 72 | 73 | ### Fixes 74 | * Include `Cargo.lock` in source control, fixing incorrect dependency resolution [#15](https://github.com/ArniDagur/python-adblock/issues/15). 75 | 76 | ### Breaks 77 | * Remove `explicit_cancel` field from `BlockerResult`, as it has been removed upstream. 78 | 79 | 80 | ## 0.3.2 - (2020-09-22) 81 | --- 82 | 83 | ### New 84 | * Build Python 3.9 wheels. 85 | 86 | ### Changes 87 | * Updated PyO3 to version `0.12`. 88 | 89 | ### Fixes 90 | * Don't use star imports in `__init__.py` to give linters and type checkers more information. 91 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 
2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adblock" 7 | version = "0.5.6" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "27785ef5b89bc88fd2def3e1ecfefdf5ec1583795b916667afd15b13058f8315" 10 | dependencies = [ 11 | "addr", 12 | "base64", 13 | "bitflags", 14 | "flate2", 15 | "idna", 16 | "itertools", 17 | "once_cell", 18 | "percent-encoding", 19 | "regex", 20 | "rmp-serde 0.13.7", 21 | "rmp-serde 0.15.5", 22 | "seahash", 23 | "serde", 24 | "twoway", 25 | "url", 26 | ] 27 | 28 | [[package]] 29 | name = "adblock" 30 | version = "0.6.0" 31 | dependencies = [ 32 | "adblock 0.5.6", 33 | "pyo3", 34 | ] 35 | 36 | [[package]] 37 | name = "addr" 38 | version = "0.14.0" 39 | source = "registry+https://github.com/rust-lang/crates.io-index" 40 | checksum = "c54ccac949a2afafdfc889e15c753bbc6ee8783e026bbe3d057b00b13907db70" 41 | dependencies = [ 42 | "psl", 43 | "psl-types", 44 | ] 45 | 46 | [[package]] 47 | name = "adler" 48 | version = "1.0.2" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 51 | 52 | [[package]] 53 | name = "aho-corasick" 54 | version = "0.7.18" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 57 | dependencies = [ 58 | "memchr", 59 | ] 60 | 61 | [[package]] 62 | name = "autocfg" 63 | version = "1.1.0" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 66 | 67 | [[package]] 68 | name = "base64" 69 | version = "0.13.0" 70 | source = "registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" 72 | 73 | [[package]] 74 | name = "bitflags" 75 | version = "1.3.2" 76 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 78 | 79 | [[package]] 80 | name = "byteorder" 81 | version = "1.4.3" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 84 | 85 | [[package]] 86 | name = "cfg-if" 87 | version = "1.0.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 90 | 91 | [[package]] 92 | name = "crc32fast" 93 | version = "1.3.2" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 96 | dependencies = [ 97 | "cfg-if", 98 | ] 99 | 100 | [[package]] 101 | name = "either" 102 | version = "1.7.0" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" 105 | 106 | [[package]] 107 | name = "flate2" 108 | version = "1.0.24" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" 111 | dependencies = [ 112 | "crc32fast", 113 | "miniz_oxide", 114 | ] 115 | 116 | [[package]] 117 | name = "form_urlencoded" 118 | version = "1.0.1" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" 121 | dependencies = [ 122 | "matches", 123 | "percent-encoding", 124 | ] 125 | 126 | [[package]] 127 | name = "idna" 128 | version = "0.2.3" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" 131 | dependencies = [ 132 | "matches", 133 | "unicode-bidi", 134 | "unicode-normalization", 135 | ] 
136 | 137 | [[package]] 138 | name = "indoc" 139 | version = "1.0.6" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e" 142 | 143 | [[package]] 144 | name = "itertools" 145 | version = "0.10.3" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" 148 | dependencies = [ 149 | "either", 150 | ] 151 | 152 | [[package]] 153 | name = "libc" 154 | version = "0.2.126" 155 | source = "registry+https://github.com/rust-lang/crates.io-index" 156 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 157 | 158 | [[package]] 159 | name = "lock_api" 160 | version = "0.4.7" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" 163 | dependencies = [ 164 | "autocfg", 165 | "scopeguard", 166 | ] 167 | 168 | [[package]] 169 | name = "matches" 170 | version = "0.1.9" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" 173 | 174 | [[package]] 175 | name = "memchr" 176 | version = "2.5.0" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 179 | 180 | [[package]] 181 | name = "miniz_oxide" 182 | version = "0.5.3" 183 | source = "registry+https://github.com/rust-lang/crates.io-index" 184 | checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" 185 | dependencies = [ 186 | "adler", 187 | ] 188 | 189 | [[package]] 190 | name = "num-traits" 191 | version = "0.2.15" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 194 | dependencies = [ 
195 | "autocfg", 196 | ] 197 | 198 | [[package]] 199 | name = "once_cell" 200 | version = "1.13.0" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" 203 | 204 | [[package]] 205 | name = "parking_lot" 206 | version = "0.12.1" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 209 | dependencies = [ 210 | "lock_api", 211 | "parking_lot_core", 212 | ] 213 | 214 | [[package]] 215 | name = "parking_lot_core" 216 | version = "0.9.3" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" 219 | dependencies = [ 220 | "cfg-if", 221 | "libc", 222 | "redox_syscall", 223 | "smallvec", 224 | "windows-sys", 225 | ] 226 | 227 | [[package]] 228 | name = "paste" 229 | version = "1.0.7" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" 232 | 233 | [[package]] 234 | name = "percent-encoding" 235 | version = "2.1.0" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" 238 | 239 | [[package]] 240 | name = "proc-macro2" 241 | version = "1.0.40" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" 244 | dependencies = [ 245 | "unicode-ident", 246 | ] 247 | 248 | [[package]] 249 | name = "psl" 250 | version = "2.0.90" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "7e9c7d362659b525758d09e2a3ae965e371b347605e5fb2bc5bec5c2a1ecf41b" 253 | dependencies = [ 254 | "psl-types", 255 | ] 256 | 257 | [[package]] 258 | name = "psl-types" 259 | version = 
"2.0.10" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "e8eda7c62d9ecaafdf8b62374c006de0adf61666ae96a96ba74a37134aa4e470" 262 | 263 | [[package]] 264 | name = "pyo3" 265 | version = "0.16.5" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "1e6302e85060011447471887705bb7838f14aba43fcb06957d823739a496b3dc" 268 | dependencies = [ 269 | "cfg-if", 270 | "indoc", 271 | "libc", 272 | "parking_lot", 273 | "pyo3-build-config", 274 | "pyo3-ffi", 275 | "pyo3-macros", 276 | "unindent", 277 | ] 278 | 279 | [[package]] 280 | name = "pyo3-build-config" 281 | version = "0.16.5" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | checksum = "b5b65b546c35d8a3b1b2f0ddbac7c6a569d759f357f2b9df884f5d6b719152c8" 284 | dependencies = [ 285 | "once_cell", 286 | "target-lexicon", 287 | ] 288 | 289 | [[package]] 290 | name = "pyo3-ffi" 291 | version = "0.16.5" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "c275a07127c1aca33031a563e384ffdd485aee34ef131116fcd58e3430d1742b" 294 | dependencies = [ 295 | "libc", 296 | "pyo3-build-config", 297 | ] 298 | 299 | [[package]] 300 | name = "pyo3-macros" 301 | version = "0.16.5" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "284fc4485bfbcc9850a6d661d627783f18d19c2ab55880b021671c4ba83e90f7" 304 | dependencies = [ 305 | "proc-macro2", 306 | "pyo3-macros-backend", 307 | "quote", 308 | "syn", 309 | ] 310 | 311 | [[package]] 312 | name = "pyo3-macros-backend" 313 | version = "0.16.5" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = "53bda0f58f73f5c5429693c96ed57f7abdb38fdfc28ae06da4101a257adb7faf" 316 | dependencies = [ 317 | "proc-macro2", 318 | "quote", 319 | "syn", 320 | ] 321 | 322 | [[package]] 323 | name = "quote" 324 | version = "1.0.20" 325 | source = "registry+https://github.com/rust-lang/crates.io-index" 326 | checksum = 
"3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" 327 | dependencies = [ 328 | "proc-macro2", 329 | ] 330 | 331 | [[package]] 332 | name = "redox_syscall" 333 | version = "0.2.13" 334 | source = "registry+https://github.com/rust-lang/crates.io-index" 335 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 336 | dependencies = [ 337 | "bitflags", 338 | ] 339 | 340 | [[package]] 341 | name = "regex" 342 | version = "1.6.0" 343 | source = "registry+https://github.com/rust-lang/crates.io-index" 344 | checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" 345 | dependencies = [ 346 | "aho-corasick", 347 | "memchr", 348 | "regex-syntax", 349 | ] 350 | 351 | [[package]] 352 | name = "regex-syntax" 353 | version = "0.6.27" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" 356 | 357 | [[package]] 358 | name = "rmp" 359 | version = "0.8.11" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "44519172358fd6d58656c86ab8e7fbc9e1490c3e8f14d35ed78ca0dd07403c9f" 362 | dependencies = [ 363 | "byteorder", 364 | "num-traits", 365 | "paste", 366 | ] 367 | 368 | [[package]] 369 | name = "rmp-serde" 370 | version = "0.13.7" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "011e1d58446e9fa3af7cdc1fb91295b10621d3ac4cb3a85cc86385ee9ca50cd3" 373 | dependencies = [ 374 | "byteorder", 375 | "rmp", 376 | "serde", 377 | ] 378 | 379 | [[package]] 380 | name = "rmp-serde" 381 | version = "0.15.5" 382 | source = "registry+https://github.com/rust-lang/crates.io-index" 383 | checksum = "723ecff9ad04f4ad92fe1c8ca6c20d2196d9286e9c60727c4cb5511629260e9d" 384 | dependencies = [ 385 | "byteorder", 386 | "rmp", 387 | "serde", 388 | ] 389 | 390 | [[package]] 391 | name = "scopeguard" 392 | version = "1.1.0" 393 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 395 | 396 | [[package]] 397 | name = "seahash" 398 | version = "3.0.7" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "58f57ca1d128a43733fd71d583e837b1f22239a37ebea09cde11d8d9a9080f47" 401 | 402 | [[package]] 403 | name = "serde" 404 | version = "1.0.139" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6" 407 | dependencies = [ 408 | "serde_derive", 409 | ] 410 | 411 | [[package]] 412 | name = "serde_derive" 413 | version = "1.0.139" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb" 416 | dependencies = [ 417 | "proc-macro2", 418 | "quote", 419 | "syn", 420 | ] 421 | 422 | [[package]] 423 | name = "smallvec" 424 | version = "1.9.0" 425 | source = "registry+https://github.com/rust-lang/crates.io-index" 426 | checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" 427 | 428 | [[package]] 429 | name = "syn" 430 | version = "1.0.98" 431 | source = "registry+https://github.com/rust-lang/crates.io-index" 432 | checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" 433 | dependencies = [ 434 | "proc-macro2", 435 | "quote", 436 | "unicode-ident", 437 | ] 438 | 439 | [[package]] 440 | name = "target-lexicon" 441 | version = "0.12.4" 442 | source = "registry+https://github.com/rust-lang/crates.io-index" 443 | checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1" 444 | 445 | [[package]] 446 | name = "tinyvec" 447 | version = "1.6.0" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 450 | dependencies = [ 451 | 
"tinyvec_macros", 452 | ] 453 | 454 | [[package]] 455 | name = "tinyvec_macros" 456 | version = "0.1.0" 457 | source = "registry+https://github.com/rust-lang/crates.io-index" 458 | checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" 459 | 460 | [[package]] 461 | name = "twoway" 462 | version = "0.2.2" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "c57ffb460d7c24cd6eda43694110189030a3d1dfe418416d9468fd1c1d290b47" 465 | dependencies = [ 466 | "memchr", 467 | "unchecked-index", 468 | ] 469 | 470 | [[package]] 471 | name = "unchecked-index" 472 | version = "0.2.2" 473 | source = "registry+https://github.com/rust-lang/crates.io-index" 474 | checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" 475 | 476 | [[package]] 477 | name = "unicode-bidi" 478 | version = "0.3.8" 479 | source = "registry+https://github.com/rust-lang/crates.io-index" 480 | checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" 481 | 482 | [[package]] 483 | name = "unicode-ident" 484 | version = "1.0.2" 485 | source = "registry+https://github.com/rust-lang/crates.io-index" 486 | checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" 487 | 488 | [[package]] 489 | name = "unicode-normalization" 490 | version = "0.1.21" 491 | source = "registry+https://github.com/rust-lang/crates.io-index" 492 | checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" 493 | dependencies = [ 494 | "tinyvec", 495 | ] 496 | 497 | [[package]] 498 | name = "unindent" 499 | version = "0.1.9" 500 | source = "registry+https://github.com/rust-lang/crates.io-index" 501 | checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44" 502 | 503 | [[package]] 504 | name = "url" 505 | version = "2.2.2" 506 | source = "registry+https://github.com/rust-lang/crates.io-index" 507 | checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" 508 | 
dependencies = [ 509 | "form_urlencoded", 510 | "idna", 511 | "matches", 512 | "percent-encoding", 513 | ] 514 | 515 | [[package]] 516 | name = "windows-sys" 517 | version = "0.36.1" 518 | source = "registry+https://github.com/rust-lang/crates.io-index" 519 | checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 520 | dependencies = [ 521 | "windows_aarch64_msvc", 522 | "windows_i686_gnu", 523 | "windows_i686_msvc", 524 | "windows_x86_64_gnu", 525 | "windows_x86_64_msvc", 526 | ] 527 | 528 | [[package]] 529 | name = "windows_aarch64_msvc" 530 | version = "0.36.1" 531 | source = "registry+https://github.com/rust-lang/crates.io-index" 532 | checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 533 | 534 | [[package]] 535 | name = "windows_i686_gnu" 536 | version = "0.36.1" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 539 | 540 | [[package]] 541 | name = "windows_i686_msvc" 542 | version = "0.36.1" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 545 | 546 | [[package]] 547 | name = "windows_x86_64_gnu" 548 | version = "0.36.1" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 551 | 552 | [[package]] 553 | name = "windows_x86_64_msvc" 554 | version = "0.36.1" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 557 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | publish = false 3 | name = "adblock" 4 | version = "0.6.0" 5 | edition = "2018" 6 | authors = ["Árni 
Dagur "] 7 | license = "MIT OR Apache-2.0" 8 | readme = "README.md" 9 | homepage = "https://github.com/ArniDagur/python-adblock" 10 | repository = "https://github.com/ArniDagur/python-adblock" 11 | 12 | [profile.release] 13 | debug = true 14 | 15 | [dependencies] 16 | adblock = { version = "=0.5.6", default-features = false, features = ["full-regex-handling", "embedded-domain-resolver"] } 17 | pyo3 = { version = "0.16", features = ["abi3-py37", "extension-module"] } 18 | 19 | [lib] 20 | name = "adblock" 21 | crate-type = ["rlib", "cdylib"] 22 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 Árni Dagur Guðmundsson 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Árni Dagur Guðmundsson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-adblock 2 | 3 | Python wrapper for Brave's adblocking library, which is written in Rust. 4 | 5 | ### Building from source 6 | 7 | #### Build dependencies 8 | 9 | | Build Dependency | Versions | Arch Linux | Url | 10 | | ---------------- | -------- | ---------- | ------------------------------- | 11 | | Python | `>=3.7` | `python` | - | 12 | | Rust | `>=1.53` | `rust` | - | 13 | | Maturin | `>=0.10` | `maturin` | https://github.com/PyO3/maturin | 14 | 15 | #### PEP 517 16 | 17 | The `python-adblock` library is [PEP 517](https://www.python.org/dev/peps/pep-0517/) compatible, so you can build and install it from source, simply by running 18 | 19 | ``` 20 | pip install . 21 | ``` 22 | 23 | from the root of this directory. 24 | 25 | #### Wheels 26 | 27 | To create a wheel for this library, run the following command 28 | 29 | ``` 30 | maturin build --release --no-sdist --out dist/ 31 | ``` 32 | 33 | the result can be found in the `dist/` directory. 34 | 35 | ### Developing 36 | 37 | I use Poetry for development. To create and enter a virtual environment, do 38 | 39 | ``` 40 | poetry install 41 | poetry shell 42 | ``` 43 | 44 | then, to install the `adblock` module into the virtual environment, do 45 | 46 | ``` 47 | maturin develop 48 | ``` 49 | 50 | ### Documentation 51 | 52 | Rust documentation for the latest `master` branch can be found at https://arnidagur.github.io/python-adblock/docs/adblock/index.html. 
53 | 54 | ### License 55 | 56 | This project is licensed under either of 57 | 58 | - Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or 59 | http://www.apache.org/licenses/LICENSE-2.0) 60 | - MIT license ([LICENSE-MIT](LICENSE-MIT) or 61 | http://opensource.org/licenses/MIT) 62 | 63 | at your option. 64 | -------------------------------------------------------------------------------- /adblock/__init__.py: -------------------------------------------------------------------------------- 1 | from adblock.adblock import ( 2 | __version__, 3 | Engine, 4 | FilterSet, 5 | BlockerResult, 6 | UrlSpecificResources, 7 | AdblockException, 8 | BlockerException, 9 | SerializationError, 10 | DeserializationError, 11 | OptimizedFilterExistence, 12 | BadFilterAddUnsupported, 13 | FilterExists, 14 | AddResourceException, 15 | InvalidUtf8ContentError, 16 | InvalidBase64ContentError, 17 | ) 18 | 19 | 20 | __all__ = ( 21 | "Engine", 22 | "FilterSet", 23 | "BlockerResult", 24 | "UrlSpecificResources", 25 | "AdblockException", 26 | "BlockerException", 27 | "SerializationError", 28 | "DeserializationError", 29 | "OptimizedFilterExistence", 30 | "BadFilterAddUnsupported", 31 | "FilterExists", 32 | "AddResourceException", 33 | "InvalidUtf8ContentError", 34 | "InvalidBase64ContentError", 35 | ) 36 | -------------------------------------------------------------------------------- /adblock/adblock.pyi: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict, List, Set 2 | 3 | __version__: str 4 | 5 | class AdblockException(Exception): 6 | pass 7 | 8 | class BlockerException(AdblockException): 9 | pass 10 | 11 | class SerializationError(BlockerException): 12 | pass 13 | 14 | class DeserializationError(BlockerException): 15 | pass 16 | 17 | class OptimizedFilterExistence(BlockerException): 18 | pass 19 | 20 | class BadFilterAddUnsupported(BlockerException): 21 | pass 22 | 23 | class FilterExists(BlockerException): 24 
| pass 25 | 26 | class AddResourceException(AdblockException): 27 | pass 28 | 29 | class InvalidUtf8ContentError(AddResourceException): 30 | pass 31 | 32 | class InvalidBase64ContentError(AddResourceException): 33 | pass 34 | 35 | class BlockerResult: 36 | matched: bool 37 | explicit_cancel: bool 38 | important: bool 39 | redirect_type: Optional[str] 40 | redirect: Optional[str] 41 | exception: Optional[str] 42 | filter: Optional[str] 43 | error: Optional[str] 44 | def __repr__(self) -> str: 45 | pass 46 | 47 | class UrlSpecificResources: 48 | hide_selectors: Set[str] 49 | style_selectors: Dict[str, List[str]] 50 | exceptions: Set[str] 51 | injected_script: str 52 | def __repr__(self) -> str: 53 | pass 54 | 55 | class FilterSet: 56 | def __init__(self, debug: bool = False) -> None: 57 | pass 58 | def add_filter_list( 59 | self, 60 | filter_list: str, 61 | format: str = "standard", 62 | include_redirect_urls: bool = False, 63 | rule_types: str = "all", 64 | ) -> None: 65 | pass 66 | def add_filters( 67 | self, 68 | filters: List[str], 69 | format: str = "standard", 70 | include_redirect_urls: bool = False, 71 | rule_types: str = "all", 72 | ) -> None: 73 | pass 74 | 75 | class Engine: 76 | def __init__(self, filter_set: FilterSet, optimize: bool = True) -> None: 77 | pass 78 | def check_network_urls( 79 | self, url: str, source_url: str, request_type: str 80 | ) -> BlockerResult: 81 | pass 82 | def check_network_urls_with_hostnames( 83 | self, 84 | url: str, 85 | hostname: str, 86 | source_hostname: str, 87 | request_type: str, 88 | third_party_request: Optional[bool], 89 | ) -> BlockerResult: 90 | pass 91 | def check_network_urls_with_hostnames_subset( 92 | self, 93 | url: str, 94 | hostname: str, 95 | source_hostname: str, 96 | request_type: str, 97 | third_party_request: Optional[bool], 98 | previously_matched_rule: bool, 99 | force_check_exceptions: bool, 100 | ) -> BlockerResult: 101 | pass 102 | def serialize(self) -> bytes: 103 | pass 104 | def 
serialize_to_file(self, file: str) -> None: 105 | pass 106 | def deserialize(self, serialized: bytes) -> None: 107 | pass 108 | def deserialize_from_file(self, file: str) -> None: 109 | pass 110 | def filter_exists(self, filter: str) -> bool: 111 | pass 112 | def use_tags(self, tags: List[str]) -> None: 113 | pass 114 | def enable_tags(self, tags: List[str]) -> None: 115 | pass 116 | def disable_tags(self, tags: List[str]) -> None: 117 | pass 118 | def tag_exists(self, tag: str) -> bool: 119 | pass 120 | def add_resource( 121 | self, 122 | name: str, 123 | content_type: str, 124 | content: str, 125 | aliases: Optional[List[str]] = None, 126 | ) -> bool: 127 | pass 128 | def url_cosmetic_resources(self, url: str) -> UrlSpecificResources: 129 | pass 130 | def hidden_class_id_selectors( 131 | self, classes: List[str], ids: List[str], exceptions: Set[str] 132 | ) -> List[str]: 133 | pass 134 | -------------------------------------------------------------------------------- /adblock/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArniDagur/python-adblock/a340dfcb37b402b0427b2dd7ac3c64cfe7edb38b/adblock/py.typed -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "adblock" 3 | version = "0.6.0" 4 | description = "Brave's adblocking in Python" 5 | requires-python = ">=3.7" 6 | authors = [{ name = "Árni Dagur", email = "arni@dagur.eu" }] 7 | classifiers = [ 8 | "Programming Language :: Python", 9 | "Programming Language :: Rust", 10 | "License :: OSI Approved :: MIT License", 11 | "License :: OSI Approved :: Apache Software License", 12 | ] 13 | 14 | [tool.poetry] 15 | name = "adblock" 16 | version = "0.6.0" 17 | description = "Brave's adblocking in Python" 18 | authors = ["Árni Dagur "] 19 | 20 | [tool.poetry.dependencies] 21 | python = "^3.6" 22 
| 23 | [tool.poetry.dev-dependencies] 24 | maturin = "*" 25 | pytest = "*" 26 | toml = "*" 27 | 28 | [build-system] 29 | requires = ["maturin>=0.12"] 30 | build-backend = "maturin" 31 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Python wrapper for Brave's adblocking library, which is written in Rust. 2 | #![deny( 3 | future_incompatible, 4 | nonstandard_style, 5 | rust_2018_idioms, 6 | missing_copy_implementations, 7 | trivial_casts, 8 | trivial_numeric_casts, 9 | unsafe_code, 10 | unused_qualifications, 11 | deprecated 12 | )] 13 | 14 | use adblock::blocker::BlockerResult as RustBlockerResult; 15 | use adblock::blocker::{BlockerError as RustBlockerError, Redirection}; 16 | use adblock::cosmetic_filter_cache::UrlSpecificResources as RustUrlSpecificResources; 17 | use adblock::engine::Engine as RustEngine; 18 | use adblock::lists::FilterSet as RustFilterSet; 19 | use adblock::lists::{FilterFormat, ParseOptions, RuleTypes}; 20 | use pyo3::create_exception; 21 | use pyo3::exceptions::PyException; 22 | use pyo3::prelude::*; 23 | use pyo3::types::PyBytes; 24 | use pyo3::PyErr; 25 | 26 | use adblock::resources::{ 27 | AddResourceError as RustAddResourceError, MimeType, Resource, ResourceType, 28 | }; 29 | use std::collections::HashMap; 30 | use std::collections::HashSet; 31 | use std::error::Error; 32 | use std::fmt::{self, Display}; 33 | use std::fs; 34 | use std::io::{Read, Write}; 35 | 36 | /// Brave's adblocking library in Python! 
37 | #[pymodule] 38 | fn adblock(py: Python<'_>, m: &PyModule) -> PyResult<()> { 39 | m.add("__version__", env!("CARGO_PKG_VERSION"))?; 40 | m.add_class::()?; 41 | m.add_class::()?; 42 | m.add_class::()?; 43 | m.add_class::()?; 44 | m.add("AdblockException", py.get_type::())?; 45 | m.add("BlockerException", py.get_type::())?; 46 | m.add("SerializationError", py.get_type::())?; 47 | m.add( 48 | "DeserializationError", 49 | py.get_type::(), 50 | )?; 51 | m.add( 52 | "OptimizedFilterExistence", 53 | py.get_type::(), 54 | )?; 55 | m.add( 56 | "BadFilterAddUnsupported", 57 | py.get_type::(), 58 | )?; 59 | m.add("FilterExists", py.get_type::())?; 60 | m.add( 61 | "AddResourceException", 62 | py.get_type::(), 63 | )?; 64 | m.add( 65 | "InvalidBase64ContentError", 66 | py.get_type::(), 67 | )?; 68 | m.add( 69 | "InvalidUtf8ContentError", 70 | py.get_type::(), 71 | )?; 72 | Ok(()) 73 | } 74 | 75 | /// The result of an ad-blocking check. 76 | #[pyclass] 77 | pub struct BlockerResult { 78 | #[pyo3(get)] 79 | pub matched: bool, 80 | /// Important is used to signal that a rule with the `important` option 81 | /// matched. An `important` match means that exceptions should not apply 82 | /// and no further checking is necessary--the request should be blocked 83 | /// (empty body or cancelled). 84 | /// 85 | /// Brave Browser keeps separate instances of Blocker for default lists 86 | /// and regional ones, so `important` here is used to correct behaviour 87 | /// between them: checking should stop instead of moving to the next 88 | /// instance iff an `important` rule matched. 89 | #[pyo3(get)] 90 | pub important: bool, 91 | /// Iff the blocker matches a rule which has the `redirect` option, as per 92 | /// [uBlock Origin's redirect syntax][1], `redirect_type` is not `None`: it 93 | /// is "resource" or "url", naming the kind of value stored in `redirect`. 
94 | /// 95 | /// [1]: https://github.com/gorhill/uBlock/wiki/Static-filter-syntax#redirect 96 | #[pyo3(get)] 97 | pub redirect_type: Option, 98 | /// Redirect is not `None` when the blocker matched a rule which has the 99 | /// `redirect` option. For a resource redirect (`redirect_type` is 100 | /// "resource") it contains the body of the redirect to be injected; for a 101 | /// URL redirect (`redirect_type` is "url") it contains the URL to redirect 102 | /// to. 103 | #[pyo3(get)] 104 | pub redirect: Option, 105 | /// Exception is not `None` when the blocker matched on an exception rule. 106 | /// Effectively this means that there was a match, but the request should 107 | /// not be blocked. It is a non-empty string if the blocker was initialized 108 | /// from a list of rules with debugging enabled, otherwise the original 109 | /// string representation is discarded to reduce memory use. 110 | #[pyo3(get)] 111 | pub exception: Option, 112 | /// Filter--similarly to exception--includes the string representation of 113 | /// the rule when there is a match and debugging is enabled. Otherwise, on 114 | /// a match, it is not `None`. 115 | #[pyo3(get)] 116 | pub filter: Option, 117 | /// The `error` field is only used to signal that there was an error in 118 | /// parsing the provided URLs when using the simpler 119 | /// `check_network_urls` method. 
120 | #[pyo3(get)] 121 | pub error: Option, 122 | } 123 | 124 | impl From for BlockerResult { 125 | fn from(br: RustBlockerResult) -> Self { 126 | let (redirect, redirect_type) = if let Some(resource) = br.redirect { 127 | match resource { 128 | Redirection::Resource(resource) => (Some(resource), Some("resource".to_string())), 129 | Redirection::Url(url) => (Some(url), Some("url".to_string())), 130 | } 131 | } else { 132 | (None, None) 133 | }; 134 | 135 | Self { 136 | matched: br.matched, 137 | important: br.important, 138 | exception: br.exception, 139 | filter: br.filter, 140 | error: br.error, 141 | redirect_type, 142 | redirect, 143 | } 144 | } 145 | } 146 | 147 | #[pymethods] 148 | impl BlockerResult { 149 | fn __repr__(&self) -> PyResult { 150 | Ok(format!( 151 | "BlockerResult(matched={}, important={}, redirect={}, exception={}, filter={}, error={})", 152 | self.matched.diy_python_repr(), 153 | self.important.diy_python_repr(), 154 | self.redirect.diy_python_repr(), 155 | self.exception.diy_python_repr(), 156 | self.filter.diy_python_repr(), 157 | self.error.diy_python_repr(), 158 | )) 159 | } 160 | } 161 | 162 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] 163 | pub enum BlockerError { 164 | SerializationError, 165 | DeserializationError, 166 | OptimizedFilterExistence, 167 | BadFilterAddUnsupported, 168 | FilterExists, 169 | } 170 | 171 | impl Error for BlockerError { 172 | fn source(&self) -> Option<&(dyn Error + 'static)> { 173 | None 174 | } 175 | } 176 | 177 | impl Display for BlockerError { 178 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 179 | write!( 180 | f, 181 | "{}", 182 | match self { 183 | Self::SerializationError => "Serialization error", 184 | Self::DeserializationError => "Deserialization error", 185 | Self::OptimizedFilterExistence => "Optimized filter exists", 186 | Self::BadFilterAddUnsupported => "Bad filter add unsupported", 187 | Self::FilterExists => "Filter exists", 188 | } 189 | ) 190 | } 191 | } 192 | 193 | 
create_exception!(adblock, AdblockException, PyException); 194 | create_exception!(adblock, BlockerException, AdblockException); 195 | create_exception!(adblock, AddResourceException, AdblockException); 196 | create_exception!(adblock, InvalidBase64ContentError, AddResourceException); 197 | create_exception!(adblock, InvalidUtf8ContentError, AddResourceException); 198 | create_exception!(adblock, SerializationError, BlockerException); 199 | create_exception!(adblock, DeserializationError, BlockerException); 200 | create_exception!(adblock, OptimizedFilterExistence, BlockerException); 201 | create_exception!(adblock, BadFilterAddUnsupported, BlockerException); 202 | create_exception!(adblock, FilterExists, BlockerException); 203 | 204 | impl From for PyErr { 205 | fn from(err: BlockerError) -> Self { 206 | let msg = format!("{:?}", err); 207 | match err { 208 | BlockerError::SerializationError => Self::new::(msg), 209 | BlockerError::DeserializationError => Self::new::(msg), 210 | BlockerError::OptimizedFilterExistence => Self::new::(msg), 211 | BlockerError::BadFilterAddUnsupported => Self::new::(msg), 212 | BlockerError::FilterExists => Self::new::(msg), 213 | } 214 | } 215 | } 216 | 217 | impl From for BlockerError { 218 | fn from(err: RustBlockerError) -> Self { 219 | match err { 220 | RustBlockerError::SerializationError => Self::SerializationError, 221 | RustBlockerError::DeserializationError => Self::DeserializationError, 222 | RustBlockerError::OptimizedFilterExistence => Self::OptimizedFilterExistence, 223 | RustBlockerError::BadFilterAddUnsupported => Self::BadFilterAddUnsupported, 224 | RustBlockerError::FilterExists => Self::FilterExists, 225 | } 226 | } 227 | } 228 | 229 | fn filter_format_from_string(filter_format: &str) -> PyResult { 230 | match filter_format { 231 | "standard" => Ok(FilterFormat::Standard), 232 | "hosts" => Ok(FilterFormat::Hosts), 233 | _ => Err(PyErr::new::( 234 | "Invalid FilterFormat value", 235 | )), 236 | } 237 | } 238 | 239 | 
fn rule_types_from_string(rule_types: &str) -> PyResult { 240 | match rule_types { 241 | "all" => Ok(RuleTypes::All), 242 | "networkonly" => Ok(RuleTypes::NetworkOnly), 243 | "cosmeticonly" => Ok(RuleTypes::CosmeticOnly), 244 | _ => Err(PyErr::new::("Invalid RuleTypes value")), 245 | } 246 | } 247 | 248 | /// Manages a set of rules to be added to an Engine. 249 | /// 250 | /// To be able to efficiently handle special options like $badfilter, and to 251 | /// allow optimizations, all rules must be available when the Engine is first 252 | /// created. FilterSet allows assembling a compound list from multiple 253 | /// different sources before compiling the rules into an Engine. 254 | #[pyclass] 255 | #[pyo3(text_signature = "($self, debug)")] 256 | #[derive(Clone)] 257 | pub struct FilterSet { 258 | filter_set: RustFilterSet, 259 | debug: bool, 260 | } 261 | 262 | #[pymethods] 263 | impl FilterSet { 264 | /// Creates a new `FilterSet`. The `debug` argument specifies whether or 265 | /// not to save information about the original raw filter rules alongside 266 | /// the more compact internal representation. If enabled, this information 267 | /// will be passed to the corresponding Engine. 268 | #[new] 269 | #[args(debug = false)] 270 | pub fn new(debug: bool) -> Self { 271 | Self { 272 | filter_set: RustFilterSet::new(debug), 273 | debug, 274 | } 275 | } 276 | 277 | /// Adds the contents of an entire filter list to this FilterSet. Filters 278 | /// that cannot be parsed successfully are ignored. 279 | /// 280 | /// The format is a string containing either "standard" (ABP/uBO-style) 281 | /// or "hosts". 
282 | #[pyo3(text_signature = "($self, filter_list, format, include_redirect_urls, rule_types)")] 283 | #[args( 284 | filter_list, 285 | format = "\"standard\"", 286 | include_redirect_urls = "false", 287 | rule_types = "\"all\"" 288 | )] 289 | pub fn add_filter_list( 290 | &mut self, 291 | filter_list: &str, 292 | format: &str, 293 | include_redirect_urls: bool, 294 | rule_types: &str, 295 | ) -> PyResult<()> { 296 | let filter_format = filter_format_from_string(format)?; 297 | let rule_types = rule_types_from_string(rule_types)?; 298 | self.filter_set.add_filter_list( 299 | filter_list, 300 | ParseOptions { 301 | format: filter_format, 302 | include_redirect_urls, 303 | rule_types, 304 | }, 305 | ); 306 | Ok(()) 307 | } 308 | 309 | /// Adds a collection of filter rules to this FilterSet. Filters that 310 | /// cannot be parsed successfully are ignored. 311 | /// 312 | /// The format is a string containing either "standard" (ABP/uBO-style) 313 | /// or "hosts". 314 | #[pyo3(text_signature = "($self, filters, format, include_redirect_urls, rule_types)")] 315 | #[args( 316 | filters, 317 | format = "\"standard\"", 318 | include_redirect_urls = "false", 319 | rule_types = "\"all\"" 320 | )] 321 | pub fn add_filters( 322 | &mut self, 323 | filters: Vec, 324 | format: &str, 325 | include_redirect_urls: bool, 326 | rule_types: &str, 327 | ) -> PyResult<()> { 328 | let filter_format = filter_format_from_string(format)?; 329 | let rule_types = rule_types_from_string(rule_types)?; 330 | self.filter_set.add_filters( 331 | &filters, 332 | ParseOptions { 333 | format: filter_format, 334 | include_redirect_urls, 335 | rule_types, 336 | }, 337 | ); 338 | Ok(()) 339 | } 340 | 341 | fn __repr__(&self) -> PyResult { 342 | Ok(format!("FilterSet(debug={})", self.debug.diy_python_repr())) 343 | } 344 | } 345 | 346 | /// Contains cosmetic filter information intended to be injected into a 347 | /// particular hostname. 
348 | #[pyclass] 349 | pub struct UrlSpecificResources { 350 | /// A set of any CSS selector on the page that should be hidden, i.e. 351 | /// styled as `{ display: none !important; }`. 352 | #[pyo3(get)] 353 | pub hide_selectors: HashSet, 354 | /// A map of CSS selectors on the page to respective non-hide style rules, 355 | /// i.e. any required styles other than `display: none`. 356 | #[pyo3(get)] 357 | pub style_selectors: HashMap>, 358 | /// A set of any class or id CSS selectors that should not have generic 359 | /// rules applied. 360 | // In practice, these should be passed to `class_id_stylesheet` and not 361 | // used otherwise. 362 | #[pyo3(get)] 363 | pub exceptions: HashSet, 364 | /// Javascript code for any scriptlets that should be injected into the 365 | /// page. 366 | #[pyo3(get)] 367 | pub injected_script: String, 368 | /// `generichide` is set to `True` if there is a corresponding 369 | /// `$generichide` exception network filter. If so, the page should not 370 | /// query for additional generic rules using hidden_class_id_selectors. 371 | #[pyo3(get)] 372 | pub generichide: bool, 373 | } 374 | 375 | impl From for UrlSpecificResources { 376 | fn from(r: RustUrlSpecificResources) -> Self { 377 | Self { 378 | hide_selectors: r.hide_selectors, 379 | style_selectors: r.style_selectors, 380 | exceptions: r.exceptions, 381 | injected_script: r.injected_script, 382 | generichide: r.generichide, 383 | } 384 | } 385 | } 386 | 387 | #[pymethods] 388 | impl UrlSpecificResources { 389 | fn __repr__(&self) -> PyResult { 390 | Ok(format!( 391 | "UrlSpecificResources<{} hide selectors, {} style selectors, {} exceptions, injected_javascript={}, generichide={}>", 392 | self.hide_selectors.len(), 393 | self.style_selectors.len(), 394 | self.exceptions.len(), 395 | self.injected_script.diy_python_repr(), 396 | self.generichide.diy_python_repr(), 397 | )) 398 | } 399 | } 400 | 401 | /// The main object featured in this library. 
This object holds the adblocker's 402 | /// state, and can be queried to see if a given request should be blocked or 403 | /// not. 404 | /// 405 | /// # Request types 406 | /// A few of `Engine`'s methods take a `request_type` argument naming a 407 | /// "resource type". Valid examples are: 408 | /// * `beacon` 409 | /// * `csp_report` 410 | /// * `document` 411 | /// * `font` 412 | /// * `media` 413 | /// * `object` 414 | /// * `script` 415 | /// * `stylesheet` 416 | /// * et cetera... 417 | /// See the [Mozilla Web Documentation][1] for more info. 418 | /// 419 | /// [1]: https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/webRequest/ResourceType 420 | #[pyclass] 421 | #[pyo3(text_signature = "($self, filter_set, optimize)")] 422 | pub struct Engine { 423 | engine: RustEngine, 424 | optimize: bool, 425 | } 426 | 427 | #[pymethods] 428 | impl Engine { 429 | /// Create a new adblocking engine 430 | #[new] 431 | #[args(filter_set, optimize = true)] 432 | pub fn new(filter_set: FilterSet, optimize: bool) -> Self { 433 | let engine = RustEngine::from_filter_set(filter_set.filter_set, optimize); 434 | Self { engine, optimize } 435 | } 436 | 437 | /// Check if the given `url`—pointing to a resource of type `request_type`— 438 | /// is blocked, assuming the request is made from the given `source_url`. 439 | /// Returns an object of type `BlockerResult`. 
440 | /// 441 | /// # Arguments 442 | /// * `url` - The URL of the request to check 443 | /// * `source_url` - The URL from where the request is made 444 | /// * `request_type` - The resource type that the request points to 445 | #[pyo3(text_signature = "($self, url, source_url, request_type)")] 446 | pub fn check_network_urls( 447 | &self, 448 | url: &str, 449 | source_url: &str, 450 | request_type: &str, 451 | ) -> BlockerResult { 452 | let blocker_result = self 453 | .engine 454 | .check_network_urls(url, source_url, request_type); 455 | blocker_result.into() 456 | } 457 | 458 | /// Check if a request should be blocked based on the given parameters. 459 | /// 460 | /// # Arguments 461 | /// * `url` - The URL of the request to check 462 | /// * `hostname` - The given `url`'s hostname 463 | /// * `source_hostname` - The hostname of the source URL. 464 | /// * `request_type` - The resource type that the request points to 465 | /// * `third_party_request` - Is the given request to a third-party? Here, 466 | /// `None` can be given and the engine will figure it out based on the 467 | /// `hostname` and `source_hostname`. 468 | #[pyo3( 469 | text_signature = "($self, url, hostname, source_hostname, request_type, third_party_request)" 470 | )] 471 | pub fn check_network_urls_with_hostnames( 472 | &self, 473 | url: &str, 474 | hostname: &str, 475 | source_hostname: &str, 476 | request_type: &str, 477 | third_party_request: Option, 478 | ) -> BlockerResult { 479 | let blocker_result = self.engine.check_network_urls_with_hostnames( 480 | url, 481 | hostname, 482 | source_hostname, 483 | request_type, 484 | third_party_request, 485 | ); 486 | blocker_result.into() 487 | } 488 | 489 | /// Check if a request should be blocked based on the given parameters. 490 | /// 491 | /// # Arguments 492 | /// * `url` - The URL of the request to check 493 | /// * `hostname` - The given `url`'s hostname 494 | /// * `source_hostname` - The hostname of the source URL. 
495 | /// * `request_type` - The resource type that the request points to 496 | /// * `third_party_request` - Is the given request to a third-party? Here, 497 | /// `None` can be given and the engine will figure it out based on the 498 | /// `hostname` and `source_hostname`. 499 | /// * `previously_matched_rule` - Return a match as long as there are no 500 | /// exceptions 501 | /// * `force_check_exceptions` - Check exceptions even if no other rule matches 502 | #[pyo3( 503 | text_signature = "($self, url, hostname, source_hostname, request_type, \ 504 | third_party_request, previously_matched_rule, force_check_exceptions)" 505 | )] 506 | #[allow(clippy::too_many_arguments)] 507 | pub fn check_network_urls_with_hostnames_subset( 508 | &self, 509 | url: &str, 510 | hostname: &str, 511 | source_hostname: &str, 512 | request_type: &str, 513 | third_party_request: Option, 514 | previously_matched_rule: bool, 515 | force_check_exceptions: bool, 516 | ) -> BlockerResult { 517 | let blocker_result = self.engine.check_network_urls_with_hostnames_subset( 518 | url, 519 | hostname, 520 | source_hostname, 521 | request_type, 522 | third_party_request, 523 | previously_matched_rule, 524 | force_check_exceptions, 525 | ); 526 | blocker_result.into() 527 | } 528 | 529 | /// Sets this engine's resources to additionally include `resource`. 530 | /// 531 | /// # Arguments 532 | /// * `name`: Represents the primary name of the resource, often a filename 533 | /// * `content_type`: How to interpret the resource data within `content`. 534 | /// Use `"template"` if wanting to specify a template resource type. 
535 | /// * `content`: The resource data, encoded using standard base64 configuration 536 | /// * `aliases`: List of aliases for the resource 537 | #[pyo3(text_signature = "($self, name, content_type, content, aliases)")] 538 | pub fn add_resource( 539 | &mut self, 540 | name: &str, 541 | content_type: &str, 542 | content: &str, 543 | aliases: Option>, 544 | ) -> PyResult<()> { 545 | let result = self.engine.add_resource(Resource { 546 | name: name.to_string(), 547 | aliases: aliases.unwrap_or_default(), 548 | kind: match content_type { 549 | "template" => ResourceType::Template, 550 | _ => ResourceType::Mime(MimeType::from(std::borrow::Cow::from( 551 | content_type.to_string(), 552 | ))), 553 | }, 554 | content: content.to_string(), 555 | }); 556 | 557 | match result { 558 | Ok(_) => Ok(()), 559 | Err(err) => match err { 560 | RustAddResourceError::InvalidBase64Content => Err( 561 | InvalidBase64ContentError::new_err("invalid base64 content".to_string()), 562 | ), 563 | RustAddResourceError::InvalidUtf8Content => Err(InvalidUtf8ContentError::new_err( 564 | "invalid utf content".to_string(), 565 | )), 566 | }, 567 | } 568 | } 569 | 570 | /// Serialize this blocking engine to bytes. They can then be deserialized 571 | /// using `deserialize()` to get the same engine again. 572 | #[pyo3(text_signature = "($self)")] 573 | pub fn serialize<'p>(&mut self, py: Python<'p>) -> PyResult<&'p PyBytes> { 574 | let bytes = self.serialize_inner()?; 575 | let py_bytes = PyBytes::new(py, &bytes); 576 | Ok(py_bytes) 577 | } 578 | 579 | fn serialize_inner(&mut self) -> PyResult> { 580 | let result = self.engine.serialize_raw(); 581 | match result { 582 | Ok(x) => Ok(x), 583 | Err(error) => { 584 | let my_blocker_error: BlockerError = error.into(); 585 | Err(my_blocker_error.into()) 586 | } 587 | } 588 | } 589 | 590 | /// Serialize this blocking engine to a file. The file can then be 591 | /// deserialized using `deserialize_from_file()` to get the same engine 592 | /// again. 
593 | #[pyo3(text_signature = "($self, file)")] 594 | pub fn serialize_to_file(&mut self, file: &str) -> PyResult<()> { 595 | let data = self.serialize_inner()?; 596 | let mut fd = fs::OpenOptions::new() 597 | .create(true) 598 | .truncate(true) 599 | .write(true) 600 | .open(file)?; 601 | fd.write_all(&data)?; 602 | Ok(()) 603 | } 604 | 605 | /// Deserialize a blocking engine from bytes produced with `serialize()`. 606 | #[pyo3(text_signature = "($self, serialized)")] 607 | pub fn deserialize(&mut self, serialized: &[u8]) -> PyResult<()> { 608 | let result = self.engine.deserialize(serialized); 609 | match result { 610 | Ok(_) => Ok(()), 611 | Err(error) => { 612 | let my_blocker_error: BlockerError = error.into(); 613 | Err(my_blocker_error.into()) 614 | } 615 | } 616 | } 617 | 618 | /// Deserialize a blocking engine from file produced with 619 | /// `serialize_to_file()`. 620 | #[pyo3(text_signature = "($self, file)")] 621 | pub fn deserialize_from_file(&mut self, file: &str) -> PyResult<()> { 622 | let mut fd = fs::File::open(file)?; 623 | let mut data: Vec = Vec::new(); 624 | fd.read_to_end(&mut data)?; 625 | self.deserialize(&data) 626 | } 627 | 628 | /// Checks if the given filter exists in the blocking engine. 629 | #[pyo3(text_signature = "($self, filter)")] 630 | pub fn filter_exists(&self, filter: &str) -> bool { 631 | self.engine.filter_exists(filter) 632 | } 633 | 634 | /// Sets this engine's tags to be _only_ the ones provided in tags. 635 | /// 636 | /// Tags can be used to cheaply enable or disable network rules with a 637 | /// corresponding $tag option. 638 | #[pyo3(text_signature = "($self, tags)")] 639 | pub fn use_tags(&mut self, tags: Vec<&str>) { 640 | self.engine.use_tags(&tags); 641 | } 642 | 643 | /// Sets this engine's tags to additionally include the ones provided in 644 | /// tags. 645 | /// 646 | /// Tags can be used to cheaply enable or disable network rules with a 647 | /// corresponding $tag option. 
648 | #[pyo3(text_signature = "($self, tags)")] 649 | pub fn enable_tags(&mut self, tags: Vec<&str>) { 650 | self.engine.enable_tags(&tags); 651 | } 652 | 653 | /// Sets this engine's tags to no longer include the ones provided in 654 | /// tags. 655 | /// 656 | /// Tags can be used to cheaply enable or disable network rules with a 657 | /// corresponding $tag option. 658 | #[pyo3(text_signature = "($self, tags)")] 659 | pub fn disable_tags(&mut self, tags: Vec<&str>) { 660 | self.engine.disable_tags(&tags); 661 | } 662 | 663 | /// Checks if a given tag exists in this engine. 664 | /// 665 | /// Tags can be used to cheaply enable or disable network rules with a 666 | /// corresponding $tag option. 667 | #[pyo3(text_signature = "($self, tag)")] 668 | pub fn tag_exists(&self, tag: &str) -> bool { 669 | self.engine.tag_exists(tag) 670 | } 671 | 672 | /// Returns a set of cosmetic filter resources required for a particular 673 | /// url. Once this has been called, all CSS ids and classes on a 674 | /// page should be passed to hidden_class_id_selectors to obtain any 675 | /// stylesheets consisting of generic rules. 676 | #[pyo3(text_signature = "($self, url)")] 677 | pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources { 678 | self.engine.url_cosmetic_resources(url).into() 679 | } 680 | 681 | /// If any of the provided CSS classes or ids could cause a certain generic 682 | /// CSS hide rule (i.e. `{ display: none !important; }`) to be required, this 683 | /// method will return a list of CSS selectors corresponding to rules 684 | /// referencing those classes or ids, provided that the corresponding rules 685 | /// are not excepted. 686 | /// 687 | /// Exceptions should be passed directly from UrlSpecificResources. 
688 | #[pyo3(text_signature = "($self, classes, ids, exceptions)")] 689 | pub fn hidden_class_id_selectors( 690 | &self, 691 | classes: Vec, 692 | ids: Vec, 693 | exceptions: HashSet, 694 | ) -> PyResult> { 695 | Ok(self 696 | .engine 697 | .hidden_class_id_selectors(&classes, &ids, &exceptions)) 698 | } 699 | 700 | fn __repr__(&self) -> PyResult { 701 | Ok(format!( 702 | "Engine", 703 | self.optimize.diy_python_repr() 704 | )) 705 | } 706 | } 707 | 708 | /// PyO3 doesn't offer the ability to get the Python representation of a Rust 709 | /// object, so we make our own trait. 710 | trait DiyPythonRepr { 711 | fn diy_python_repr(&self) -> String; 712 | } 713 | 714 | impl DiyPythonRepr for Option 715 | where 716 | T: DiyPythonRepr, 717 | { 718 | fn diy_python_repr(&self) -> String { 719 | match self { 720 | None => "None".to_owned(), 721 | Some(x) => x.diy_python_repr(), 722 | } 723 | } 724 | } 725 | 726 | impl DiyPythonRepr for String { 727 | fn diy_python_repr(&self) -> String { 728 | let mut res = format!("{:?}", self); 729 | // This is safe to do since we know that `res` will always be of 730 | // length >= 2. 
731 | res.replace_range(0..1, "'"); 732 | res.replace_range(res.len() - 1..res.len(), "'"); 733 | res 734 | } 735 | } 736 | 737 | impl DiyPythonRepr for bool { 738 | fn diy_python_repr(&self) -> String { 739 | if *self { 740 | "True".to_owned() 741 | } else { 742 | "False".to_owned() 743 | } 744 | } 745 | } 746 | -------------------------------------------------------------------------------- /tests/test_engine.py: -------------------------------------------------------------------------------- 1 | import adblock 2 | import pytest 3 | 4 | SMALL_FILTER_LIST = """ 5 | ||wikipedia.org^ 6 | ||old.reddit.com^ 7 | ||lobste.rs^ 8 | """ 9 | 10 | 11 | def empty_engine(): 12 | return adblock.Engine(adblock.FilterSet()) 13 | 14 | 15 | def test_engine_creation_and_blocking(): 16 | filter_set = adblock.FilterSet(debug=True) 17 | filter_set.add_filter_list(SMALL_FILTER_LIST) 18 | engine = adblock.Engine(filter_set=filter_set) 19 | 20 | blocker_result_wikipedia = engine.check_network_urls( 21 | url="https://wikipedia.org/img.png", 22 | source_url="https://google.com/", 23 | request_type="image", 24 | ) 25 | assert isinstance(blocker_result_wikipedia, adblock.BlockerResult) 26 | assert blocker_result_wikipedia.matched 27 | 28 | blocker_result_facebook = engine.check_network_urls( 29 | "https://facebook.com/directory/img.png", 30 | "https://old.reddit.com/r/all", 31 | "image", 32 | ) 33 | assert isinstance(blocker_result_facebook, adblock.BlockerResult) 34 | assert not blocker_result_facebook.matched 35 | 36 | 37 | def test_serde_file(tmpdir): 38 | path = str(tmpdir / "cache.dat") 39 | 40 | engine0 = empty_engine() 41 | with pytest.raises(FileNotFoundError): 42 | # We haven't created the cache.dat file, so we should get an exception 43 | # when attempting to deserialize. 
44 | engine0.deserialize_from_file(path) 45 | 46 | engine1 = empty_engine() 47 | serialization_result = engine1.serialize_to_file(path) 48 | assert serialization_result is None 49 | 50 | engine2 = empty_engine() 51 | deserialization_result = engine2.deserialize_from_file(path) 52 | assert deserialization_result is None 53 | 54 | 55 | def test_deserialize_corrupt(tmpdir): 56 | path = str(tmpdir / "corrupt_cache.dat") 57 | with open(path, "w", encoding="utf-8") as f: 58 | f.write("abc") 59 | 60 | engine = empty_engine() 61 | with pytest.raises(adblock.DeserializationError): 62 | engine.deserialize_from_file(path) 63 | with pytest.raises(adblock.DeserializationError): 64 | engine.deserialize(b"abc") 65 | 66 | 67 | def test_serde(): 68 | engine = empty_engine() 69 | serialization_result = engine.serialize() 70 | assert isinstance(serialization_result, bytes) 71 | 72 | engine2 = empty_engine() 73 | deserialization_result = engine2.deserialize(serialization_result) 74 | assert deserialization_result is None 75 | -------------------------------------------------------------------------------- /tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | import adblock 2 | import pytest 3 | 4 | 5 | def test_correct_baseclasses(): 6 | assert issubclass(adblock.AdblockException, Exception) 7 | assert issubclass(adblock.BlockerException, adblock.AdblockException) 8 | assert issubclass(adblock.AddResourceException, adblock.AdblockException) 9 | assert issubclass(adblock.InvalidUtf8ContentError, adblock.AddResourceException) 10 | assert issubclass(adblock.InvalidBase64ContentError, adblock.AddResourceException) 11 | assert issubclass(adblock.SerializationError, adblock.BlockerException) 12 | assert issubclass(adblock.DeserializationError, adblock.BlockerException) 13 | assert issubclass(adblock.OptimizedFilterExistence, adblock.BlockerException) 14 | assert issubclass(adblock.BadFilterAddUnsupported, adblock.BlockerException) 15 
| assert issubclass(adblock.FilterExists, adblock.BlockerException) 16 | 17 | 18 | def test_add_resource_error(): 19 | filter_set = adblock.FilterSet() 20 | engine = adblock.Engine(filter_set=filter_set) 21 | 22 | with pytest.raises(adblock.InvalidBase64ContentError) as exc: 23 | engine.add_resource(name="aa", content_type="image/jpeg", content="111") 24 | assert "invalid base64 content" in str(exc.value) 25 | 26 | with pytest.raises(adblock.InvalidUtf8ContentError) as exc: 27 | # // Ensure any text contents are also valid utf8 28 | # MimeType::ApplicationJavascript | MimeType::TextPlain | MimeType::TextHtml => { 29 | # let _ = String::from_utf8(decoded)?; 30 | # } 31 | # xOO6ww== => base64.b64encode('你好'.encode('gbk')) 32 | engine.add_resource( 33 | name="aa", 34 | content_type="application/javascript", 35 | content="xOO6ww==", 36 | aliases=[], 37 | ) 38 | assert "invalid utf content" in str(exc.value) 39 | -------------------------------------------------------------------------------- /tests/test_imports.py: -------------------------------------------------------------------------------- 1 | import re 2 | import adblock 3 | 4 | 5 | def get_added_classes(): 6 | """ 7 | Try to get the names of all classes that we added to the Python module 8 | from Rust. As always, we unfortunately don't have access to the Rust AST 9 | so we have to make do with regular expressions. 10 | """ 11 | classes = [] 12 | with open("src/lib.rs", "r", encoding="utf-8") as rs_f: 13 | for line in rs_f: 14 | match = re.match(r"m\.add_class::<(.+)>\(\)\?;", line.strip()) 15 | if match is not None: 16 | classes.append(match.group(1)) 17 | continue 18 | return classes 19 | 20 | 21 | def test_added_classes(): 22 | """ 23 | Make sure that there's no class that we added in Rust but didn't import in 24 | `__init__.py`. 
25 | """ 26 | added_classes = get_added_classes() 27 | for c in added_classes: 28 | assert c in adblock.__all__ 29 | 30 | 31 | def test_dunder_all_classes_imported(): 32 | """ 33 | Make sure that there's no class in `__all__` that we haven't imported. 34 | """ 35 | for c in adblock.__all__: 36 | assert hasattr(adblock, c) 37 | -------------------------------------------------------------------------------- /tests/test_metadata.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | 4 | import toml 5 | import adblock 6 | 7 | 8 | def parse_version(version): 9 | parts = version.split(".") 10 | return tuple(map(int, parts)) 11 | 12 | 13 | def get_version_value_cargo(): 14 | with open("Cargo.toml", encoding="utf-8") as f: 15 | cargo_toml = toml.loads(f.read()) 16 | return parse_version(cargo_toml["package"]["version"]) 17 | 18 | 19 | def get_version_values_pyproject(): 20 | with open("pyproject.toml", encoding="utf-8") as f: 21 | pyproject_toml = toml.loads(f.read()) 22 | return [ 23 | parse_version(pyproject_toml["project"]["version"]), 24 | parse_version(pyproject_toml["tool"]["poetry"]["version"]), 25 | ] 26 | 27 | 28 | def get_version_value_changelog(): 29 | """ 30 | Return the newest version number listed in `CHANGELOG.md`, after checking 31 | that the version headings appear in descending order. Headings are found 32 | with a regular expression of the form `## X.Y.Z - (YYYY-MM-DD)`. 
33 | """ 34 | versions = [] 35 | with open("CHANGELOG.md", "r", encoding="utf-8") as f: 36 | for line in f: 37 | match = re.match( 38 | r"## ([0-9]+\.[0-9]+\.[0-9]+) - \(20[0-9]+-[0-1][0-9]-[0-3][0-9]\)", 39 | line.strip(), 40 | ) 41 | if match is not None: 42 | versions.append(parse_version(match.group(1))) 43 | assert versions == sorted(versions, reverse=True) 44 | return versions[0] 45 | 46 | 47 | def test_version_numbers_all_same(): 48 | """ 49 | Makes sure that `pyproject.toml`, `Cargo.toml` and `CHANGELOG.md` contain 50 | the same version number as the one attached to the `adblock` module. 51 | """ 52 | cargo_version = get_version_value_cargo() 53 | changelog_version = get_version_value_changelog() 54 | pyproject_versions = get_version_values_pyproject() 55 | module_version = parse_version(adblock.__version__) 56 | 57 | assert cargo_version == module_version 58 | assert module_version == changelog_version 59 | assert changelog_version == pyproject_versions[0] 60 | assert pyproject_versions[0] == pyproject_versions[1] 61 | 62 | 63 | def get_current_python_version(): 64 | return (sys.version_info.major, sys.version_info.minor, sys.version_info.micro) 65 | 66 | 67 | def test_required_python_version(): 68 | """ 69 | Make sure that the Python interpreter we're running this test suite on 70 | falls into the required Python range. 
71 | """ 72 | with open("pyproject.toml", encoding="utf-8") as f: 73 | pyproject_toml = toml.loads(f.read()) 74 | 75 | required_python = pyproject_toml["project"]["requires-python"] 76 | assert required_python.startswith(">=") 77 | required_python = required_python[2:] 78 | assert get_current_python_version() >= parse_version(required_python) 79 | -------------------------------------------------------------------------------- /tests/test_redirect.py: -------------------------------------------------------------------------------- 1 | import adblock 2 | 3 | 4 | def test_redirect_worked_as_excepted_with_include_redirect_urls(): 5 | # https://github.com/brave/adblock-rust/blob/b7f29af8c0a0d000201d8d769b6a0b25a9dd4e89/src/blocker.rs#L1242 6 | filter_set = adblock.FilterSet() 7 | filter_set.add_filter_list( 8 | "||foo.com$important,redirect-url=http://xyz.com", include_redirect_urls=True 9 | ) 10 | 11 | engine = adblock.Engine(filter_set=filter_set) 12 | 13 | res = engine.check_network_urls("https://foo.com", "https://foo.com", "script") 14 | assert res.matched is True 15 | assert res.important is True 16 | assert res.redirect_type == "url" 17 | assert res.redirect == "http://xyz.com" 18 | 19 | 20 | def test_redirect_url_is_not_recognized_without_include_redirect_urls(): 21 | # https://github.com/brave/adblock-rust/blob/b7f29af8c0a0d000201d8d769b6a0b25a9dd4e89/src/blocker.rs#L1267 22 | filter_set2 = adblock.FilterSet() 23 | filter_set2.add_filter_list( 24 | "||foo.com$important,redirect-url=http://xyz.com", include_redirect_urls=False 25 | ) 26 | 27 | engine2 = adblock.Engine(filter_set=filter_set2) 28 | 29 | res = engine2.check_network_urls("https://foo.com", "https://foo.com", "script") 30 | assert res.matched is False 31 | assert res.redirect is None 32 | assert res.redirect_type is None 33 | 34 | 35 | def test_redirect_url_exception(): 36 | # https://github.com/brave/adblock-rust/blob/b7f29af8c0a0d000201d8d769b6a0b25a9dd4e89/src/blocker.rs#L1314 37 | filter_set = 
adblock.FilterSet(debug=True) 38 | filter_set.add_filters( 39 | [ 40 | "||imdb-video.media-imdb.com$media,redirect-url=http://xyz.com", 41 | "@@||imdb-video.media-imdb.com^$domain=imdb.com", 42 | ], 43 | include_redirect_urls=True, 44 | ) 45 | 46 | engine2 = adblock.Engine(filter_set=filter_set, optimize=False) 47 | 48 | res = engine2.check_network_urls( 49 | "https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", 50 | "https://www.imdb.com/video/13", 51 | "media", 52 | ) 53 | assert res.error is None 54 | assert res.matched is False 55 | assert res.redirect == "http://xyz.com" 56 | assert res.redirect_type == "url" 57 | assert res.exception is None 58 | 59 | 60 | def test_redirect_with_custom_resource(): 61 | filters = adblock.FilterSet() 62 | filters.add_filter_list("-advertisement-$redirect=test\n") 63 | 64 | engine = adblock.Engine(filter_set=filters) 65 | engine.add_resource( 66 | name="test", content_type="application/javascript", content="YWxlcnQoMSk=" 67 | ) 68 | 69 | result = engine.check_network_urls( 70 | url="http://example.com/-advertisement-icon.", 71 | source_url="example.com", 72 | request_type="image", 73 | ) 74 | 75 | assert result.matched 76 | assert not result.exception 77 | assert not result.important 78 | assert result.redirect == "data:application/javascript;base64,YWxlcnQoMSk=" 79 | -------------------------------------------------------------------------------- /tests/test_repr.py: -------------------------------------------------------------------------------- 1 | import adblock 2 | import re 3 | 4 | 5 | def assert_acceptable_repr(obj): 6 | # Default repr is r"<[A-Za-z]+ object at 0x[0-9a-f]+>" 7 | assert "object at" not in repr(obj) 8 | assert re.match(r"[A-Z][a-zA-Z]+\(.*\)", repr(obj)) or re.match( 9 | r"([A-Z][a-zA-Z]+)?<.*>", repr(obj) 10 | ) 11 | 12 | 13 | def test_has_nondefault_repr(): 14 | for b in (True, False): 15 | fs = adblock.FilterSet(debug=b) 16 | assert_acceptable_repr(fs) 17 | assert repr(b) in repr(fs) 18 | 19 | 
fs.add_filters(["||example.com^"]) 20 | 21 | e = adblock.Engine(fs) 22 | assert_acceptable_repr(e) 23 | 24 | result = e.check_network_urls( 25 | "https://example.com/picture.png", "https://example.net", "image" 26 | ) 27 | assert_acceptable_repr(result) 28 | assert repr(result) == ( 29 | "BlockerResult(matched={}, important={}, redirect={}, exception={}, filter={}, error={})".format( 30 | repr(result.matched), 31 | repr(result.important), 32 | repr(result.redirect), 33 | repr(result.exception), 34 | repr(result.filter), 35 | repr(result.error), 36 | ) 37 | ) 38 | -------------------------------------------------------------------------------- /tests/test_typestubs.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | 4 | 5 | def read_stubfile(): 6 | with open("adblock/adblock.pyi", encoding="utf-8") as file: 7 | node = ast.parse(file.read()) 8 | return node 9 | 10 | 11 | def get_functions_and_methods(node): 12 | functions = [n for n in node.body if isinstance(n, ast.FunctionDef)] 13 | classes = [n for n in node.body if isinstance(n, ast.ClassDef)] 14 | 15 | methods = {} 16 | for c in classes: 17 | methods[c.name] = [n for n in c.body if isinstance(n, ast.FunctionDef)] 18 | 19 | return functions, methods 20 | 21 | 22 | def pattern_exists_in_file(filename, regex): 23 | """ 24 | Checks if the given regex is present in the given file 25 | """ 26 | with open(filename, "r", encoding="utf-8") as f: 27 | for line in f: 28 | if re.search(regex, line): 29 | return True 30 | return False 31 | 32 | 33 | def test_functions_and_methods_exist_in_rust(): 34 | """ 35 | Check that for each of the functions and methods present in the Python 36 | typestub file, there is a line in `src/lib.rs` containing a matching 37 | definition. Since we're doing a naive grep search, without access to the 38 | Rust AST, there may be false negatives. 
39 | """ 40 | stubfile_node = read_stubfile() 41 | functions, methods = get_functions_and_methods(stubfile_node) 42 | 43 | methods_flattened = [] 44 | for class_methods in methods.values(): 45 | methods_flattened += class_methods 46 | 47 | for f in functions + methods_flattened: 48 | if f.name.startswith("__"): 49 | # Skip dunder methods since their names are the same for every 50 | # class, making the test not particularly useful. They are also not 51 | # marked `pub` in Rust. 52 | continue 53 | assert pattern_exists_in_file("src/lib.rs", r"pub fn {}".format(f.name)) 54 | -------------------------------------------------------------------------------- /web/create_site.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euxo pipefail 3 | 4 | # This script assumes that the current working directory is the repository 5 | # root. 6 | 7 | mkdir -p target 8 | cp -r web/static target/github-pages 9 | cargo doc --all-features 10 | cp -r target/doc target/github-pages/docs 11 | -------------------------------------------------------------------------------- /web/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Python Adblock 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 |

Python Adblock

13 | 17 |
18 | 19 | 20 | --------------------------------------------------------------------------------