├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── ci.yml ├── .python-version-default ├── CHANGELOG.md ├── LICENSE ├── README.md ├── chart ├── .helmignore ├── Chart.yaml ├── fediblockhole.conf.toml ├── templates │ ├── _helpers.tpl │ ├── configmap-conf-toml.yaml │ └── cronjob-fediblock-sync.yaml └── values.yaml ├── container ├── .dockerignore └── Dockerfile ├── etc └── sample.fediblockhole.conf.toml ├── pyproject.toml ├── requirements.txt ├── samples ├── demo-allowlist-01.csv ├── demo-allowlist-02.csv └── demo-blocklist-01.csv ├── src └── fediblockhole │ ├── __init__.py │ ├── blocklists.py │ └── const.py ├── tests ├── conftest.py ├── fixtures │ ├── __init__.py │ ├── data-mastodon.json │ ├── data-noop-01.csv │ ├── data-rapidblock.json │ ├── data-silences-01.csv │ └── data-suspends-01.csv ├── helpers │ ├── __init__.py │ └── util.py ├── test_allowlist.py ├── test_blockseverity.py ├── test_cmdline.py ├── test_configfile.py ├── test_domainblock.py ├── test_merge_comments.py ├── test_merge_thresholds.py ├── test_mergeplan.py ├── test_parser_csv.py ├── test_parser_csv_mastodon.py ├── test_parser_json.py ├── test_parser_rapidblockcsv.py └── test_parser_rapidblockjson.py └── uv.lock /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length: 88 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behaviour: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behaviour** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Environment** 24 | Help us understand where the code is running. 25 | - OS/Distribution: [e.g. Linux/Ubuntu/Debian] 26 | - Python version [e.g. Python 3.10] 27 | - Mastodon version [e.g. Mastodon 4.10] 28 | 29 | **Additional context** 30 | Add any other context about the problem here that could help us find and fix the bug. 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. E.g. "I am frustrated when [...]" 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 4 | on: 5 | merge_group: 6 | push: 7 | branches: [main, "ci-*"] 8 | tags: ["*"] 9 | pull_request: 10 | branches: [main, "ci-*"] 11 | workflow_dispatch: 12 | 13 | env: 14 | FORCE_COLOR: "1" 15 | PIP_DISABLE_PIP_VERSION_CHECK: "1" 16 | PIP_NO_PYTHON_VERSION_WARNING: "1" 17 | 18 | permissions: {} 19 | 20 | jobs: 21 | build-package: 22 | name: Build & verify package 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - uses: actions/checkout@v4 27 | with: 28 | fetch-depth: 0 29 | 30 | - uses: hynek/build-and-inspect-python-package@v2 31 | id: baipp 32 | with: 33 | skip-wheel: true 34 | 35 | outputs: 36 | # Used to define the matrix for tests below. The value is based on 37 | # packaging metadata (trove classifiers). 38 | supported-python-versions: ${{ steps.baipp.outputs.supported_python_classifiers_json_array }} 39 | 40 | tests: 41 | name: Tests on ${{ matrix.python-version }} 42 | runs-on: ubuntu-latest 43 | needs: build-package 44 | 45 | strategy: 46 | fail-fast: false 47 | matrix: 48 | # Created by the build-and-inspect-python-package action above. 49 | python-version: ${{ fromJson(needs.build-package.outputs.supported-python-versions) }} 50 | 51 | steps: 52 | - name: Download pre-built packages 53 | uses: actions/download-artifact@v4 54 | with: 55 | name: Packages 56 | path: dist 57 | - run: tar xf dist/*.tar.gz --strip-components=1 58 | - uses: actions/setup-python@v5 59 | with: 60 | python-version: ${{ matrix.python-version }} 61 | allow-prereleases: true 62 | - uses: hynek/setup-cached-uv@v2 63 | - run: uv venv 64 | - run: uv pip install .[dev] 65 | - run: | 66 | cd tests 67 | uv run coverage run -p -m pytest 68 | 69 | - name: Upload coverage data 70 | uses: actions/upload-artifact@v4 71 | with: 72 | name: coverage-data-${{ matrix.python-version }} 73 | path: tests/.coverage.* 74 | include-hidden-files: true 75 | 76 | coverage: 77 | name: Combine & check coverage 78 | runs-on: ubuntu-latest 79 | needs: tests 80 | 81 | steps: 82 | - name: Download pre-built packages 83 | uses: actions/download-artifact@v4 84 | with: 85 | name: Packages 86 | path: dist 87 | - run: tar xf dist/*.tar.gz --strip-components=1 88 | - uses: actions/setup-python@v5 89 | with: 90 | python-version-file: .python-version-default 91 | - uses: hynek/setup-cached-uv@v2 92 | 93 | - name: Download coverage data 94 | uses: actions/download-artifact@v4 95 | with: 96 | pattern: coverage-data-* 97 | merge-multiple: true 98 | 99 | - run: uv venv 100 | - name: Combine coverage & fail if it's <70%. 101 | run: | 102 | uv pip install --system coverage[toml] 103 | 104 | uv run coverage combine 105 | uv run coverage html --skip-covered --skip-empty 106 | 107 | # Report and write to summary. 108 | uv run coverage report --format=markdown >> $GITHUB_STEP_SUMMARY 109 | 110 | # Report again and fail if under 70%. 111 | uv run coverage report --fail-under=70 112 | 113 | - name: Upload HTML report if check failed. 
114 | uses: actions/upload-artifact@v4 115 | with: 116 | name: html-report 117 | path: htmlcov 118 | if: ${{ failure() }} 119 | 120 | docs: 121 | name: Build docs & run doctests 122 | runs-on: ubuntu-latest 123 | needs: build-package 124 | steps: 125 | - name: Download pre-built packages 126 | uses: actions/download-artifact@v4 127 | with: 128 | name: Packages 129 | path: dist 130 | - run: tar xf dist/*.tar.gz --strip-components=1 131 | - uses: actions/setup-python@v5 132 | with: 133 | # Keep in sync with tox/docs and .readthedocs.yaml. 134 | python-version: "3.12" 135 | - uses: hynek/setup-cached-uv@v2 136 | - run: uv venv 137 | 138 | # pyright: 139 | # name: Check types using pyright 140 | # runs-on: ubuntu-latest 141 | # steps: 142 | # - uses: actions/checkout@v4 143 | # - uses: actions/setup-python@v5 144 | # with: 145 | # python-version-file: .python-version-default 146 | # - uses: hynek/setup-cached-uv@v2 147 | 148 | # - run: uv venv 149 | # - run: uv pip install .[dev] pyright typing 150 | # - run: uv run pyright src 151 | 152 | install-dev: 153 | name: Verify dev env 154 | runs-on: ubuntu-latest 155 | 156 | steps: 157 | - uses: actions/checkout@v4 158 | - uses: actions/setup-python@v5 159 | with: 160 | python-version-file: .python-version-default 161 | - uses: hynek/setup-cached-uv@v2 162 | 163 | - run: uv venv --python $(cat .python-version-default) 164 | - run: uv pip install -e .[dev] 165 | 166 | - name: Ensure we can import the fediblockhole package 167 | run: | 168 | source .venv/bin/activate 169 | 170 | python -Ic 'import fediblockhole; print(fediblockhole.__version__)' 171 | 172 | # Ensure everything required is passing for branch protection. 173 | required-checks-pass: 174 | if: always() 175 | 176 | needs: 177 | - coverage 178 | - docs 179 | - install-dev 180 | 181 | runs-on: ubuntu-latest 182 | 183 | steps: 184 | - name: Decide whether the needed jobs succeeded or failed 185 | uses: re-actors/alls-green@release/v1 186 | with: 187 | jobs: ${{ toJSON(needs) }} 188 | -------------------------------------------------------------------------------- /.python-version-default: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Notable changes to the project will be documented in this changelog. 4 | 5 | This project uses [Semantic Versioning] and generally follows the conventions of [Keep A Changelog]. 6 | 7 | ## [Unreleased] 8 | 9 | ## [v0.4.6] - 2024-11-01 10 | 11 | ### Added 12 | 13 | - Added use of Flake8 as linter to improve code consistency (59d306a) 14 | - Added use of the Black code formatting tool to improve code consistency (59d306a) 15 | - Cleaned up code lint and formatting (ffb8219) 16 | - Added issue templates to help new contributors get their issues addressed quicker (3b655d6) 17 | - Added ability to use environment variables to provide BearerTokens. 
Thanks to @offbyone (8c5761e)

## [v0.4.5] - 2023-12-30

### Added

- Added `override_private_comment` option for annotation of automated blocks (4d12bac)
- Added blocklist audit file option to debug and track blocklist operations (9200fc3)

## [v0.4.4] - 2023-07-09

### Added

- Added citation for creators of #Fediblock (a64875b)
- Added parser for Mastodon 4.1 blocklist CSV format (9f95f14)
- Added container support (76d5b61)

### Fixed

- Use __future__.annotations so type hints work with Python < 3.9 (8265639)
- Test util no longer tries to load default config file if conf tomldata is empty. (2da57b2)

## [v0.4.3] - 2023-02-13

### Added

- Added Mastodon public API parser type because #33 (9fe9342)
- Added ability to set scheme when talking to instances (9fe9342)
- Added tests of comment merging. (fb3a7ec)
- Added blocklist thresholds. (bb1d89e)
- Added logging to help debug threshold-based merging. (b67ff0c)
- Added extra documentation on configuring thresholds. (6c72af8)
- Updated documentation to reflect Mastodon v4.1.0 changes to the application scopes screen. (b92dd21)

### Changed

- Dropped minimum Python version to 3.6 (df3c16f)
- Don't merge comments if new comment is empty. (b8aa11e)
- Tweaked comment merging to pass tests. (fb3a7ec)

## [v0.4.2] - 2023-01-19

### Fixed

- Blockdata var already converted to _asdict() (8d3b9da)

## [v0.4.1] - 2023-01-15

Allowlist support.

### Added

- Allowlists just remove blocks from merged list before push. (a25773f)
- Added helper submodule for testing utils (bf48a96)
- Added basic tests of allowlist config args. (a3d3571)
- Added test cases for cmdline parsing. (11accf3)
- Added test cases for configfile parsing. (11accf3)
- Added documentation on allowlists. (26f5464)
- Fixed bug in how DomainBlock defaults handle reject_media, reject_reports. (6d4e18b)
- Added support for allowlists. Updated docstring for merge_blocklists() (7a31c33)
- Added DomainBlock type hint to update_known_block(). (69c28f1)
- Use ._asdict() to get info to pass to add block API call. (69c28f1)

### Changed

- Updated README to explain allowlist mechanism. (dc4bbd7)
- Edited sample config to better explain URL sources (9bd7914)
- Restructured argparsing for easier testing. (11accf3)
- str2bool() now converts '' to False. Added some extra debug logging of blocklist parsing. (894b133)
- Updated documentation to explain need for `admin:read` access to fetch followers stats. (2cec9e1)
- Aligned API call rate limit with server default. (55dad3f)

### Removed

- Removed implied setting of reject_media/reports if severity is set to 'suspend'. (3aa2e37)

### Fixed

- Fixed bug: mergeplan in config file was ignored. Reported in #22 (11accf3)
- Fixed bug in _asdict() of severity level. (9817c99)
- Fix DomainBlock.id usage during __iter__() (a718af5)

## [v0.4.0] - 2023-01-13

Substantial changes to better support multiple blocklist formats.

### Added

- Added support for RapidBlock blocklists, both CSV and JSON formats. (327a44d)
- Added support for per-instance-source import_fields. (327a44d)
- Updated sample config to include new formats. (327a44d)
- A BlockSeverity of 'suspend' implies reject_media and reject_reports. (327a44d)
- Added ability to limit max severity per-URL source. (10011a5)
- Added boolean fields like 'reject_reports' to mergeplan handling. (66f0373)
- Added tests for boolean merge situations. (66f0373)
- Various other test cases added.

### Changed

- Refactored to add a DomainBlock object. (10011a5)
- Refactored to use a BlockParser structure. (10011a5)
- Improved method for checking if changes are needed. (10011a5)
- Refactored fetch from URLs and instances. (327a44d)
- Improved check_followed_severity() behaviour. (327a44d)
- Changed API delay to be in calls per hour. (327a44d)
- Improved comment merging. (0a6eec4)
- Clarified logic in apply_mergeplan() for boolean fields. (66f0373)
- Updated README documentation. (ee9625d)
- Aligned API call rate limit with server default. (55dad3f)

### Removed

- Removed redundant global vars. (327a44d)

### Fixed

- Fixed bug in severity change detection. (e0d40b5)
- Fix DomainBlock.id usage during __iter__() (a718af5)

## [v0.3.0] - 2023-01-11

### Added

- Added args to show version information. (1d0649a)
- Added timeout to requests calls. (23b8833)
- Added CHANGELOG.md (ca9d958)

### Changed

- Changed min Python version to v3.10. (f37ab70)

## [v0.2.1] - 2023-01-10

### Added

- User-Agent is set to FediBlockHole to identify ourselves to remote servers. (04d9eea)
- Adding packaging to prepare for submission to PyPI. (4ab369f)
- Added ability to set max severity level if an instance has followers of accounts on a to-be-blocked domain. (5518421)
- Added ability to read domain_blocks from instances that make the list public. (4ef84b5)
- Skip obfuscated domains when building the merged blocklist. (4ef84b5)

### Changed

- Updated documentation in README and the sample config. (68a2c93)

### Fixed

- Fixed a bug in config enablement of intermediate blocklists saving. (5518421)

## Before 2023-01-10

- Initial rough versions that were not packaged.


[keep a changelog]: https://keepachangelog.com/en/1.0.0/
[semantic versioning]: https://semver.org/spec/v2.0.0.html


[unreleased]: https://github.com/eigenmagic/fediblockhole/compare/v0.4.6...HEAD
[v0.4.6]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.6
[v0.4.5]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.5
[v0.4.4]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.4
[v0.4.3]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.3
[v0.4.2]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.2
[v0.4.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.1
[v0.4.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.0
[v0.3.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.3.0
[v0.2.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.2.1
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.
7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 
76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 
134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 
197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. 
This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. 
But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 
375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. 
You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. 
"Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. 
This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 
  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published
    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# FediBlockHole

A tool for keeping a Mastodon instance blocklist synchronised with remote lists.

The broad design goal for FediBlockHole is to support pulling in a list of
blocklists from a set of trusted sources, merging them into a combined
blocklist, and then pushing that merged list to a set of managed instances.

Mastodon admins can choose who they think maintains quality lists and subscribe
to them, helping to distribute the load of maintaining blocklists across a
community of people. Control ultimately rests with the admins themselves, so
they can outsource as much, or as little, of the effort to others as they deem
appropriate.

FediBlockHole is inspired by the way PiHole maintains a set of blocklists of
adtech domains. It builds on the work of
[@CaribenxMarciaX@scholar.social](https://scholar.social/@CaribenxMarciaX) and
[@gingerrroot@kitty.town](https://kitty.town/@gingerrroot), who started the
#Fediblock hashtag and did a lot of advocacy around it, often at great personal
cost.

## Features

### Blocklist Sources

- Read domain block lists from other instances via the Mastodon API.
- Supports both public lists (no auth required) and 'admin' lists requiring
  authentication to an instance.
- Read domain block lists from arbitrary URLs, including local files.
- Supports CSV and JSON format blocklists.
- Supports RapidBlock CSV and JSON format blocklists.

### Blocklist Export/Push

- Push a merged blocklist to a set of Mastodon instances.
- Export per-source, unmerged block lists to local files, in CSV format.
- Export merged blocklists to local files, in CSV format.
- Read block lists from multiple remote instances.
- Read block lists from multiple URLs, including local files.
- Write a unified block list to a local CSV file.
- Push unified blocklist updates to multiple remote instances.
- Control import and export fields.

### Flexible Configuration

- Provides (hopefully) sensible defaults to minimise first-time setup.
- Global and fine-grained configuration options are available for those complex
  situations that crop up sometimes.
- Allowlists to override blocks in blocklists, ensuring you never block
  instances you want to keep.
- Blocklist thresholds if you only want to block when an instance shows up in
  multiple blocklists.

## Installing

Installable using `pip`:

```
python3 -m pip install fediblockhole
```

Install from source by cloning the repo, changing into the `fediblockhole`
directory, and running:

```
python3 -m pip install .
```

Installation adds a commandline tool: `fediblock-sync`.

Instance admins who want to use this tool for their instance will need to add
an Application at `https://<your-instance>/settings/applications/` so they can
authorize the tool to create and update domain blocks with an OAuth token.

More on authorization by token below.

### Reading remote instance blocklists

If a remote instance makes its domain blocks public, you don't need
a token to read them.

If a remote instance only shows its domain blocks to local accounts,
you'll need a token with `read:blocks` authorization set up.
If you have an account on that instance, you can get a token by setting up a new
Application at `https://<your-instance>/settings/applications/`.

To read admin blocks from a remote instance, you'll need to ask the instance
admin to add a new Application at
`https://<their-instance>/settings/applications/` and then tell you the access
token.

The application needs the `admin:read:domain_blocks` OAuth scope. You can allow
full `admin:read` access, but be aware that this authorizes someone to read all
the data in the instance. That's asking a lot of a remote instance admin who
just wants to share domain_blocks with you.

The `admin:read:domain_blocks` scope is available as of Mastodon v4.1.0, but for
earlier versions admins will need to use the manual method described below.
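
If you want to sanity-check a token before pointing FediBlockHole at an
instance, you can query the relevant Mastodon API endpoints by hand. A minimal
sketch using `curl`, assuming a hypothetical instance `example.social` and an
access token in the `TOKEN` environment variable:

```
# Public domain blocks (no token needed if the instance publishes them):
curl -s https://example.social/api/v1/instance/domain_blocks

# Admin domain blocks (requires the admin:read:domain_blocks scope):
curl -s -H "Authorization: Bearer $TOKEN" \
    https://example.social/api/v1/admin/domain_blocks
```

If the second call returns a `403`, the token is missing the scope, which is
where the manual database workaround comes in.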
95 | 
96 | You can update the scope for your application in the database directly like
97 | this:
98 | 
99 | ```
100 | UPDATE oauth_applications as app
101 | SET scopes = 'admin:read:domain_blocks'
102 | FROM oauth_access_tokens as tok
103 | WHERE app.id = tok.application_id
104 | AND app.name = '<your application name>'
105 | ;
106 | ```
107 | 
108 | When that's done, regenerate the token (so it has the new scopes) in the
109 | application screen in the instance GUI. FediBlockHole should then be able to use
110 | the app token to read domain blocks via the API, but nothing else.
111 | 
112 | Alternately, you could ask the remote instance admin to set up FediBlockHole and
113 | use it to dump out a CSV blocklist from their instance and then put it somewhere
114 | trusted parties can read it. Then you can define the blocklist as a URL source,
115 | as explained below.
116 | 
117 | ### Writing instance blocklists
118 | 
119 | To write domain blocks into an instance requires both the `admin:read` and
120 | `admin:write:domain_blocks` OAuth scopes.
121 | 
122 | The tool needs `admin:read:domain_blocks` scope to read the current list of
123 | domain blocks so we update ones that already exist, rather than trying to add
124 | all new ones and clutter up the instance.
125 | 
126 | `admin:read` access is needed to check if the instance has any accounts that
127 | follow accounts on a domain that is about to get `suspend`ed and automatically
128 | drop the block severity to `silence` level so people have time to migrate
129 | accounts before a full defederation takes effect. Unfortunately, the statistics
130 | measure used to learn this information requires `admin:read` scope.
131 | 
132 | You can add `admin:read` scope in the application admin screen. Please be aware
133 | that this grants full read access to all information in the instance to the
134 | application token, so make sure you keep it a secret. At least remove
135 | world-readable permission from any config file you put it in, e.g.:
136 | 
137 | ```
138 | chmod o-r <configfile>
139 | ```
140 | 
141 | You can also grant full `admin:write` scope to the application, but if you'd
142 | prefer to keep things more tightly secured, limit the scope to
143 | `admin:read:domain_blocks`.
144 | 
145 | Again, this scope is only available in the application config screen as of
146 | Mastodon v4.1.0. If your instance is on an earlier version, you'll need to use
147 | SQL to set the scopes in the database and then regenerate the token:
148 | 
149 | ```
150 | UPDATE oauth_applications as app
151 | SET scopes = 'admin:read admin:write:domain_blocks'
152 | FROM oauth_access_tokens as tok
153 | WHERE app.id = tok.application_id
154 | AND app.name = '<your application name>'
155 | ;
156 | ```
157 | 
158 | When that's done, FediBlockHole should be able to use its token to authorise
159 | adding or updating domain blocks via the API.
160 | 
161 | ## Using the tool
162 | 
163 | Run the tool like this:
164 | 
165 | ```
166 | fediblock-sync -c <configfile>
167 | ```
168 | 
169 | If you put the config file in `/etc/default/fediblockhole.conf.toml` you don't
170 | need to pass in the config file path.
171 | 
172 | For a list of possible configuration options, check the `--help` output.
173 | 
174 | You can also read the heavily commented sample configuration file in the repo at
175 | [etc/sample.fediblockhole.conf.toml](https://github.com/eigenmagic/fediblockhole/blob/main/etc/sample.fediblockhole.conf.toml).
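
For example, to see what the tool would do with your config without applying any
changes, you can combine a config file with the `--dryrun` option (described
further below); the path here is the default config location:

```
fediblock-sync -c /etc/default/fediblockhole.conf.toml --dryrun
```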
176 | 
177 | ## Configuring
178 | 
179 | Once you have your applications and tokens and scopes set up, create a
180 | configuration file for FediBlockHole to use. You can put it anywhere and use the
181 | `-c <configfile>` commandline parameter to tell FediBlockHole where it is.
182 | 
183 | Or you can use the default location of `/etc/default/fediblockhole.conf.toml`.
184 | 
185 | As the filename suggests, FediBlockHole uses TOML syntax.
186 | 
187 | There are 4 key sections:
188 | 
189 | 1. `blocklist_url_sources`: A list of URLs to read blocklists from
190 | 1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API
191 | 1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API
192 | 1. `allowlist_url_sources`: A list of URLs to read allowlists from
193 | 
194 | More detail on configuring the tool is provided below.
195 | 
196 | ### URL sources
197 | 
198 | The `blocklist_url_sources` setting is a list of URLs to fetch blocklists from.
199 | 
200 | Supported formats are currently:
201 | 
202 | - Comma-Separated Values (CSV)
203 | - JSON
204 | - Mastodon v4.1 flavoured CSV
205 | - RapidBlock CSV
206 | - RapidBlock JSON
207 | 
208 | Blocklists must provide a `domain` field, and should provide a `severity` field.
209 | 
210 | `domain` is the domain name of the instance to be blocked/limited.
211 | 
212 | `severity` is the severity level of the block/limit. Supported values are: `noop`, `silence`, and `suspend`.
213 | 
214 | Optional fields that the tool understands are `public_comment`, `private_comment`, `reject_media`, `reject_reports`, and `obfuscate`.
215 | 
216 | #### CSV format
217 | 
218 | A CSV format blocklist must contain a header row with at least a `domain` and `severity` field.
219 | 
220 | Optional fields, as listed above, may also be included.
221 | 
222 | #### Mastodon v4.1 CSV format
223 | 
224 | As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
225 | infinite wisdom, the Mastodon devs decided that field names should begin with a
226 | `#` character in the header, unlike the field names in the JSON output via the
227 | API… or in pretty much any other CSV file anywhere else.
228 | 
229 | Setting the format to `mastodon_csv` will strip off the `#` character when
230 | parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
231 | other CSV formatted blocklist.
232 | 
233 | #### JSON format
234 | 
235 | JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.
236 | 
237 | This is a list of dictionaries, with at minimum a `domain` field, and preferably
238 | a `severity` field. The other optional fields are, well, optional.
239 | 
240 | #### RapidBlock CSV format
241 | 
242 | The RapidBlock CSV format has no header and a single field, so it's not
243 | _strictly_ a CSV file as there are no commas separating values. It is basically
244 | just a list of domains to block, separated by '\r\n'.
245 | 
246 | When using this format, the tool assumes the `severity` level is `suspend`.
247 | 
248 | #### RapidBlock JSON format
249 | 
250 | The RapidBlock JSON format provides more detailed information about domain
251 | blocks, but is still somewhat limited.
252 | 
253 | It has a single `isBlocked` flag indicating if a domain should be blocked or
254 | not. There is no support for the 'silence' block level.
255 | 
256 | There is no support for 'reject_media' or 'reject_reports' or 'obfuscate'.
257 | 
258 | All comments are public, by virtue of the public nature of RapidBlock.
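
Putting the above together, a `blocklist_url_sources` list covering a couple of
these formats might look like the following sketch. The URLs are placeholders;
the `csv` and `mastodon_csv` format keys are the ones described above:

```
blocklist_url_sources = [
  { url = 'https://example.org/exported-blocklist.csv', format = 'csv' },
  { url = 'file:///etc/fediblockhole/mastodon-export.csv', format = 'mastodon_csv' },
]
```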
259 | 
260 | ### Instance sources
261 | 
262 | The tool can also read domain_blocks from instances directly.
263 | 
264 | The configuration is a list of dictionaries of the form:
265 | ```
266 | { domain = '<domain>', token = '<token>', admin = false }
267 | ```
268 | 
269 | The `domain` is the fully-qualified domain name of the API host for an instance
270 | you want to read domain blocks from.
271 | 
272 | The `token` is an optional OAuth token for the application that's configured in
273 | the instance to allow you to read domain blocks, as discussed above.
274 | 
275 | The `token` can also be specified using environment variables. This provides
276 | improved security compared to storing the OAuth token in a configuration file,
277 | but it will require the environment variable to be set so that FediBlockHole can
278 | access it. See below in [Instance destinations](#instance-destinations) for more
279 | detail on how to use environment variables to provide authentication tokens.
280 | 
281 | `admin` is an optional field that tells the tool to use the more detailed admin
282 | API endpoint for domain_blocks, rather than the more public API endpoint that
283 | doesn't provide as much detail. You will need a `token` that's been configured to
284 | permit access to the admin domain_blocks scope, as detailed above.
285 | 
286 | ### Instance destinations
287 | 
288 | The tool supports pushing a unified blocklist to multiple instances.
289 | 
290 | Configure the list of instances you want to push your blocklist to in the
291 | `blocklist_instance_destinations` list. Each entry is of the form:
292 | 
293 | ```
294 | { domain = '<domain>', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
295 | ```
296 | 
297 | The field `domain` is required. It is the fully-qualified domain name of the
298 | instance you want to push to.
299 | 
300 | A Bearer token is also required for authenticating with the instance. It can be provided in two ways:
301 | 
302 | 1. A token can be provided directly in the entry as a `token` field, like this:
303 | ```
304 | { domain = '<domain>', token = '<token>', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
305 | ```
306 | This was the only mechanism available up to version 0.4.5 of FediBlockHole.
307 | 
308 | 1. A token can be provided from the environment.
309 | 
310 | If a token is not directly provided with the `token` field, FediBlockHole will
311 | look for an environment variable that contains the token.
312 | 
313 | By default, the name of the environment variable will be the domain name
314 | converted to upper case and with dot/period characters converted to
315 | underscores, and the suffix `_TOKEN`. For example, the token variable for the
316 | domain `eigenmagic.net` would be `EIGENMAGIC_NET_TOKEN`.
317 | 
318 | You can also specify the environment variable to look for, using the
319 | `token_env_var` field, like this:
320 | ```
321 | { domain = '<domain>', token_env_var = 'MY_CUSTOM_DOMAIN_TOKEN', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
322 | ```
323 | 
324 | FediBlockHole will then look for a token in the `MY_CUSTOM_DOMAIN_TOKEN` environment variable.
325 | 
326 | If a specific `token_env_var` is provided, the default variable name will
327 | not be used. If both the `token` and `token_env_var` fields are provided,
328 | the token provided in the `token` field will be used, and a warning will be
329 | issued to notify you that you might have misconfigured things.
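
For example, to supply the token for the domain `eigenmagic.net` through the
default environment variable described above before a run:

```
export EIGENMAGIC_NET_TOKEN='<token>'
fediblock-sync -c /etc/default/fediblockhole.conf.toml
```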
330 | 
331 | 
332 | The Bearer token must be
333 | an application token with both `admin:read:domain_blocks` and
334 | `admin:write:domain_blocks` authorization.
335 | 
336 | The fields `import_fields`, `max_severity`, and `max_followed_severity` are optional.
337 | 
338 | The optional `import_fields` setting allows you to restrict which fields are
339 | imported from each instance. If you want to import the `reject_reports` settings
340 | from one instance, but no others, you can use the `import_fields` setting to do
341 | it. **Note:** The `domain` and `severity` fields are always imported.
342 | 
343 | The optional `max_severity` setting limits the maximum severity you will allow a
344 | remote blocklist to set. This lets you import a list from a remote instance but
345 | cap its blocks at the `silence` level, even if that remote instance has a block at
346 | `suspend` level. If not set, it defaults to `suspend`.
347 | 
348 | The optional `max_followed_severity` setting sets a per-instance limit on the
349 | severity of a domain_block if there are accounts on the instance that follow
350 | accounts on the domain to be blocked. If `max_followed_severity` isn't set, it
351 | defaults to `silence`.
352 | 
353 | This setting exists to give people time to move off an instance that is about to
354 | be defederated and bring their followers from your instance with them. Without
355 | it, if a new `suspend` block appears in any of the blocklists you subscribe to (or
356 | a block level increases from `silence` to `suspend`) and you're using the default
357 | `max` mergeplan, the tool would immediately suspend the instance, cutting
358 | everyone on the blocked instance off from their existing followers on your
359 | instance, even if they move to a new instance. If you actually want that
360 | outcome, you can set `max_followed_severity = 'suspend'` and use the `max`
361 | mergeplan.
362 | 
363 | Once the follow count drops to 0 on your instance, the tool will automatically
364 | use the highest severity it finds again (if you're using the `max` mergeplan).
365 | 
366 | ### Allowlists
367 | 
368 | Sometimes you might want to completely ignore the blocklist definitions for
369 | certain domains. That's what allowlists are for.
370 | 
371 | Allowlists remove any domain in the list from the merged list of blocks before
372 | the merged list is saved out to a file or pushed to any instance.
373 | 
374 | Allowlists can be in any format supported by `blocklist_url_sources` but ignore
375 | all fields that aren't `domain`.
376 | 
377 | You can also allow domains on the commandline by using the `-A` or `--allow`
378 | flag and providing the domain name to allow. You can use the flag multiple
379 | times to allow multiple domains.
380 | 
381 | It is probably wise to include your own instance domain in an allowlist so you
382 | don't accidentally defederate from yourself.
383 | 
384 | ## More advanced configuration
385 | 
386 | For a list of possible configuration options, check the `--help` output and read the
387 | sample configuration file in `etc/sample.fediblockhole.conf.toml`.
388 | 
389 | ### save_intermediate
390 | 
391 | This option tells the tool to save the unmerged blocklists it fetches from
392 | remote instances and URLs into separate files. This is handy for debugging, or
393 | just to have a non-unified set of blocklist files.
394 | 
395 | Works with the `savedir` setting to control where to save the files.
396 | 
397 | These are parsed blocklists, not the raw data, and so will be affected by `import_fields`.
398 | 399 | The filename is based on the URL or domain used so you can tell where each list came from. 400 | 401 | ### savedir 402 | 403 | Sets where to save intermediate blocklist files. Defaults to `/tmp`. 404 | 405 | ### blocklist_auditfile 406 | 407 | If provided, will save an audit file of counts and percentages by domain. Useful for debugging 408 | thresholds. Defaults to None. 409 | 410 | ### no_push_instance 411 | 412 | Defaults to False. 413 | 414 | When set, the tool won't actually try to push the unified blocklist to any 415 | configured instances. 416 | 417 | If you want to see what the tool would try to do, but not actually apply any 418 | updates, use `--dryrun`. 419 | 420 | ### no_fetch_url 421 | 422 | Skip the fetching of blocklists from any URLs that are configured. 423 | 424 | ### no_fetch_instance 425 | 426 | Skip the fetching of blocklists from any remote instances that are configured. 427 | 428 | ### override_private_comment 429 | 430 | Defaults to None. 431 | 432 | Stamp all *new* blocks pushed to a remote server with this comment or code. 433 | Helps to identify blocks you've created on a server via Fediblockhole versus ones that 434 | already existed. 435 | 436 | ### mergeplan 437 | 438 | If two (or more) blocklists define blocks for the same domain, but they're 439 | different, `mergeplan` tells the tool how to resolve the conflict. 440 | 441 | `max` is the default. It uses the _highest_ severity block it finds as the one 442 | that should be used in the unified blocklist. 443 | 444 | `min` does the opposite. It uses the _lowest_ severity block it finds as the one 445 | to use in the unified blocklist. 446 | 447 | A full discussion of severities is beyond the scope of this README, but here is 448 | a quick overview of how it works for this tool. 449 | 450 | The severities are: 451 | 452 | - **noop**, level 0: This is essentially an 'unblock' but you can include a 453 | comment. 454 | - **silence**, level 1: A silence adds friction to federation with an instance. 455 | - **suspend**, level 2: A full defederation with the instance. 456 | 457 | With `mergeplan` set to `max`, _silence_ would take precedence over _noop_, and 458 | _suspend_ would take precedence over both. 459 | 460 | With `mergeplan` set to `min`, _silence_ would take precedence over _suspend_, 461 | and _noop_ would take precedence over both. 462 | 463 | You would want to use `max` to ensure that you always block with whichever your 464 | harshest fellow admin thinks should happen. 465 | 466 | You would want to use `min` to ensure that your blocks do what your most lenient 467 | fellow admin thinks should happen. 468 | 469 | ### import_fields 470 | 471 | `import_fields` controls which fields will be imported from remote 472 | instances and URL blocklists, and which fields are pushed to instances from the 473 | unified blocklist. 474 | 475 | The fields `domain` and `severity` are always included, so only define extra 476 | fields, if you want them. 477 | 478 | You can't export fields you haven't imported, so `export_fields` should be a 479 | subset of `import_fields`, but you can run the tool multiple times. You could, 480 | for example, include lots of fields for an initial import to build up a 481 | comprehensive list for export, combined with the `--no-push-instances` option so 482 | you don't actually apply the full list to anywhere. 483 | 484 | Then you could use a different set of options when importing so you have all the 485 | detail in a file, but only push `public_comment` to instances. 
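
A sketch of that two-pass idea, using settings that all appear in the sample
configuration file (these would be two separate config files, shown together
here for brevity):

```
# Pass 1: import lots of detail, save it to a file, push nothing.
import_fields = ['public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate']
export_fields = ['public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate']
blocklist_savefile = '/tmp/merged_blocklist.csv'
no_push_instance = true

# Pass 2 (a separate config file): push only public comments to instances.
import_fields = ['public_comment']
export_fields = ['public_comment']
```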
486 | 487 | ### export_fields 488 | 489 | `export_fields` controls which fields will get saved to the unified blocklist 490 | file, if you export one. 491 | 492 | The fields `domain` and `severity` are always included, so only define extra 493 | fields, if you want them. -------------------------------------------------------------------------------- /chart/.helmignore: -------------------------------------------------------------------------------- 1 | # A helm chart's templates and default values can be packaged into a .tgz file. 2 | # When doing that, not everything should be bundled into the .tgz file. This 3 | # file describes what to not bundle. 4 | # 5 | # Manually added by us 6 | # -------------------- 7 | # 8 | 9 | # Boilerplate .helmignore from `helm create mastodon` 10 | # --------------------------------------------------- 11 | # 12 | # Patterns to ignore when building packages. 13 | # This supports shell glob matching, relative path matching, and 14 | # negation (prefixed with !). Only one pattern per line. 15 | .DS_Store 16 | # Common VCS dirs 17 | .git/ 18 | .gitignore 19 | .bzr/ 20 | .bzrignore 21 | .hg/ 22 | .hgignore 23 | .svn/ 24 | # Common backup files 25 | *.swp 26 | *.bak 27 | *.tmp 28 | *.orig 29 | *~ 30 | # Various IDEs 31 | .project 32 | .idea/ 33 | *.tmproj 34 | .vscode/ 35 | -------------------------------------------------------------------------------- /chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: fediblockhole 3 | description: FediBlockHole is a tool for keeping a Mastodon instance blocklist synchronised with remote lists. 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 1.1.0 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | appVersion: 0.4.2 24 | -------------------------------------------------------------------------------- /chart/fediblockhole.conf.toml: -------------------------------------------------------------------------------- 1 | # List of instances to read blocklists from. 2 | # If the instance makes its blocklist public, no authorization token is needed. 3 | # Otherwise, `token` is a Bearer token authorised to read domain_blocks. 4 | # If `admin` = True, use the more detailed admin API, which requires a token with a 5 | # higher level of authorization. 6 | # If `import_fields` are provided, only import these fields from the instance. 7 | # Overrides the global `import_fields` setting. 
8 | blocklist_instance_sources = [ 9 | # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks 10 | # { domain = 'jorts.horse', token = '' }, # user accessible block list 11 | # { domain = 'eigenmagic.net', token = '', admin = true }, # admin access required 12 | ] 13 | 14 | # List of URLs to read csv blocklists from 15 | # Format tells the parser which format to use when parsing the blocklist 16 | # max_severity tells the parser to override any severities that are higher than this value 17 | # import_fields tells the parser to only import that set of fields from a specific source 18 | blocklist_url_sources = [ 19 | # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, 20 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' }, 21 | 22 | ] 23 | 24 | ## These global allowlists override blocks from blocklists 25 | # These are the same format and structure as blocklists, but they take precedence 26 | allowlist_url_sources = [ 27 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-01.csv', format = 'csv' }, 28 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-02.csv', format = 'csv' }, 29 | ] 30 | 31 | # List of instances to write blocklist to 32 | blocklist_instance_destinations = [ 33 | # { domain = 'eigenmagic.net', token = '', max_followed_severity = 'silence'}, 34 | ] 35 | 36 | ## Store a local copy of the remote blocklists after we fetch them 37 | #save_intermediate = true 38 | 39 | ## Directory to store the local blocklist copies 40 | # savedir = '/tmp' 41 | 42 | ## File to save the fully merged blocklist into 43 | # blocklist_savefile = '/tmp/merged_blocklist.csv' 44 | 45 | ## Don't push blocklist to instances, even if they're defined above 46 | # no_push_instance = false 47 | 48 | ## Don't fetch blocklists from URLs, even if they're defined above 49 | # no_fetch_url = false 50 | 51 | ## Don't fetch blocklists from instances, even if they're defined above 52 | # no_fetch_instance = false 53 | 54 | ## Set the mergeplan to use when dealing with overlaps between blocklists 55 | # The default 'max' mergeplan will use the harshest severity block found for a domain. 56 | # The 'min' mergeplan will use the lightest severity block found for a domain. 57 | # mergeplan = 'max' 58 | 59 | ## Set which fields we import 60 | ## 'domain' and 'severity' are always imported, these are additional 61 | ## 62 | import_fields = ['public_comment', 'reject_media', 'reject_reports', 'obfuscate'] 63 | 64 | ## Set which fields we export 65 | ## 'domain' and 'severity' are always exported, these are additional 66 | ## 67 | export_fields = ['public_comment'] 68 | -------------------------------------------------------------------------------- /chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "fediblockhole.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 7 | {{- end }} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "fediblockhole.fullname" -}} 15 | {{- if .Values.fullnameOverride }} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 17 | {{- else }} 18 | {{- $name := default .Chart.Name .Values.nameOverride }} 19 | {{- if contains $name .Release.Name }} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 21 | {{- else }} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 23 | {{- end }} 24 | {{- end }} 25 | {{- end }} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "fediblockhole.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 32 | {{- end }} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "fediblockhole.labels" -}} 38 | helm.sh/chart: {{ include "fediblockhole.chart" . }} 39 | {{ include "fediblockhole.selectorLabels" . }} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- end }} 45 | 46 | {{/* 47 | Selector labels 48 | */}} 49 | {{- define "fediblockhole.selectorLabels" -}} 50 | app.kubernetes.io/name: {{ include "fediblockhole.name" . }} 51 | app.kubernetes.io/instance: {{ .Release.Name }} 52 | {{- end }} 53 | 54 | {{/* 55 | Rolling pod annotations 56 | */}} 57 | {{- define "fediblockhole.rollingPodAnnotations" -}} 58 | rollme: {{ .Release.Revision | quote }} 59 | checksum/config-configmap: {{ include ( print $.Template.BasePath "/configmap-conf-toml.yaml" ) . | sha256sum | quote }} 60 | {{- end }} 61 | 62 | {{/* 63 | Create the default conf file path and filename 64 | */}} 65 | {{- define "fediblockhole.conf_file_path" -}} 66 | {{- default "/etc/default/" .Values.fediblockhole.conf_file.path }} 67 | {{- end }} 68 | {{- define "fediblockhole.conf_file_filename" -}} 69 | {{- default "fediblockhole.conf.toml" .Values.fediblockhole.conf_file.filename }} 70 | {{- end }} 71 | -------------------------------------------------------------------------------- /chart/templates/configmap-conf-toml.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ include "fediblockhole.fullname" . }}-conf-toml 5 | labels: 6 | {{- include "fediblockhole.labels" . | nindent 4 }} 7 | data: 8 | {{ (.Files.Glob "fediblockhole.conf.toml").AsConfig | nindent 4 }} 9 | -------------------------------------------------------------------------------- /chart/templates/cronjob-fediblock-sync.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.fediblockhole.cron.sync.enabled -}} 2 | apiVersion: batch/v1 3 | kind: CronJob 4 | metadata: 5 | name: {{ include "fediblockhole.fullname" . }}-sync 6 | labels: 7 | {{- include "fediblockhole.labels" . | nindent 4 }} 8 | spec: 9 | schedule: {{ .Values.fediblockhole.cron.sync.schedule }} 10 | failedJobsHistoryLimit: {{ .Values.fediblockhole.cron.sync.failedJobsHistoryLimit }} 11 | successfulJobsHistoryLimit: {{ .Values.fediblockhole.cron.sync.successfulJobsHistoryLimit }} 12 | jobTemplate: 13 | spec: 14 | template: 15 | metadata: 16 | name: {{ include "fediblockhole.fullname" . }}-sync 17 | {{- with .Values.jobAnnotations }} 18 | annotations: 19 | {{- toYaml . | nindent 12 }} 20 | {{- end }} 21 | spec: 22 | restartPolicy: OnFailure 23 | containers: 24 | - name: {{ include "fediblockhole.fullname" . 
}}-sync 25 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 26 | imagePullPolicy: {{ .Values.image.pullPolicy }} 27 | command: 28 | - fediblock-sync 29 | - -c 30 | - "{{- include "fediblockhole.conf_file_path" . -}}{{- include "fediblockhole.conf_file_filename" . -}}" 31 | volumeMounts: 32 | - name: config 33 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- include "fediblockhole.conf_file_filename" . -}}" 34 | subPath: "{{- include "fediblockhole.conf_file_filename" . -}}" 35 | {{ if .Values.fediblockhole.allow_file.filename }} 36 | - name: allowfile 37 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- .Values.fediblockhole.allow_file.filename -}}" 38 | subPath: "{{- .Values.fediblockhole.allow_file.filename -}}" 39 | {{ end }} 40 | {{ if .Values.fediblockhole.block_file.filename }} 41 | - name: blockfile 42 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- .Values.fediblockhole.block_file.filename -}}" 43 | subPath: "{{- .Values.fediblockhole.block_file.filename -}}" 44 | {{ end }} 45 | volumes: 46 | - name: config 47 | configMap: 48 | name: {{ include "fediblockhole.fullname" . }}-conf-toml 49 | items: 50 | - key: {{ include "fediblockhole.conf_file_filename" . | quote }} 51 | path: {{ include "fediblockhole.conf_file_filename" . | quote }} 52 | {{ if .Values.fediblockhole.allow_file.filename }} 53 | - name: allowfile 54 | configMap: 55 | name: {{ include "fediblockhole.fullname" . }}-allow-csv 56 | items: 57 | - key: {{ .Values.fediblockhole.allow_file.filename | quote }} 58 | path: {{ .Values.fediblockhole.allow_file.filename | quote }} 59 | {{ end }} 60 | {{ if .Values.fediblockhole.block_file.filename }} 61 | - name: blockfile 62 | configMap: 63 | name: {{ include "fediblockhole.fullname" . }}-block-csv 64 | items: 65 | - key: {{ .Values.fediblockhole.block_file.filename | quote }} 66 | path: {{ .Values.fediblockhole.block_file.filename | quote }} 67 | {{ end }} 68 | {{- end }} 69 | -------------------------------------------------------------------------------- /chart/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: ghcr.io/cunningpike/fediblockhole 3 | # https://github.com/cunningpike/fediblockhole/pkgs/container/fediblockhole/versions 4 | # 5 | # alternatively, use `latest` for the latest release or `edge` for the image 6 | # built from the most recent commit 7 | # 8 | # tag: latest 9 | tag: "" 10 | # use `Always` when using `latest` tag 11 | pullPolicy: IfNotPresent 12 | 13 | fediblockhole: 14 | # location of the configuration file. Default is /etc/default/fediblockhole.conf.toml 15 | conf_file: 16 | path: "" 17 | filename: "" 18 | # Location of a local allowlist file. It is recommended that this file should at a 19 | # minimum contain the web_domain of your own instance. 20 | allow_file: 21 | # Optionally, set the name of the file. This should match the data key in the 22 | # associated ConfigMap 23 | filename: "" 24 | # Location of a local blocklist file. 25 | block_file: 26 | # Optionally, set the name of the file. 
This should match the data key in the 27 | # associated ConfigMap 28 | filename: "" 29 | cron: 30 | # -- run `fediblock-sync` every hour 31 | sync: 32 | # @ignored 33 | enabled: false 34 | # @ignored 35 | schedule: "0 * * * *" 36 | failedJobsHistoryLimit: 1 37 | successfulJobsHistoryLimit: 3 38 | 39 | # if you manually change the UID/GID environment variables, ensure these values 40 | # match: 41 | podSecurityContext: 42 | runAsUser: 991 43 | runAsGroup: 991 44 | fsGroup: 991 45 | 46 | # @ignored 47 | securityContext: {} 48 | 49 | # -- Kubernetes manages pods for jobs and pods for deployments differently, so you might 50 | # need to apply different annotations to the two different sets of pods. The annotations 51 | # set with podAnnotations will be added to all deployment-managed pods. 52 | podAnnotations: {} 53 | 54 | # -- The annotations set with jobAnnotations will be added to all job pods. 55 | jobAnnotations: {} 56 | 57 | # -- Default resources for all Deployments and jobs unless overwritten 58 | resources: {} 59 | # We usually recommend not to specify default resources and to leave this as a conscious 60 | # choice for the user. This also increases chances charts run on environments with little 61 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 62 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 63 | # limits: 64 | # cpu: 100m 65 | # memory: 128Mi 66 | # requests: 67 | # cpu: 100m 68 | # memory: 128Mi 69 | 70 | # @ignored 71 | nodeSelector: {} 72 | 73 | # @ignored 74 | tolerations: [] 75 | 76 | # -- Affinity for all pods unless overwritten 77 | affinity: {} 78 | -------------------------------------------------------------------------------- /container/.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | #README.md 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | __pycache__ 7 | -------------------------------------------------------------------------------- /container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official lightweight Python image. 2 | # https://hub.docker.com/_/python 3 | FROM python:slim 4 | 5 | # Copy local code to the container image. 6 | ENV APP_HOME /app 7 | WORKDIR $APP_HOME 8 | 9 | # Install production dependencies. 10 | RUN pip install fediblockhole 11 | 12 | USER 1001 13 | # Set the command on start to fediblock-sync. 14 | ENTRYPOINT ["fediblock-sync"] 15 | -------------------------------------------------------------------------------- /etc/sample.fediblockhole.conf.toml: -------------------------------------------------------------------------------- 1 | # List of instances to read blocklists from. 2 | # If the instance makes its blocklist public, no authorization token is needed. 3 | # Otherwise, `token` is a Bearer token authorised to read domain_blocks. 4 | # If `admin` = True, use the more detailed admin API, which requires a token with a 5 | # higher level of authorization. 6 | # If `import_fields` are provided, only import these fields from the instance. 7 | # Overrides the global `import_fields` setting. 
8 | blocklist_instance_sources = [
9 |   # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
10 |   # { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
11 |   # { domain = 'jorts.horse', token_env_var = 'FBH_JORTS_TOKEN' }, # use environment variable for token
12 |   # { domain = 'eigenmagic.net', token = '<a_token_with_read_auth>', admin = true }, # admin access required
13 |   # { domain = 'eigenmagic.net', token_env_var = 'FBH_EIGENMAGIC_READ_TOKEN', admin = true }, # same, but use environment variable for token
14 | 
15 | ]
16 | 
17 | # List of URLs to read csv blocklists from
18 | # Format tells the parser which format to use when parsing the blocklist
19 | # max_severity tells the parser to override any severities that are higher than this value
20 | # import_fields tells the parser to only import that set of fields from a specific source
21 | blocklist_url_sources = [
22 |   # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
23 |   { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
24 | 
25 | ]
26 | 
27 | ## These global allowlists override blocks from blocklists
28 | # These are the same format and structure as blocklists, but they take precedence
29 | allowlist_url_sources = [
30 |   { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-01.csv', format = 'csv' },
31 |   { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-02.csv', format = 'csv' },
32 | ]
33 | 
34 | # List of instances to write blocklist to
35 | blocklist_instance_destinations = [
36 |   # { domain = 'eigenmagic.net', token = '<a_token_with_write_auth>', max_followed_severity = 'silence'},
37 | 
38 |   # Alternate mechanism using environment variable for the token
39 |   # { domain = 'eigenmagic.net', token_env_var = 'FBH_EIGENMAGIC_TOKEN', max_followed_severity = 'silence'},
40 | 
41 | ]
42 | 
43 | ## Store a local copy of the remote blocklists after we fetch them
44 | #save_intermediate = true
45 | 
46 | ## Directory to store the local blocklist copies
47 | # savedir = '/tmp'
48 | 
49 | ## File to save the fully merged blocklist into
50 | # blocklist_savefile = '/tmp/merged_blocklist.csv'
51 | 
52 | ## File to save the audit log of counts across sources
53 | # blocklist_auditfile = '/tmp/domain_counts_list.csv'
54 | 
55 | ## Don't push blocklist to instances, even if they're defined above
56 | # no_push_instance = false
57 | 
58 | ## Don't fetch blocklists from URLs, even if they're defined above
59 | # no_fetch_url = false
60 | 
61 | ## Don't fetch blocklists from instances, even if they're defined above
62 | # no_fetch_instance = false
63 | 
64 | ## Set the mergeplan to use when dealing with overlaps between blocklists
65 | # The default 'max' mergeplan will use the harshest severity block found for a domain.
66 | # The 'min' mergeplan will use the lightest severity block found for a domain.
67 | # mergeplan = 'max'
68 | 
69 | ## Optional threshold-based merging.
70 | # Only merge in domain blocks if the domain is mentioned in
71 | # at least `merge_threshold` blocklists.
72 | # `merge_threshold` is an integer, with a default value of 0.
73 | # The `merge_threshold_type` can be `count` or `pct`.
74 | # If `count` type is selected, the threshold is reached when the domain
75 | # is mentioned in at least `merge_threshold` blocklists. The default value
76 | # of 0 means that every block in every list will be merged in.
77 | # If `pct` type is selected, `merge_threshold` is interpreted as a percentage,
78 | # i.e. if `merge_threshold` = 20, blocks will only be merged in if the domain
79 | # is present in at least 20% of blocklists.
80 | # Percentage calculated as number_of_mentions / total_number_of_blocklists.
81 | # The percentage method is more flexible, but also more complicated, so take care
82 | # when using it.
83 | #
84 | # merge_threshold_type = 'count'
85 | # merge_threshold = 0
86 | 
87 | ## set an override private comment to be added when pushing a NEW block to an instance
88 | # this does not require importing private comments
89 | # override_private_comment = 'Added by Fediblock Sync'
90 | 
91 | ## Set which fields we import
92 | ## 'domain' and 'severity' are always imported, these are additional
93 | ##
94 | import_fields = ['public_comment', 'reject_media', 'reject_reports', 'obfuscate']
95 | 
96 | ## Set which fields we export
97 | ## 'domain' and 'severity' are always exported, these are additional
98 | ##
99 | export_fields = ['public_comment']
100 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "fediblockhole"
3 | version = "0.4.6"
4 | description = "Federated blocklist management for Mastodon"
5 | readme = "README.md"
6 | license = {file = "LICENSE"}
7 | requires-python = ">=3.8"
8 | keywords = ["mastodon", "fediblock"]
9 | authors = [
10 |   {name = "Justin Warren"}, {email = "justin@eigenmagic.com"}
11 | ]
12 | classifiers = [
13 |   "Development Status :: 4 - Beta",
14 |   "Intended Audience :: Developers",
15 |   "Intended Audience :: System Administrators",
16 |   "License :: OSI Approved :: GNU Affero General Public License v3",
17 |   "Natural Language :: English",
18 |   "Programming Language :: Python :: 3",
19 |   "Programming Language :: Python :: 3.10",
20 |   "Programming Language :: Python :: 3.9",
21 |   "Programming Language :: Python :: 3.8",
22 | ]
23 | dependencies = [
24 |   "requests",
25 |   "toml"
26 | ]
27 | 
28 | [project.urls]
29 | homepage = "https://github.com/eigenmagic/fediblockhole"
30 | documentation = "https://github.com/eigenmagic/fediblockhole"
31 | repository = "https://github.com/eigenmagic/fediblockhole"
32 | 
33 | [project.scripts]
34 | fediblock-sync = "fediblockhole:main"
35 | 
36 | [build-system]
37 | requires = ["hatchling"]
38 | build-backend = "hatchling.build"
39 | 
40 | [tool.pytest.ini_options]
41 | addopts = [
42 |   "--import-mode=importlib",
43 | ]
44 | norecursedirs = [
45 |   "tests/helpers",
46 | ]
47 | 
48 | [tool.uv]
49 | dev-dependencies = [
50 |   "coverage[toml]>=7.6.1",
51 |   "pytest ~= 8.3",
52 | ]
53 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | toml
3 | pytest
--------------------------------------------------------------------------------
/samples/demo-allowlist-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","private_comment","public_comment","reject_media","reject_reports","obfuscate"
2 | "eigenmagic.net","noop","Never block me","Only the domain field matters for allowlists",False,False,False
3 | "example.org","noop","Never block me either","The severity is ignored in allowlists as are all other fields",False,False,False
4 | "demo01.example.org","noop","Never block me either","But you
can use them to leave yourself or others notes on why the item is here",False,False,False
5 | 
--------------------------------------------------------------------------------
/samples/demo-allowlist-02.csv:
--------------------------------------------------------------------------------
1 | "domain","private_comment"
2 | "example.org","The private comment won't get loaded, but can be handy to leave yourself a note."
3 | 
--------------------------------------------------------------------------------
/samples/demo-blocklist-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","reject_media","reject_reports","private_comment","public_comment","obfuscate"
2 | "qoto.org","suspend",True,True,,,True
3 | "sealion.club","suspend",True,True,,,True
4 | "develop.gab.com","suspend",True,True,,,True
5 | "gab.ai","suspend",True,True,,,True
6 | "gab.sleeck.eu","suspend",True,True,,,True
7 | "gab.com","suspend",True,True,,,True
8 | "kiwifarms.is","suspend",True,True,,,True
9 | "kiwifarms.net","suspend",True,True,,,True
10 | "gabfed.com","suspend",True,True,,,True
--------------------------------------------------------------------------------
/src/fediblockhole/__init__.py:
--------------------------------------------------------------------------------
1 | """A tool for managing federated Mastodon blocklists
2 | """
3 | 
4 | from __future__ import annotations
5 | 
6 | import argparse
7 | import csv
8 | import json
9 | import os.path
10 | import sys
11 | import time
12 | import urllib.request as urlr
13 | from importlib.metadata import version
14 | 
15 | import requests
16 | import toml
17 | 
18 | from .blocklists import BlockAuditList, Blocklist, parse_blocklist
19 | from .const import BlockAudit, BlockSeverity, DomainBlock
20 | 
21 | __version__ = version("fediblockhole")
22 | 
23 | import logging
24 | 
25 | logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
26 | log = logging.getLogger("fediblockhole")
27 | 
28 | # Max size of a URL-fetched blocklist
29 | URL_BLOCKLIST_MAXSIZE = 1024**3
30 | 
31 | # Wait at most this long for a remote server to respond
32 | REQUEST_TIMEOUT = 30
33 | 
34 | # Time to wait between instance API calls so we don't melt them
35 | # The default Mastodon rate limit is 300 calls per 5 minutes
36 | API_CALL_DELAY = 5 * 60 / 300  # 300 calls per 5 minutes
37 | 
38 | # We always import the domain and the severity
39 | IMPORT_FIELDS = ["domain", "severity"]
40 | 
41 | # Allowlists always import these fields
42 | ALLOWLIST_IMPORT_FIELDS = [
43 |     "domain",
44 |     "severity",
45 |     "public_comment",
46 |     "private_comment",
47 |     "reject_media",
48 |     "reject_reports",
49 |     "obfuscate",
50 | ]
51 | 
52 | # We always export the domain and the severity
53 | EXPORT_FIELDS = ["domain", "severity"]
54 | 
55 | 
56 | def sync_blocklists(conf: argparse.Namespace):
57 |     """Sync instance blocklists from remote sources.
58 | 
59 |     @param conf: A configuration dictionary
60 |     """
61 |     # Build a dict of blocklists we retrieve from remote sources.
62 |     # We will merge these later using a merge algorithm we choose.
63 | 
64 |     # Always import these fields (copy the default so we don't mutate the module-level list)
65 |     import_fields = list(IMPORT_FIELDS)
66 |     # Add extra import fields if defined in config
67 |     import_fields.extend(conf.import_fields)
68 | 
69 |     # Always export these fields (copy the default so we don't mutate the module-level list)
70 |     export_fields = list(EXPORT_FIELDS)
71 |     # Add extra export fields if defined in config
72 |     export_fields.extend(conf.export_fields)
73 | 
74 |     blocklists = []
75 |     # Fetch blocklists from URLs
76 |     if not conf.no_fetch_url:
77 |         blocklists.extend(
78 |             fetch_from_urls(
79 |                 conf.blocklist_url_sources,
80 |                 import_fields,
81 |                 conf.save_intermediate,
82 |                 conf.savedir,
83 |                 export_fields,
84 |             )
85 |         )
86 | 
87 |     # Fetch blocklists from remote instances
88 |     if not conf.no_fetch_instance:
89 |         blocklists.extend(
90 |             fetch_from_instances(
91 |                 conf.blocklist_instance_sources,
92 |                 import_fields,
93 |                 conf.save_intermediate,
94 |                 conf.savedir,
95 |                 export_fields,
96 |             )
97 |         )
98 | 
99 |     # Merge blocklists into an update dict
100 |     merged = merge_blocklists(
101 |         blocklists,
102 |         conf.mergeplan,
103 |         conf.merge_threshold,
104 |         conf.merge_threshold_type,
105 |         conf.blocklist_auditfile,
106 |     )
107 | 
108 |     # Remove items listed in allowlists, if any
109 |     allowlists = fetch_allowlists(conf)
110 |     merged = apply_allowlists(merged, conf, allowlists)
111 | 
112 |     # Save the final mergelist, if requested
113 |     if conf.blocklist_savefile:
114 |         log.info(f"Saving merged blocklist to {conf.blocklist_savefile}")
115 |         save_blocklist_to_file(merged, conf.blocklist_savefile, export_fields)
116 | 
117 |     # Push the blocklist to destination instances
118 |     if not conf.no_push_instance:
119 |         log.info("Pushing domain blocks to instances...")
120 |         for dest in conf.blocklist_instance_destinations:
121 |             target = dest["domain"]
122 |             token = dest["token"]
123 |             scheme = dest.get("scheme", "https")
124 |             max_followed_severity = BlockSeverity(
125 |                 dest.get("max_followed_severity", "silence")
126 |             )
127 |             push_blocklist(
128 |                 token,
129 |                 target,
130 |                 merged,
131 |                 conf.dryrun,
132 |                 import_fields,
133 |                 max_followed_severity,
134 |                 scheme,
135 |                 conf.override_private_comment,
136 |             )
137 | 
138 | 
139 | def apply_allowlists(merged: Blocklist, conf: argparse.Namespace, allowlists: list):
140 |     """Apply allowlists"""
141 |     # Apply allows specified on the commandline
142 |     for domain in conf.allow_domains:
143 |         log.info(f"'{domain}' allowed by commandline, removing any blocks...")
144 |         if domain in merged.blocks:
145 |             del merged.blocks[domain]
146 | 
147 |     # Apply allows from URLs lists
148 |     log.info("Removing domains from URL allowlists...")
149 |     for alist in allowlists:
150 |         log.debug(f"Processing allows from '{alist.origin}'...")
151 |         for allowed in alist.blocks.values():
152 |             domain = allowed.domain
153 |             log.debug(f"Removing allowlisted domain '{domain}' from merged list.")
154 |             if domain in merged.blocks:
155 |                 del merged.blocks[domain]
156 | 
157 |     return merged
158 | 
159 | 
160 | def fetch_allowlists(conf: argparse.Namespace) -> list[Blocklist]:
161 |     """Fetch allowlists from any configured URL sources."""
162 |     if conf.allowlist_url_sources:
163 |         allowlists = fetch_from_urls(
164 |             conf.allowlist_url_sources,
165 |             ALLOWLIST_IMPORT_FIELDS,
166 |             conf.save_intermediate,
167 |             conf.savedir,
168 |         )
169 |         return allowlists
170 |     return []
171 | 
172 | 
173 | def fetch_from_urls(
174 |     url_sources: list,
175 |     import_fields: list = IMPORT_FIELDS,
176 |     save_intermediate: bool = False,
177 |     savedir: str = None,
178 |     export_fields: list = EXPORT_FIELDS,
179 | ) -> list[Blocklist]:
180 |     """Fetch blocklists from URL sources
181 |     @param url_sources: A
list of configuration dicts for URL sources
182 |     @param import_fields: A list of fields to import from each source
183 |     @returns: A list of Blocklist objects, one per URL source
184 |     """
185 |     log.info("Fetching domain blocks from URLs...")
186 |     blocklists = []
187 |     for item in url_sources:
188 |         url = item["url"]
189 |         # If import fields are provided, they override the global ones passed in
190 |         source_import_fields = item.get("import_fields", None)
191 |         if source_import_fields:
192 |             # Build a new list including the default fields (list.extend() returns None)
193 |             import_fields = IMPORT_FIELDS + source_import_fields
194 | 
195 |         max_severity = item.get("max_severity", "suspend")
196 |         listformat = item.get("format", "csv")
197 |         with urlr.urlopen(url) as fp:
198 |             rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode("utf-8")
199 |             bl = parse_blocklist(rawdata, url, listformat, import_fields, max_severity)
200 |             blocklists.append(bl)
201 |             if save_intermediate:
202 |                 save_intermediate_blocklist(bl, savedir, export_fields)
203 | 
204 |     return blocklists
205 | 
206 | 
207 | def fetch_from_instances(
208 |     sources: list,
209 |     import_fields: list = IMPORT_FIELDS,
210 |     save_intermediate: bool = False,
211 |     savedir: str = None,
212 |     export_fields: list = EXPORT_FIELDS,
213 | ) -> list[Blocklist]:
214 |     """Fetch blocklists from other instances
215 |     @param sources: A list of configuration dicts for instance sources
216 |     @param import_fields: A list of fields to import from each instance
217 |     @returns: A list of Blocklist objects, one per instance
218 |     """
219 |     log.info("Fetching domain blocks from instances...")
220 |     blocklists = []
221 |     for item in sources:
222 |         domain = item["domain"]
223 |         admin = item.get("admin", False)
224 |         token = item.get("token", None)
225 |         scheme = item.get("scheme", "https")
226 |         # itemsrc = f"{scheme}://{domain}/api"
227 | 
228 |         # If import fields are provided, they override the global ones passed in
229 |         source_import_fields = item.get("import_fields", None)
230 |         if source_import_fields:
231 |             # Build a new list including the default fields (list.extend() returns None)
232 |             import_fields = IMPORT_FIELDS + source_import_fields
233 | 
234 |         bl = fetch_instance_blocklist(domain, token, admin, import_fields, scheme)
235 |         blocklists.append(bl)
236 |         if save_intermediate:
237 |             save_intermediate_blocklist(bl, savedir, export_fields)
238 |     return blocklists
239 | 
240 | 
241 | def merge_blocklists(
242 |     blocklists: list[Blocklist],
243 |     mergeplan: str = "max",
244 |     threshold: int = 0,
245 |     threshold_type: str = "count",
246 |     save_block_audit_file: str = None,
247 | ) -> Blocklist:
248 |     """Merge fetched remote blocklists into a bulk update
249 |     @param blocklists: A list of Blocklist objects
250 |         to merge into one unified blocklist
251 |     @param mergeplan: An optional method of merging overlapping block definitions
252 |         'max' (the default) uses the highest severity block found
253 |         'min' uses the lowest severity block found
254 |     @param threshold: An integer used in the threshold mechanism.
255 |         If a domain is not present in this number/pct or more of the blocklists,
256 |         it will not get merged into the final list.
257 |     @param threshold_type: choice of ['count', 'pct']
258 |         If `count`, threshold is met if block is present in `threshold`
259 |         or more blocklists.
260 |         If `pct`, threshold is met if block is present in
261 |         count_of_mentions / number_of_blocklists.
262 |     @returns: A Blocklist of merged DomainBlocks, keyed by domain
263 |     """
264 |     merged = Blocklist("fediblockhole.merge_blocklists")
265 |     audit = BlockAuditList("fediblockhole.merge_blocklists")
266 | 
267 |     num_blocklists = len(blocklists)
268 | 
269 |     # Create a domain keyed list of blocks for each domain
270 |     domain_blocks = {}
271 | 
272 |     for bl in blocklists:
273 |         for block in bl.values():
274 |             if "*" in block.domain:
275 |                 log.debug(f"Domain '{block.domain}' is obfuscated. Skipping it.")
276 |                 continue
277 |             elif block.domain in domain_blocks:
278 |                 domain_blocks[block.domain].append(block)
279 |             else:
280 |                 domain_blocks[block.domain] = [
281 |                     block,
282 |                 ]
283 | 
284 |     # Only merge items if `threshold` is met or exceeded
285 |     for domain in domain_blocks:
286 |         domain_matches_count = len(domain_blocks[domain])
287 |         domain_matches_percent = domain_matches_count / num_blocklists * 100
288 |         if threshold_type == "count":
289 |             domain_threshold_level = domain_matches_count
290 |         elif threshold_type == "pct":
291 |             domain_threshold_level = domain_matches_percent
292 |             # log.debug(f"domain threshold level: {domain_threshold_level}")
293 |         else:
294 |             raise ValueError(
295 |                 f"Unsupported threshold type '{threshold_type}'. Supported values are: 'count', 'pct'"  # noqa
296 |             )
297 | 
298 |         log.debug(f"Checking if {domain_threshold_level} >= {threshold} for {domain}")
299 |         if domain_threshold_level >= threshold:
300 |             # Add first block in the list to merged
301 |             block = domain_blocks[domain][0]
302 |             log.debug(f"Yes. Merging block: {block}")
303 | 
304 |             # Merge the others with this record
305 |             for newblock in domain_blocks[domain][1:]:
306 |                 block = apply_mergeplan(block, newblock, mergeplan)
307 |             merged.blocks[block.domain] = block
308 | 
309 |         if save_block_audit_file:
310 |             blockdata: BlockAudit = {
311 |                 "domain": domain,
312 |                 "count": domain_matches_count,
313 |                 "percent": domain_matches_percent,
314 |             }
315 |             audit.blocks[domain] = blockdata
316 | 
317 |     if save_block_audit_file:
318 |         log.info(f"Saving audit file to {save_block_audit_file}")
319 |         save_domain_block_audit_to_file(audit, save_block_audit_file)
320 | 
321 |     return merged
322 | 
323 | 
324 | def apply_mergeplan(
325 |     oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str = "max"
326 | ) -> DomainBlock:
327 |     """Use a mergeplan to decide how to merge two overlapping block definitions
328 | 
329 |     @param oldblock: The existing block definition.
330 |     @param newblock: The new block definition we want to merge in.
331 |     @param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
332 |     """
333 |     # Default to the existing block definition
334 |     blockdata = oldblock._asdict()
335 | 
336 |     # Merge comments
337 |     keylist = ["public_comment", "private_comment"]
338 |     for key in keylist:
339 |         try:
340 |             oldcomment = getattr(oldblock, key)
341 |             newcomment = getattr(newblock, key)
342 |             blockdata[key] = merge_comments(oldcomment, newcomment)
343 |         except (KeyError, AttributeError):
344 |             log.debug(
345 |                 f"Key '{key}' missing from block definition so cannot compare. Continuing..."  # noqa
346 |             )
347 |             continue
348 | 
349 |     # How do we override an earlier block definition?
350 |     if mergeplan in ["max", None]:
351 |         # Use the highest block level found (the default)
352 |         # log.debug(f"Using 'max' mergeplan.")
353 | 
354 |         if newblock.severity > oldblock.severity:
355 |             # log.debug(f"New block severity is higher.
Using that.") 356 | blockdata["severity"] = newblock.severity 357 | 358 | # For 'reject_media', 'reject_reports', and 'obfuscate' if 359 | # the value is set and is True for the domain in 360 | # any blocklist then the value is set to True. 361 | for key in ["reject_media", "reject_reports", "obfuscate"]: 362 | newval = getattr(newblock, key) 363 | if newval is True: 364 | blockdata[key] = True 365 | 366 | elif mergeplan in ["min"]: 367 | # Use the lowest block level found 368 | log.debug("Using 'min' mergeplan.") 369 | 370 | if newblock.severity < oldblock.severity: 371 | blockdata["severity"] = newblock.severity 372 | 373 | # For 'reject_media', 'reject_reports', and 'obfuscate' if 374 | # the value is set and is False for the domain in 375 | # any blocklist then the value is set to False. 376 | for key in ["reject_media", "reject_reports", "obfuscate"]: 377 | newval = getattr(newblock, key) 378 | if newval is False: 379 | blockdata[key] = False 380 | 381 | else: 382 | raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.") 383 | 384 | # log.debug(f"Block severity set to {blockdata['severity']}") 385 | 386 | return DomainBlock(**blockdata) 387 | 388 | 389 | def merge_comments(oldcomment: str, newcomment: str) -> str: 390 | """Merge two comments 391 | 392 | @param oldcomment: The original comment we're merging into 393 | @param newcomment: The new commment we want to merge in 394 | @returns: a new str of the merged comment 395 | """ 396 | # Don't merge if both comments are None or '' 397 | if oldcomment in ["", None] and newcomment in ["", None]: 398 | return "" 399 | 400 | # If both comments are the same, or new comment is empty, don't merge 401 | if oldcomment == newcomment or newcomment in ["", None]: 402 | return oldcomment 403 | 404 | # If old comment is empty, just return the new one 405 | if oldcomment in ["", None]: 406 | return newcomment 407 | 408 | # We want to skip duplicate fragments so we don't end up 409 | # re-concatenating the same strings every time there's an 410 | # update, causing the comment to grow without bound. 411 | # We tokenize the comments, splitting them on ', ', and comparing 412 | # the tokens, skipping duplicates. 
413 | # This means "boring, lack of moderation, nazis, scrapers" merging 414 | # with "lack of moderation, scrapers" should result in 415 | # "boring, lack of moderation, nazis, scrapers" 416 | old_tokens = oldcomment.split(", ") 417 | new_tokens = newcomment.split(", ") 418 | 419 | # Remove any empty string tokens that we get 420 | while "" in old_tokens: 421 | old_tokens.remove("") 422 | while "" in new_tokens: 423 | new_tokens.remove("") 424 | 425 | # Remove duplicate tokens 426 | for token in old_tokens: 427 | if token in new_tokens: 428 | new_tokens.remove(token) 429 | 430 | # Combine whatever tokens are left into one set 431 | tokenset = old_tokens 432 | tokenset.extend(new_tokens) 433 | 434 | # Return the merged string 435 | return ", ".join(tokenset) 436 | 437 | 438 | def requests_headers(token: str = None): 439 | """Set common headers for requests""" 440 | headers = {"User-Agent": f"FediBlockHole/{__version__}"} 441 | if token: 442 | headers["Authorization"] = f"Bearer {token}" 443 | 444 | return headers 445 | 446 | 447 | def fetch_instance_blocklist( 448 | host: str, 449 | token: str = None, 450 | admin: bool = False, 451 | import_fields: list = ["domain", "severity"], 452 | scheme: str = "https", 453 | ) -> Blocklist: 454 | """Fetch existing block list from server 455 | 456 | @param host: The remote host to connect to. 457 | @param token: The (optional) OAuth Bearer token to authenticate with. 458 | @param admin: Boolean flag to use the admin API if True. 459 | @param import_fields: A list of fields to import from the remote instance. 460 | @returns: A Blocklist of the domain blocks from the instance. 461 | """ 462 | log.info(f"Fetching instance blocklist from {host} ...") 463 | 464 | if admin: 465 | api_path = "/api/v1/admin/domain_blocks" 466 | parse_format = "json" 467 | else: 468 | api_path = "/api/v1/instance/domain_blocks" 469 | parse_format = "mastodon_api_public" 470 | 471 | headers = requests_headers(token) 472 | 473 | url = f"{scheme}://{host}{api_path}" 474 | 475 | blockdata = [] 476 | link = True 477 | while link: 478 | response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT) 479 | if response.status_code != 200: 480 | log.error(f"Cannot fetch remote blocklist: {response.content}") 481 | raise ValueError(f"Unable to fetch domain block list: {response}") 482 | 483 | # Each block of returned data is a JSON list of dicts 484 | # so we parse them and append them to the fetched list 485 | # of JSON data we need to parse. 
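# For illustration (URLs invented): the Link header this endpoint returns
# typically looks like
#   Link: <https://example.org/api/v1/admin/domain_blocks?max_id=123>; rel="next",
#         <https://example.org/api/v1/admin/domain_blocks?min_id=456>; rel="prev"
# which is what the pagination handling below picks apart.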
486 | 487 | blockdata.extend(json.loads(response.content.decode("utf-8"))) 488 | # Parse the link header to find the next url to fetch 489 | # This is a weird and janky way of doing pagination but 490 | # hey nothing we can do about it we just have to deal 491 | link = response.headers.get("Link", None) 492 | if link is None: 493 | break 494 | pagination = link.split(", ") 495 | if len(pagination) != 2: 496 | link = None 497 | break 498 | else: 499 | next = pagination[0] 500 | # prev = pagination[1] 501 | 502 | urlstring, rel = next.split("; ") 503 | url = urlstring.strip("<").rstrip(">") 504 | 505 | blocklist = parse_blocklist(blockdata, url, parse_format, import_fields) 506 | 507 | return blocklist 508 | 509 | 510 | def delete_block(token: str, host: str, id: int, scheme: str = "https"): 511 | """Remove a domain block""" 512 | log.debug(f"Removing domain block {id} at {host}...") 513 | api_path = "/api/v1/admin/domain_blocks/" 514 | 515 | url = f"{scheme}://{host}{api_path}{id}" 516 | 517 | response = requests.delete( 518 | url, headers=requests_headers(token), timeout=REQUEST_TIMEOUT 519 | ) 520 | if response.status_code != 200: 521 | if response.status_code == 404: 522 | log.warning(f"No such domain block: {id}") 523 | return 524 | 525 | raise ValueError( 526 | f"Something went wrong: {response.status_code}: {response.content}" 527 | ) 528 | 529 | 530 | def fetch_instance_follows( 531 | token: str, host: str, domain: str, scheme: str = "https" 532 | ) -> int: 533 | """Fetch the followers of the target domain at the instance 534 | 535 | @param token: the Bearer authentication token for OAuth access 536 | @param host: the instance API hostname/IP address 537 | @param domain: the domain to search for followers of 538 | @returns: int, number of local followers of remote instance accounts 539 | """ 540 | api_path = "/api/v1/admin/measures" 541 | url = f"{scheme}://{host}{api_path}" 542 | 543 | key = "instance_follows" 544 | 545 | # This data structure only allows us to request a single domain 546 | # at a time, which limits the load on the remote instance of each call 547 | data = { 548 | "keys": [key], 549 | key: {"domain": domain}, 550 | } 551 | 552 | # The Mastodon API only accepts JSON formatted POST data for measures 553 | response = requests.post( 554 | url, headers=requests_headers(token), json=data, timeout=REQUEST_TIMEOUT 555 | ) 556 | if response.status_code != 200: 557 | if response.status_code == 403: 558 | log.error( 559 | f"Cannot fetch follow information for {domain} from {host}: {response.content}" # noqa 560 | ) 561 | 562 | raise ValueError( 563 | f"Something went wrong: {response.status_code}: {response.content}" 564 | ) 565 | 566 | # Get the total returned 567 | follows = int(response.json()[0]["total"]) 568 | return follows 569 | 570 | 571 | def check_followed_severity( 572 | host: str, 573 | token: str, 574 | domain: str, 575 | severity: BlockSeverity, 576 | max_followed_severity: BlockSeverity = BlockSeverity("silence"), 577 | scheme: str = "https", 578 | ): 579 | """Check an instance to see if it has followers of a to-be-blocked instance""" 580 | 581 | log.debug("Checking followed severity...") 582 | # Return straight away if we're not increasing the severity 583 | if severity <= max_followed_severity: 584 | return severity 585 | 586 | # If the instance has accounts that follow people on the to-be-blocked domain, 587 | # limit the maximum severity to the configured `max_followed_severity`. 
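# Worked example (invented values): with max_followed_severity = 'silence',
# a proposed 'suspend' for a domain that local accounts still follow is
# capped at 'silence', so those follow relationships are not severed.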
588 | log.debug("checking for instance follows...") 589 | follows = fetch_instance_follows(token, host, domain, scheme) 590 | time.sleep(API_CALL_DELAY) 591 | if follows > 0: 592 | log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.") 593 | if severity > max_followed_severity: 594 | log.warning( 595 | f"Instance {host} has {follows} followers of accounts at {domain}. " 596 | f"Limiting block severity to {max_followed_severity}." 597 | ) 598 | return max_followed_severity 599 | return severity 600 | 601 | 602 | def is_change_needed(oldblock: dict, newblock: dict, import_fields: list): 603 | change_needed = oldblock.compare_fields(newblock, import_fields) 604 | return change_needed 605 | 606 | 607 | def update_known_block( 608 | token: str, host: str, block: DomainBlock, scheme: str = "https" 609 | ): 610 | """Update an existing domain block with information in blockdict""" 611 | api_path = "/api/v1/admin/domain_blocks/" 612 | 613 | id = block.id 614 | blockdata = block._asdict() 615 | del blockdata["id"] 616 | 617 | url = f"{scheme}://{host}{api_path}{id}" 618 | 619 | response = requests.put( 620 | url, headers=requests_headers(token), json=blockdata, timeout=REQUEST_TIMEOUT 621 | ) 622 | if response.status_code != 200: 623 | raise ValueError( 624 | f"Something went wrong: {response.status_code}: {response.content}" 625 | ) 626 | 627 | 628 | def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str = "https"): 629 | """Block a domain on Mastodon host""" 630 | log.debug(f"Adding block entry for {blockdata.domain} at {host}...") 631 | api_path = "/api/v1/admin/domain_blocks" 632 | 633 | url = f"{scheme}://{host}{api_path}" 634 | 635 | response = requests.post( 636 | url, 637 | headers=requests_headers(token), 638 | json=blockdata._asdict(), 639 | timeout=REQUEST_TIMEOUT, 640 | ) 641 | if response.status_code == 422: 642 | # A stricter block already exists. Probably for the base domain. 643 | err = json.loads(response.content) 644 | log.warning(err["error"]) 645 | 646 | elif response.status_code != 200: 647 | 648 | raise ValueError( 649 | f"Something went wrong: {response.status_code}: {response.content}" 650 | ) 651 | 652 | 653 | def push_blocklist( 654 | token: str, 655 | host: str, 656 | blocklist: list[DomainBlock], 657 | dryrun: bool = False, 658 | import_fields: list = ["domain", "severity"], 659 | max_followed_severity: BlockSeverity = BlockSeverity("silence"), 660 | scheme: str = "https", 661 | override_private_comment: str = None, 662 | ): 663 | """Push a blocklist to a remote instance. 664 | 665 | Updates existing entries if they exist, creates new blocks if they don't. 666 | 667 | @param token: The Bearer token for OAUTH API authentication 668 | @param host: The instance host, FQDN or IP 669 | @param blocklist: A list of block definitions. They must include the domain. 670 | @param import_fields: A list of fields to import to the instances. 
671 | """ 672 | log.info(f"Pushing blocklist to host {host} ...") 673 | # Fetch the existing blocklist from the instance 674 | # Force use of the admin API, and add 'id' to the list of fields 675 | if "id" not in import_fields: 676 | import_fields.append("id") 677 | serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme) 678 | 679 | # # Convert serverblocks to a dictionary keyed by domain name 680 | # knownblocks = {row.domain: row for row in serverblocks} 681 | 682 | for newblock in blocklist.values(): 683 | 684 | log.debug(f"Processing block: {newblock}") 685 | if newblock.domain in serverblocks: 686 | log.debug( 687 | f"Block already exists for {newblock.domain}, " 688 | f"checking for differences..." 689 | ) 690 | 691 | oldblock = serverblocks[newblock.domain] 692 | 693 | change_needed = is_change_needed(oldblock, newblock, import_fields) 694 | 695 | # Is the severity changing? 696 | if "severity" in change_needed: 697 | log.debug("Severity change requested, checking...") 698 | if newblock.severity > oldblock.severity: 699 | # Confirm if we really want to change the severity 700 | # If we still have followers of the remote domain, 701 | # we may not want to go all the way to full suspend, 702 | # depending on the configuration 703 | newseverity = check_followed_severity( 704 | host, 705 | token, 706 | oldblock.domain, 707 | newblock.severity, 708 | max_followed_severity, 709 | scheme, 710 | ) 711 | if newseverity != oldblock.severity: 712 | newblock.severity = newseverity 713 | else: 714 | log.info( 715 | "Keeping severity of block the same to avoid disrupting followers." # noqa 716 | ) 717 | change_needed.remove("severity") 718 | 719 | if change_needed: 720 | log.info( 721 | f"Change detected. Need to update {change_needed} " 722 | f"for domain block for {oldblock.domain}" 723 | ) 724 | log.info(f"Old block definition: {oldblock}") 725 | log.info(f"Pushing new block definition: {newblock}") 726 | blockdata = oldblock.copy() 727 | blockdata.update(newblock) 728 | log.debug(f"Block as dict: {blockdata._asdict()}") 729 | 730 | if not dryrun: 731 | update_known_block(token, host, blockdata, scheme) 732 | # add a pause here so we don't melt the instance 733 | time.sleep(API_CALL_DELAY) 734 | else: 735 | log.info("Dry run selected. Not applying changes.") 736 | 737 | else: 738 | log.debug("No differences detected. Not updating.") 739 | pass 740 | 741 | else: 742 | # stamp this record with a private comment, since we're the ones adding it 743 | if override_private_comment: 744 | newblock.private_comment = override_private_comment 745 | 746 | # This is a new block for the target instance, so we 747 | # need to add a block rather than update an existing one 748 | log.info(f"Adding new block: {newblock}...") 749 | log.debug(f"Block as dict: {newblock._asdict()}") 750 | 751 | # Make sure the new block doesn't clobber a domain with followers 752 | newblock.severity = check_followed_severity( 753 | host, 754 | token, 755 | newblock.domain, 756 | newblock.severity, 757 | max_followed_severity, 758 | scheme, 759 | ) 760 | if not dryrun: 761 | add_block(token, host, newblock, scheme) 762 | # add a pause here so we don't melt the instance 763 | time.sleep(API_CALL_DELAY) 764 | else: 765 | log.info("Dry run selected. 
Not adding block.") 766 | 767 | 768 | def load_config(configfile: str): 769 | """Augment commandline arguments with config file parameters 770 | 771 | Config file is expected to be in TOML format 772 | """ 773 | conf = toml.load(configfile) 774 | return conf 775 | 776 | 777 | def save_intermediate_blocklist( 778 | blocklist: Blocklist, filedir: str, export_fields: list = ["domain", "severity"] 779 | ): 780 | """Save a local copy of a blocklist we've downloaded""" 781 | # Invent a filename based on the remote source 782 | # If the source was a URL, convert it to something less messy 783 | # If the source was a remote domain, just use the name of the domain 784 | source = blocklist.origin 785 | log.debug(f"Saving intermediate blocklist from {source}") 786 | source = source.replace("/", "-") 787 | filename = f"{source}.csv" 788 | filepath = os.path.join(filedir, filename) 789 | save_blocklist_to_file(blocklist, filepath, export_fields) 790 | 791 | 792 | def save_blocklist_to_file( 793 | blocklist: Blocklist, filepath: str, export_fields: list = ["domain", "severity"] 794 | ): 795 | """Save a blocklist we've downloaded from a remote source 796 | 797 | @param blocklist: A dictionary of block definitions, keyed by domain 798 | @param filepath: The path to the file the list should be saved in. 799 | @param export_fields: Which fields to include in the export. 800 | """ 801 | try: 802 | sorted_list = sorted(blocklist.blocks.items()) 803 | except KeyError: 804 | log.error("Field 'domain' not found in blocklist.") 805 | log.debug(f"blocklist is: {sorted_list}") 806 | except AttributeError: 807 | log.error("Attribute error!") 808 | import pdb 809 | 810 | pdb.set_trace() 811 | 812 | log.debug(f"export fields: {export_fields}") 813 | 814 | with open(filepath, "w") as fp: 815 | writer = csv.DictWriter(fp, export_fields, extrasaction="ignore") 816 | writer.writeheader() 817 | for key, value in sorted_list: 818 | writer.writerow(value) 819 | 820 | 821 | def resolve_replacements(endpoints: list[dict]) -> list[dict]: 822 | """Resolve any replacement tokens in the list of endpoints""" 823 | 824 | resolved = [] 825 | for item in endpoints: 826 | item = dict(**item) 827 | if "token" in item and "token_env_var" in item: 828 | log.warning( 829 | f"Both `token` and `token_env_var` have been provided; using" 830 | f" the explicit token for {item.get('domain', 'the entry')}" 831 | ) 832 | 833 | # We take the token that's explicitly stated, 834 | # even if there's also an environment variable set. 835 | # Delete the token_env_var key 836 | del item["token_env_var"] 837 | 838 | elif "token" in item: 839 | pass 840 | 841 | elif "token_env_var" in item: 842 | value = os.getenv(item["token_env_var"]) 843 | if value is None: 844 | raise ValueError( 845 | f"Environment variable" f" '{item['token_env_var']}' not set." 846 | ) 847 | 848 | item["token"] = value 849 | 850 | else: 851 | # lastly, try look for a default token. 
852 | domain = item.get("domain") 853 | if domain is not None: 854 | domain_env_var_prefix = domain.upper().replace(".", "_") 855 | domain_env_var = f"{domain_env_var_prefix}_TOKEN" 856 | value = os.getenv(domain_env_var) 857 | if value is not None: 858 | item["token"] = value 859 | 860 | resolved.append(item) 861 | return resolved 862 | 863 | 864 | def save_domain_block_audit_to_file(blocklist: BlockAuditList, filepath: str): 865 | """Save an audit log of domains blocked 866 | 867 | @param blocklist: A dictionary of block definitions, keyed by domain 868 | @param filepath: The path to the file the list should be saved in. 869 | """ 870 | export_fields = ["domain", "count", "percent"] 871 | 872 | try: 873 | sorted_list = sorted(blocklist.blocks.items()) 874 | except KeyError: 875 | log.error("Field 'domain' not found in blocklist.") 876 | return 877 | except AttributeError: 878 | log.error(f"Expected a BlockAuditList object, got: {blocklist!r}") 879 | # Bail out here: carrying on would hit an unbound 880 | # `sorted_list` in the write loop below. 881 | return 882 | 883 | log.debug("exporting audit file") 884 | 885 | with open(filepath, "w") as fp: 886 | writer = csv.DictWriter(fp, export_fields, extrasaction="ignore") 887 | writer.writeheader() 888 | for key, value in sorted_list: 889 | writer.writerow(value) 890 | 891 | 892 | def augment_args(args, tomldata: str = None): 893 | """Augment commandline arguments with config file parameters 894 | 895 | If tomldata is provided, uses that data instead of loading 896 | from a config file. 897 | """ 898 | if tomldata: 899 | conf = toml.loads(tomldata) 900 | else: 901 | conf = toml.load(args.config) 902 | 903 | if not args.no_fetch_url: 904 | args.no_fetch_url = conf.get("no_fetch_url", False) 905 | 906 | if not args.no_fetch_instance: 907 | args.no_fetch_instance = conf.get("no_fetch_instance", False) 908 | 909 | if not args.no_push_instance: 910 | args.no_push_instance = conf.get("no_push_instance", False) 911 | 912 | if not args.blocklist_savefile: 913 | args.blocklist_savefile = conf.get("blocklist_savefile", None) 914 | 915 | if not args.save_intermediate: 916 | args.save_intermediate = conf.get("save_intermediate", False) 917 | 918 | if not args.override_private_comment: 919 | args.override_private_comment = conf.get("override_private_comment", None) 920 | 921 | if not args.savedir: 922 | args.savedir = conf.get("savedir", "/tmp") 923 | 924 | if not args.blocklist_auditfile: 925 | args.blocklist_auditfile = conf.get("blocklist_auditfile", None) 926 | 927 | if not args.export_fields: 928 | args.export_fields = conf.get("export_fields", []) 929 | 930 | if not args.import_fields: 931 | args.import_fields = conf.get("import_fields", []) 932 | 933 | if not args.mergeplan: 934 | args.mergeplan = conf.get("mergeplan", "max") 935 | 936 | if not args.merge_threshold: 937 | args.merge_threshold = conf.get("merge_threshold", 0) 938 | 939 | if not args.merge_threshold_type: 940 | args.merge_threshold_type = conf.get("merge_threshold_type", "count") 941 | 942 | args.blocklist_url_sources = conf.get("blocklist_url_sources", []) 943 | args.blocklist_instance_sources = resolve_replacements( 944 | conf.get("blocklist_instance_sources", []) 945 | ) 946 | args.allowlist_url_sources = conf.get("allowlist_url_sources", []) 947 | args.blocklist_instance_destinations = resolve_replacements( 948 | conf.get("blocklist_instance_destinations", []) 949 | ) 950 | 951 | return args 952 | 953 | 954 | def setup_argparse(): 955 | """Setup the commandline arguments""" 956 | ap = argparse.ArgumentParser( 957 | description="Bulk blocklist tool", 
958 | epilog=f"Part of FediBlockHole v{__version__}", 959 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 960 | ) 961 | ap.add_argument( 962 | "-c", 963 | "--config", 964 | default="/etc/default/fediblockhole.conf.toml", 965 | help="Config file", 966 | ) 967 | ap.add_argument( 968 | "-V", "--version", action="store_true", help="Show version and exit." 969 | ) 970 | 971 | ap.add_argument( 972 | "-o", 973 | "--outfile", 974 | dest="blocklist_savefile", 975 | help="Save merged blocklist to a local file.", 976 | ) 977 | ap.add_argument( 978 | "-S", 979 | "--save-intermediate", 980 | dest="save_intermediate", 981 | action="store_true", 982 | help="Save intermediate blocklists we fetch to local files.", 983 | ) 984 | ap.add_argument( 985 | "-D", 986 | "--savedir", 987 | dest="savedir", 988 | help="Directory path to save intermediate lists.", 989 | ) 990 | ap.add_argument("-m", "--mergeplan", choices=["min", "max"], help="Set mergeplan.") 991 | ap.add_argument( 992 | "-b", 993 | "--block-audit-file", 994 | dest="blocklist_auditfile", 995 | help="Save blocklist auditfile to this location.", 996 | ) 997 | ap.add_argument("--merge-threshold", type=int, help="Merge threshold value") 998 | ap.add_argument( 999 | "--merge-threshold-type", 1000 | choices=["count", "pct"], 1001 | help="Type of merge threshold to use.", 1002 | ) 1003 | ap.add_argument( 1004 | "--override-private-comment", 1005 | dest="override_private_comment", 1006 | help="Override private_comment with this string for new blocks when pushing blocklists.", # noqa 1007 | ) 1008 | 1009 | ap.add_argument( 1010 | "-I", 1011 | "--import-field", 1012 | dest="import_fields", 1013 | action="append", 1014 | help="Extra blocklist fields to import.", 1015 | ) 1016 | ap.add_argument( 1017 | "-E", 1018 | "--export-field", 1019 | dest="export_fields", 1020 | action="append", 1021 | help="Extra blocklist fields to export.", 1022 | ) 1023 | ap.add_argument( 1024 | "-A", 1025 | "--allow", 1026 | dest="allow_domains", 1027 | action="append", 1028 | default=[], 1029 | help="Override any blocks to allow this domain.", 1030 | ) 1031 | 1032 | ap.add_argument( 1033 | "--no-fetch-url", 1034 | dest="no_fetch_url", 1035 | action="store_true", 1036 | help="Don't fetch from URLs, even if configured.", 1037 | ) 1038 | ap.add_argument( 1039 | "--no-fetch-instance", 1040 | dest="no_fetch_instance", 1041 | action="store_true", 1042 | help="Don't fetch from instances, even if configured.", 1043 | ) 1044 | ap.add_argument( 1045 | "--no-push-instance", 1046 | dest="no_push_instance", 1047 | action="store_true", 1048 | help="Don't push to instances, even if configured.", 1049 | ) 1050 | 1051 | ap.add_argument( 1052 | "--loglevel", 1053 | choices=["debug", "info", "warning", "error", "critical"], 1054 | help="Set log output level.", 1055 | ) 1056 | ap.add_argument( 1057 | "--dryrun", 1058 | action="store_true", 1059 | help="Don't actually push updates, just show what would happen.", 1060 | ) 1061 | 1062 | return ap 1063 | 1064 | 1065 | def main(): 1066 | 1067 | ap = setup_argparse() 1068 | args = ap.parse_args() 1069 | 1070 | if args.loglevel is not None: 1071 | levelname = args.loglevel.upper() 1072 | log.setLevel(getattr(logging, levelname)) 1073 | 1074 | if args.version: 1075 | print(f"v{__version__}") 1076 | sys.exit(0) 1077 | 1078 | # Load the configuration file 1079 | args = augment_args(args) 1080 | 1081 | # Do the work of syncing 1082 | sync_blocklists(args) 1083 | -------------------------------------------------------------------------------- 
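A minimal sketch of how the entry points above chain together, mirroring what main() does but inline (the TOML snippet and the file: URL are invented for illustration; augment_args() accepting raw TOML text instead of a config file path is the same hook the test suite below relies on):

    from fediblockhole import augment_args, setup_argparse

    ap = setup_argparse()
    args = ap.parse_args(["-m", "min", "--dryrun"])

    # Hypothetical config text; normally toml.load(args.config) supplies this.
    tomldata = """
    blocklist_url_sources = [ { url = 'file:///tmp/demo-blocklist-01.csv', format = 'csv' } ]
    blocklist_instance_sources = []
    blocklist_instance_destinations = []
    """
    args = augment_args(args, tomldata)

    assert args.mergeplan == "min"  # explicit commandline flags win over config defaults
    assert args.dryrun is True  # sync_blocklists(args) would fetch and merge, but push nothing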
/src/fediblockhole/blocklists.py: -------------------------------------------------------------------------------- 1 | """Parse various blocklist data formats 2 | """ 3 | 4 | from __future__ import annotations 5 | 6 | import csv 7 | import json 8 | import logging 9 | from dataclasses import dataclass, field 10 | from typing import Iterable 11 | 12 | from .const import BlockAudit, BlockSeverity, DomainBlock 13 | 14 | log = logging.getLogger("fediblockhole") 15 | 16 | 17 | @dataclass 18 | class Blocklist: 19 | """A Blocklist object 20 | 21 | A Blocklist is a list of DomainBlocks from an origin 22 | """ 23 | 24 | origin: str = None 25 | blocks: dict[str, DomainBlock] = field(default_factory=dict) 26 | 27 | def __len__(self): 28 | return len(self.blocks) 29 | 30 | def __class_getitem__(cls, item): 31 | return dict[str, DomainBlock] 32 | 33 | def __getitem__(self, item): 34 | return self.blocks[item] 35 | 36 | def __iter__(self): 37 | return self.blocks.__iter__() 38 | 39 | def items(self): 40 | return self.blocks.items() 41 | 42 | def values(self): 43 | return self.blocks.values() 44 | 45 | 46 | @dataclass 47 | class BlockAuditList: 48 | """A BlockAuditlist object 49 | 50 | A BlockAuditlist is a list of BlockAudits from an origin 51 | """ 52 | 53 | origin: str = None 54 | blocks: dict[str, BlockAudit] = field(default_factory=dict) 55 | 56 | def __len__(self): 57 | return len(self.blocks) 58 | 59 | def __class_getitem__(cls, item): 60 | return dict[str, BlockAudit] 61 | 62 | def __getitem__(self, item): 63 | return self.blocks[item] 64 | 65 | def __iter__(self): 66 | return self.blocks.__iter__() 67 | 68 | def items(self): 69 | return self.blocks.items() 70 | 71 | def values(self): 72 | return self.blocks.values() 73 | 74 | 75 | class BlocklistParser(object): 76 | """ 77 | Base class for parsing blocklists 78 | """ 79 | 80 | do_preparse = False 81 | 82 | def __init__( 83 | self, 84 | import_fields: list = ["domain", "severity"], 85 | max_severity: str = "suspend", 86 | ): 87 | """Create a Parser 88 | 89 | @param import_fields: an optional list of fields to limit the parser to. 90 | Ignore any fields in a block item that aren't in import_fields. 
91 | """ 92 | self.import_fields = import_fields 93 | self.max_severity = BlockSeverity(max_severity) 94 | 95 | def preparse(self, blockdata) -> Iterable: 96 | """Some raw datatypes need to be converted into an iterable""" 97 | raise NotImplementedError 98 | 99 | def parse_blocklist(self, blockdata, origin: str = None) -> Blocklist: 100 | """Parse an iterable of blocklist items 101 | @param blocklist: An Iterable of blocklist items 102 | @returns: A dict of DomainBlocks, keyed by domain 103 | """ 104 | if self.do_preparse: 105 | blockdata = self.preparse(blockdata) 106 | 107 | parsed_list = Blocklist(origin) 108 | for blockitem in blockdata: 109 | block = self.parse_item(blockitem) 110 | parsed_list.blocks[block.domain] = block 111 | return parsed_list 112 | 113 | def parse_item(self, blockitem) -> DomainBlock: 114 | """Parse an individual block item 115 | 116 | @param blockitem: an individual block to be parsed 117 | @param import_fields: fields of a block we will import 118 | """ 119 | raise NotImplementedError 120 | 121 | 122 | class BlocklistParserJSON(BlocklistParser): 123 | """Parse a JSON formatted blocklist""" 124 | 125 | do_preparse = True 126 | 127 | def preparse(self, blockdata) -> Iterable: 128 | """Parse the blockdata as JSON if needed""" 129 | if type(blockdata) is type(""): 130 | return json.loads(blockdata) 131 | return blockdata 132 | 133 | def parse_item(self, blockitem: dict) -> DomainBlock: 134 | # Remove fields we don't want to import 135 | origitem = blockitem.copy() 136 | for key in origitem: 137 | if key not in self.import_fields: 138 | del blockitem[key] 139 | 140 | # Convert dict to NamedTuple with the double-star operator 141 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa 142 | block = DomainBlock(**blockitem) 143 | if block.severity > self.max_severity: 144 | block.severity = self.max_severity 145 | return block 146 | 147 | 148 | class BlocklistParserMastodonAPIPublic(BlocklistParserJSON): 149 | """The public blocklist API is slightly different to the admin one""" 150 | 151 | def parse_item(self, blockitem: dict) -> DomainBlock: 152 | # Remove fields we don't want to import 153 | origitem = blockitem.copy() 154 | for key in origitem: 155 | # The Mastodon public API uses the 'public' field 156 | # to mean 'public_comment' because what even is consistency? 157 | if key == "comment": 158 | key = "public_comment" 159 | blockitem["public_comment"] = blockitem["comment"] 160 | del blockitem["comment"] 161 | if key not in self.import_fields: 162 | del blockitem[key] 163 | 164 | # Convert dict to NamedTuple with the double-star operator 165 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa 166 | block = DomainBlock(**blockitem) 167 | if block.severity > self.max_severity: 168 | block.severity = self.max_severity 169 | return block 170 | 171 | 172 | class BlocklistParserCSV(BlocklistParser): 173 | """Parse CSV formatted blocklists 174 | 175 | The parser expects the CSV data to include a header with the field names. 176 | """ 177 | 178 | do_preparse = True 179 | 180 | def preparse(self, blockdata) -> Iterable: 181 | """Use a csv.DictReader to create an iterable from the blockdata""" 182 | return csv.DictReader(blockdata.split("\n")) 183 | 184 | def parse_item(self, blockitem: dict) -> DomainBlock: 185 | # Coerce booleans from string to Python bool 186 | # FIXME: Is this still necessary with the DomainBlock object? 
187 | for boolkey in ["reject_media", "reject_reports", "obfuscate"]: 188 | if boolkey in blockitem: 189 | blockitem[boolkey] = str2bool(blockitem[boolkey]) 190 | 191 | # Remove fields we don't want to import 192 | origitem = blockitem.copy() 193 | for key in origitem: 194 | if key not in self.import_fields: 195 | log.debug(f"ignoring field '{key}'") 196 | del blockitem[key] 197 | 198 | # Convert dict to DomainBlock with the double-star operator 199 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa 200 | block = DomainBlock(**blockitem) 201 | if block.severity > self.max_severity: 202 | block.severity = self.max_severity 203 | return block 204 | 205 | 206 | class BlocklistParserMastodonCSV(BlocklistParserCSV): 207 | """Parse Mastodon CSV formatted blocklists 208 | 209 | The Mastodon v4.1.x domain block CSV export prefixes its 210 | field names with a '#' character because… reasons? 211 | """ 212 | 213 | do_preparse = True 214 | 215 | def parse_item(self, blockitem: dict) -> DomainBlock: 216 | """Build a new blockitem dict with new un-#ed keys""" 217 | newdict = {} 218 | for key in blockitem: 219 | newkey = key.lstrip("#") 220 | newdict[newkey] = blockitem[key] 221 | 222 | return super().parse_item(newdict) 223 | 224 | 225 | class RapidBlockParserCSV(BlocklistParserCSV): 226 | """Parse RapidBlock CSV blocklists 227 | 228 | RapidBlock CSV blocklists are just a newline separated list of domains. 229 | """ 230 | 231 | def preparse(self, blockdata) -> Iterable: 232 | """Prepend a 'domain' field header to the data""" 233 | log.debug(f"blockdata: {blockdata[:100]}") 234 | blockdata = "".join(["domain\r\n", blockdata]) 235 | 236 | return csv.DictReader(blockdata.split("\r\n")) 237 | 238 | 239 | class RapidBlockParserJSON(BlocklistParserJSON): 240 | """Parse RapidBlock JSON formatted blocklists""" 241 | 242 | def preparse(self, blockdata) -> Iterable: 243 | rb_dict = json.loads(blockdata) 244 | # We want to iterate over all the dictionary items 245 | return rb_dict["blocks"].items() 246 | 247 | def parse_item(self, blockitem: tuple) -> DomainBlock: 248 | """Parse an individual item in a RapidBlock list""" 249 | # Each item is a tuple of: 250 | # (domain, {dictionary of attributes}) 251 | domain = blockitem[0] 252 | 253 | # RapidBlock has a binary block level which we map 254 | # to 'suspend' if True, and 'noop' if False. 
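# Worked example (shape assumed from the iteration above):
#   ("example.org", {"isBlocked": True, "reason": "spam"})
# becomes DomainBlock("example.org", "suspend", "spam") when
# 'public_comment' is in import_fields.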
255 | isblocked = blockitem[1]["isBlocked"] 256 | if isblocked: 257 | severity = "suspend" 258 | else: 259 | severity = "noop" 260 | 261 | if "public_comment" in self.import_fields: 262 | public_comment = blockitem[1]["reason"] 263 | else: 264 | public_comment = "" 265 | 266 | # There's a 'tags' field as well, but we can't 267 | # do much with that in Mastodon yet 268 | 269 | block = DomainBlock(domain, severity, public_comment) 270 | if block.severity > self.max_severity: 271 | block.severity = self.max_severity 272 | 273 | return block 274 | 275 | 276 | def str2bool(boolstring: str) -> bool: 277 | """Helper function to convert boolean strings to actual Python bools""" 278 | boolstring = boolstring.lower() 279 | if boolstring in ["true", "t", "1", "y", "yes"]: 280 | return True 281 | elif boolstring in ["", "false", "f", "0", "n", "no"]: 282 | return False 283 | else: 284 | raise ValueError(f"Cannot parse value '{boolstring}' as boolean") 285 | 286 | 287 | FORMAT_PARSERS = { 288 | "csv": BlocklistParserCSV, 289 | "mastodon_csv": BlocklistParserMastodonCSV, 290 | "json": BlocklistParserJSON, 291 | "mastodon_api_public": BlocklistParserMastodonAPIPublic, 292 | "rapidblock.csv": RapidBlockParserCSV, 293 | "rapidblock.json": RapidBlockParserJSON, 294 | } 295 | 296 | 297 | # helper function to select the appropriate Parser 298 | def parse_blocklist( 299 | blockdata, 300 | origin, 301 | format="csv", 302 | import_fields: list = ["domain", "severity"], 303 | max_severity: str = "suspend", 304 | ): 305 | """Parse a blocklist in the given format""" 306 | log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...") 307 | 308 | parser = FORMAT_PARSERS[format](import_fields, max_severity) 309 | return parser.parse_blocklist(blockdata, origin) 310 | -------------------------------------------------------------------------------- /src/fediblockhole/const.py: -------------------------------------------------------------------------------- 1 | """ Constant objects used by FediBlockHole 2 | """ 3 | 4 | from __future__ import annotations 5 | 6 | import enum 7 | import logging 8 | 9 | log = logging.getLogger("fediblockhole") 10 | 11 | 12 | class SeverityLevel(enum.IntEnum): 13 | """How severe should a block be? 
Higher is more severe.""" 14 | 15 | NONE = enum.auto() 16 | SILENCE = enum.auto() 17 | SUSPEND = enum.auto() 18 | 19 | 20 | class BlockSeverity(object): 21 | """A representation of a block severity 22 | 23 | We add some helpful functions rather than using a bare IntEnum 24 | """ 25 | 26 | def __init__(self, severity: str = None): 27 | self._level = self.str2level(severity) 28 | 29 | @property 30 | def level(self): 31 | return self._level 32 | 33 | @level.setter 34 | def level(self, value): 35 | if isinstance(value, SeverityLevel): 36 | self._level = value 37 | elif isinstance(value, str): 38 | self._level = self.str2level(value) 39 | else: 40 | raise ValueError(f"Invalid level value '{value}'") 41 | 42 | def str2level(self, severity: str = None): 43 | """Convert a string severity level to an internal enum""" 44 | 45 | if severity in [None, "", "noop"]: 46 | return SeverityLevel.NONE 47 | 48 | elif severity in ["silence"]: 49 | return SeverityLevel.SILENCE 50 | 51 | elif severity in ["suspend"]: 52 | return SeverityLevel.SUSPEND 53 | 54 | else: 55 | raise ValueError(f"Invalid severity value '{severity}'") 56 | 57 | def __repr__(self): 58 | return f"'{str(self)}'" 59 | 60 | def __str__(self): 61 | """A string version of the severity level""" 62 | levelmap = { 63 | SeverityLevel.NONE: "noop", 64 | SeverityLevel.SILENCE: "silence", 65 | SeverityLevel.SUSPEND: "suspend", 66 | } 67 | return levelmap[self.level] 68 | 69 | def __lt__(self, other): 70 | # Return a plain bool rather than True-or-None 71 | return self._level < other._level 72 | 73 | def __gt__(self, other): 74 | # Return a plain bool rather than True-or-None 75 | return self._level > other._level 76 | 77 | def __eq__(self, other): 78 | # None is never equal to a BlockSeverity 79 | return other is not None and self._level == other._level 80 | 81 | def __le__(self, other): 82 | # Return a plain bool rather than True-or-None 83 | return self._level <= other._level 84 | 85 | def __ge__(self, other): 86 | # Return a plain bool rather than True-or-None 87 | return self._level >= other._level 88 | 89 | 90 | class BlockAudit(object): 91 | 92 | fields = [ 93 | "domain", 94 | "count", 95 | "percent", 96 | ] 97 | 98 | all_fields = ["domain", "count", "percent", "id"] 99 | 100 | def __init__(self, domain: str, count: int = 0, percent: int = 0, id: int = None): 101 | """Initialize the BlockAudit""" 102 | self.domain = domain 103 | self.count = count 104 | self.percent = percent 105 | self.id = id 106 | 107 | def _asdict(self): 108 | """Return a dict version of this object""" 109 | dictval = { 110 | "domain": self.domain, 111 | "count": self.count, 112 | "percent": self.percent, 113 | } 114 | if self.id: 115 | dictval["id"] = self.id 116 | 117 | return dictval 118 | 119 | def __repr__(self): 120 | 121 | return f"<BlockAudit {self._asdict()}>" 122 | 123 | def copy(self): 124 | """Make a copy of this object and return it""" 125 | retval = BlockAudit(**self._asdict()) 126 | return retval 127 | 128 | def update(self, data): 129 | """Update my kwargs""" 130 | for key in data: 131 | setattr(self, key, data[key]) 132 | 133 | def __iter__(self): 134 | """Be iterable""" 135 | keys = list(self.fields)  # copy, so we don't mutate the class attribute 136 | 137 | if getattr(self, "id", False): 138 | keys.append("id") 139 | 140 | for k in keys: 141 | yield k 142 | 143 | def __getitem__(self, k, default=None): 144 | "Behave like a dict for getting values" 145 | if k not in self.all_fields: 146 | raise KeyError(f"Invalid key '{k}'") 147 | 148 | return getattr(self, k, default) 149 | 150 | def get(self, k, default=None): 151 | return self.__getitem__(k, default) 152 | 153 | 154 | # class _DomainBlock(NamedTuple): 155 | # domain: str # FIXME: Use an actual Domain object from somewhere? 
156 | # severity: BlockSeverity = BlockSeverity.SUSPEND 157 | # public_comment: str = '' 158 | # private_comment: str = '' 159 | # reject_media: bool = False 160 | # reject_reports: bool = False 161 | # obfuscate: bool = False 162 | 163 | 164 | class DomainBlock(object): 165 | 166 | fields = [ 167 | "domain", 168 | "severity", 169 | "public_comment", 170 | "private_comment", 171 | "reject_media", 172 | "reject_reports", 173 | "obfuscate", 174 | ] 175 | 176 | all_fields = [ 177 | "domain", 178 | "severity", 179 | "public_comment", 180 | "private_comment", 181 | "reject_media", 182 | "reject_reports", 183 | "obfuscate", 184 | "id", 185 | ] 186 | 187 | def __init__( 188 | self, 189 | domain: str, 190 | severity: BlockSeverity = BlockSeverity("suspend"), 191 | public_comment: str = "", 192 | private_comment: str = "", 193 | reject_media: bool = False, 194 | reject_reports: bool = False, 195 | obfuscate: bool = False, 196 | id: int = None, 197 | ): 198 | """Initialize the DomainBlock""" 199 | self.domain = domain 200 | self.severity = severity 201 | self.public_comment = public_comment 202 | self.private_comment = private_comment 203 | self.reject_media = reject_media 204 | self.reject_reports = reject_reports 205 | self.obfuscate = obfuscate 206 | self.id = id 207 | 208 | @property 209 | def severity(self): 210 | return self._severity 211 | 212 | @severity.setter 213 | def severity(self, sev): 214 | if isinstance(sev, BlockSeverity): 215 | self._severity = sev 216 | else: 217 | self._severity = BlockSeverity(sev) 218 | 219 | def _asdict(self): 220 | """Return a dict version of this object""" 221 | dictval = { 222 | "domain": self.domain, 223 | "severity": str(self.severity), 224 | "public_comment": self.public_comment, 225 | "private_comment": self.private_comment, 226 | "reject_media": self.reject_media, 227 | "reject_reports": self.reject_reports, 228 | "obfuscate": self.obfuscate, 229 | } 230 | if self.id: 231 | dictval["id"] = self.id 232 | 233 | return dictval 234 | 235 | def compare_fields(self, other, fields=None) -> list: 236 | """Compare two DomainBlocks on specific fields. 237 | If all the fields are equal, the DomainBlocks are equal. 
238 | 239 | @returns: a list of the fields that are different 240 | """ 241 | if not isinstance(other, DomainBlock): 242 | raise ValueError(f"Cannot compare DomainBlock to {type(other)}:{other}") 243 | 244 | if fields is None: 245 | fields = self.fields 246 | 247 | diffs = [] 248 | # Check if all the requested fields are equal 249 | for field in fields: 250 | if getattr(self, field) != getattr(other, field): 251 | diffs.append(field) 252 | return diffs 253 | 254 | def __eq__(self, other): 255 | diffs = self.compare_fields(other) 256 | # Return a plain bool, not True-or-None 257 | return len(diffs) == 0 258 | 259 | def __repr__(self): 260 | 261 | return f"<DomainBlock {self._asdict()}>" 262 | 263 | def copy(self): 264 | """Make a copy of this object and return it""" 265 | retval = DomainBlock(**self._asdict()) 266 | return retval 267 | 268 | def update(self, data): 269 | """Update my kwargs""" 270 | for key in data: 271 | setattr(self, key, data[key]) 272 | 273 | def __iter__(self): 274 | """Be iterable""" 275 | keys = list(self.fields)  # copy, so we don't mutate the class attribute 276 | 277 | if getattr(self, "id", False): 278 | keys.append("id") 279 | 280 | for k in keys: 281 | yield k 282 | 283 | def __getitem__(self, k, default=None): 284 | "Behave like a dict for getting values" 285 | if k not in self.all_fields: 286 | raise KeyError(f"Invalid key '{k}'") 287 | 288 | return getattr(self, k, default) 289 | 290 | def get(self, k, default=None): 291 | return self.__getitem__(k, default) 292 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import pytest 5 | 6 | sys.path.append(os.path.join(os.path.dirname(__file__), "helpers")) 7 | 8 | 9 | def load_data(datafile): 10 | """Load test data from a fixture datafile""" 11 | with open(os.path.join(os.path.dirname(__file__), "fixtures", datafile)) as fp: 12 | return fp.read() 13 | 14 | 15 | @pytest.fixture 16 | def data_mastodon_json(): 17 | return load_data("data-mastodon.json") 18 | 19 | 20 | @pytest.fixture 21 | def data_rapidblock_json(): 22 | return load_data("data-rapidblock.json") 23 | 24 | 25 | @pytest.fixture 26 | def data_suspends_01(): 27 | return load_data("data-suspends-01.csv") 28 | 29 | 30 | @pytest.fixture 31 | def data_silences_01(): 32 | return load_data("data-silences-01.csv") 33 | 34 | 35 | @pytest.fixture 36 | def data_noop_01(): 37 | return load_data("data-noop-01.csv") 38 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenmagic/fediblockhole/ba40084772a565f36af1290070e6c9bba14fb9e7/tests/fixtures/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/data-mastodon.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "234", 4 | "domain": "example.org", 5 | "created_at": "2023-01-09T05:17:50.614Z", 6 | "severity": "suspend", 7 | "reject_media": true, 8 | "reject_reports": true, 9 | "private_comment": "A private comment", 10 | "public_comment": "A public comment", 11 | "obfuscate": true 12 | }, 13 | { 14 | "id": "233", 15 | "domain": "example2.org", 16 | "created_at": "2023-01-09T05:09:01.859Z", 17 | "severity": "silence", 18 | "reject_media": true, 19 | "reject_reports": true, 20 | "private_comment": "Another private comment", 21 | "public_comment": "Another public comment", 22 | 
"obfuscate": true 23 | }, 24 | { 25 | "id": "232", 26 | "domain": "example3.org", 27 | "created_at": "2023-01-09T05:08:58.833Z", 28 | "severity": "suspend", 29 | "reject_media": true, 30 | "reject_reports": true, 31 | "private_comment": "More comments? What is this?", 32 | "public_comment": "Yes we love to comment", 33 | "obfuscate": true 34 | }, 35 | { 36 | "id": "231", 37 | "domain": "example4.org", 38 | "created_at": "2023-01-09T05:04:01.856Z", 39 | "severity": "noop", 40 | "reject_media": true, 41 | "reject_reports": true, 42 | "private_comment": "I cannot believe all the comments", 43 | "public_comment": "Look how many comments we can fit in here", 44 | "obfuscate": true 45 | }, 46 | { 47 | "id": "230", 48 | "domain": "example5.org", 49 | "created_at": "2023-01-08T21:37:22.665Z", 50 | "severity": "suspend", 51 | "reject_media": false, 52 | "reject_reports": false, 53 | "private_comment": "", 54 | "public_comment": "lack of moderation", 55 | "obfuscate": false 56 | }, 57 | { 58 | "id": "2308", 59 | "domain": "example6.org", 60 | "created_at": "2023-01-06T08:36:53.989Z", 61 | "severity": "suspend", 62 | "reject_media": false, 63 | "reject_reports": false, 64 | "private_comment": "", 65 | "public_comment": "anti-trans bigotry", 66 | "obfuscate": false 67 | }, 68 | { 69 | "id": "2306", 70 | "domain": "example7.org", 71 | "created_at": "2023-01-04T08:14:05.381Z", 72 | "severity": "suspend", 73 | "reject_media": false, 74 | "reject_reports": false, 75 | "private_comment": "", 76 | "public_comment": "lack of moderation", 77 | "obfuscate": false 78 | }, 79 | { 80 | "id": "2305", 81 | "domain": "example8.org", 82 | "created_at": "2023-01-04T08:13:48.891Z", 83 | "severity": "suspend", 84 | "reject_media": false, 85 | "reject_reports": false, 86 | "private_comment": "freeze peach", 87 | "public_comment": "lack of moderation, conspiracy weirdness", 88 | "obfuscate": false 89 | }, 90 | { 91 | "id": "2301", 92 | "domain": "example9.org", 93 | "created_at": "2023-01-04T08:11:32.904Z", 94 | "severity": "silence", 95 | "reject_media": false, 96 | "reject_reports": false, 97 | "private_comment": "", 98 | "public_comment": "alt-right conspiracies", 99 | "obfuscate": false 100 | }, 101 | { 102 | "id": "453", 103 | "domain": "example15.org", 104 | "created_at": "2022-12-05T08:26:59.920Z", 105 | "severity": "suspend", 106 | "reject_media": true, 107 | "reject_reports": true, 108 | "private_comment": "cryptocurrency", 109 | "public_comment": "cryptocurrency", 110 | "obfuscate": true 111 | } 112 | ] 113 | -------------------------------------------------------------------------------- /tests/fixtures/data-noop-01.csv: -------------------------------------------------------------------------------- 1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate" 2 | "public-comment.example.org","noop","This is a public comment","This is a private comment",FALSE,FALSE,FALSE 3 | "private-comment.example.org","noop",,"This is a private comment",FALSE,FALSE,FALSE 4 | "diff-comment.example.org","noop","Noop public comment","Noop private comment",FALSE,FALSE,FALSE 5 | "2diff-comment.example.org","noop","Public duplicate","Private duplicate",FALSE,FALSE,FALSE 6 | "qoto.org","noop",,,FALSE,FALSE,FALSE 7 | "sealion.club","noop",,,FALSE,FALSE,FALSE 8 | "develop.gab.com","noop",,,FALSE,FALSE,FALSE 9 | "gab.ai","noop",,,FALSE,FALSE,FALSE 10 | "gab.sleeck.eu","noop",,,FALSE,FALSE,FALSE 11 | "gab.com","noop",,,FALSE,FALSE,FALSE 12 | "kiwifarms.is","noop",,,FALSE,FALSE,FALSE 13 | 
"kiwifarms.net","noop",,,FALSE,FALSE,FALSE 14 | "gabfed.com","noop",,,FALSE,FALSE,FALSE 15 | -------------------------------------------------------------------------------- /tests/fixtures/data-silences-01.csv: -------------------------------------------------------------------------------- 1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate" 2 | "public-comment.example.org","silence","This is a public comment","This is a private comment",FALSE,FALSE,FALSE 3 | "private-comment.example.org","silence",,"This is a private comment",FALSE,FALSE,FALSE 4 | "diff-comment.example.org","silence","Silence public comment","Silence private comment",FALSE,FALSE,FALSE 5 | "2diff-comment.example.org","silence","Public duplicate","Private duplicate",FALSE,FALSE,FALSE 6 | "qoto.org","silence",,,FALSE,FALSE,FALSE 7 | "sealion.club","silence",,,FALSE,FALSE,FALSE 8 | "develop.gab.com","silence",,,FALSE,FALSE,FALSE 9 | "gab.ai","silence",,,FALSE,FALSE,FALSE 10 | "gab.sleeck.eu","silence",,,FALSE,FALSE,FALSE 11 | "gab.com","silence",,,FALSE,FALSE,FALSE 12 | "kiwifarms.is","silence",,,FALSE,FALSE,FALSE 13 | "kiwifarms.net","silence",,,FALSE,FALSE,FALSE 14 | "gabfed.com","silence",,,FALSE,FALSE,FALSE 15 | -------------------------------------------------------------------------------- /tests/fixtures/data-suspends-01.csv: -------------------------------------------------------------------------------- 1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate" 2 | "public-comment.example.org","suspend","This is a public comment","This is a private comment",TRUE,TRUE,TRUE 3 | "private-comment.example.org","suspend",,"This is a private comment",TRUE,TRUE,TRUE 4 | "diff-comment.example.org","suspend","Suspend public comment","Suspend private comment",TRUE,TRUE,TRUE 5 | "2diff-comment.example.org","suspend","Suspend comment 1","Suspend private 1",TRUE,TRUE,TRUE 6 | "qoto.org","suspend",,,TRUE,TRUE,TRUE 7 | "sealion.club","suspend",,,TRUE,TRUE,TRUE 8 | "develop.gab.com","suspend",,,TRUE,TRUE,TRUE 9 | "gab.ai","suspend",,,TRUE,TRUE,TRUE 10 | "gab.sleeck.eu","suspend",,,TRUE,TRUE,TRUE 11 | "gab.com","suspend",,,TRUE,TRUE,TRUE 12 | "kiwifarms.is","suspend",,,TRUE,TRUE,TRUE 13 | "kiwifarms.net","suspend",,,TRUE,TRUE,TRUE 14 | "gabfed.com","suspend",,,TRUE,TRUE,TRUE 15 | -------------------------------------------------------------------------------- /tests/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenmagic/fediblockhole/ba40084772a565f36af1290070e6c9bba14fb9e7/tests/helpers/__init__.py -------------------------------------------------------------------------------- /tests/helpers/util.py: -------------------------------------------------------------------------------- 1 | """ Utility functions for tests 2 | """ 3 | from fediblockhole import setup_argparse, augment_args 4 | 5 | 6 | def shim_argparse(testargv: list = [], tomldata: str = None): 7 | """Helper function to parse test args 8 | """ 9 | ap = setup_argparse() 10 | args = ap.parse_args(testargv) 11 | if tomldata is not None: 12 | args = augment_args(args, tomldata) 13 | return args 14 | -------------------------------------------------------------------------------- /tests/test_allowlist.py: -------------------------------------------------------------------------------- 1 | """ Test allowlists 2 | """ 3 | 4 | import pytest 5 | from util import shim_argparse 6 | 7 | from 
fediblockhole import apply_allowlists 8 | from fediblockhole.blocklists import Blocklist 9 | from fediblockhole.const import DomainBlock 10 | 11 | 12 | def test_cmdline_allow_removes_domain(): 13 | """Test that -A removes entries from merged""" 14 | conf = shim_argparse(["-A", "removeme.org"]) 15 | 16 | merged = Blocklist( 17 | "test_allowlist.merged", 18 | { 19 | "example.org": DomainBlock("example.org"), 20 | "example2.org": DomainBlock("example2.org"), 21 | "removeme.org": DomainBlock("removeme.org"), 22 | "keepblockingme.org": DomainBlock("keepblockingme.org"), 23 | }, 24 | ) 25 | 26 | merged = apply_allowlists(merged, conf, {}) 27 | 28 | with pytest.raises(KeyError): 29 | merged["removeme.org"] 30 | 31 | 32 | def test_allowlist_removes_domain(): 33 | """Test that an item in an allowlist removes entries from merged""" 34 | conf = shim_argparse() 35 | 36 | merged = Blocklist( 37 | "test_allowlist.merged", 38 | { 39 | "example.org": DomainBlock("example.org"), 40 | "example2.org": DomainBlock("example2.org"), 41 | "removeme.org": DomainBlock("removeme.org"), 42 | "keepblockingme.org": DomainBlock("keepblockingme.org"), 43 | }, 44 | ) 45 | 46 | allowlists = [ 47 | Blocklist( 48 | "test_allowlist", 49 | { 50 | "removeme.org": DomainBlock("removeme.org", "noop"), 51 | }, 52 | ) 53 | ] 54 | 55 | merged = apply_allowlists(merged, conf, allowlists) 56 | 57 | with pytest.raises(KeyError): 58 | merged["removeme.org"] 59 | 60 | 61 | def test_allowlist_removes_tld(): 62 | """Test that an item in an allowlist removes entries from merged""" 63 | conf = shim_argparse() 64 | 65 | merged = Blocklist( 66 | "test_allowlist.merged", 67 | { 68 | ".cf": DomainBlock(".cf"), 69 | "example.org": DomainBlock("example.org"), 70 | ".tk": DomainBlock(".tk"), 71 | "keepblockingme.org": DomainBlock("keepblockingme.org"), 72 | }, 73 | ) 74 | 75 | allowlists = [ 76 | Blocklist( 77 | "test_allowlist.list1", 78 | { 79 | ".cf": DomainBlock(".cf", "noop"), 80 | ".tk": DomainBlock(".tk", "noop"), 81 | }, 82 | ) 83 | ] 84 | 85 | merged = apply_allowlists(merged, conf, allowlists) 86 | 87 | with pytest.raises(KeyError): 88 | merged[".cf"] 89 | 90 | with pytest.raises(KeyError): 91 | merged[".tk"] 92 | -------------------------------------------------------------------------------- /tests/test_blockseverity.py: -------------------------------------------------------------------------------- 1 | from fediblockhole.const import BlockSeverity 2 | 3 | 4 | def test_severity_eq(): 5 | 6 | s1 = BlockSeverity("suspend") 7 | s2 = BlockSeverity("suspend") 8 | 9 | assert s1 == s2 10 | 11 | s3 = BlockSeverity("silence") 12 | s4 = BlockSeverity("silence") 13 | 14 | assert s3 == s4 15 | 16 | s5 = BlockSeverity("noop") 17 | s6 = BlockSeverity("noop") 18 | 19 | assert s5 == s6 20 | 21 | 22 | def test_severity_ne(): 23 | s1 = BlockSeverity("noop") 24 | s2 = BlockSeverity("silence") 25 | s3 = BlockSeverity("suspend") 26 | 27 | assert s1 != s2 28 | assert s2 != s3 29 | assert s1 != s3 30 | 31 | 32 | def test_severity_lt(): 33 | s1 = BlockSeverity("noop") 34 | s2 = BlockSeverity("silence") 35 | s3 = BlockSeverity("suspend") 36 | 37 | assert s1 < s2 38 | assert s2 < s3 39 | assert s1 < s3 40 | 41 | 42 | def test_severity_gt(): 43 | s1 = BlockSeverity("noop") 44 | s2 = BlockSeverity("silence") 45 | s3 = BlockSeverity("suspend") 46 | 47 | assert s2 > s1 48 | assert s3 > s2 49 | assert s3 > s1 50 | 51 | 52 | def test_severity_le(): 53 | s1 = BlockSeverity("noop") 54 | s2 = BlockSeverity("silence") 55 | s2a = BlockSeverity("silence") 56 | s3 = 
BlockSeverity("suspend") 57 | 58 | assert s1 <= s2 59 | assert s2a <= s2 60 | assert s2 <= s3 61 | assert s1 <= s3 62 | 63 | 64 | def test_severity_ge(): 65 | s1 = BlockSeverity("noop") 66 | s2 = BlockSeverity("silence") 67 | s2a = BlockSeverity("silence") 68 | s3 = BlockSeverity("suspend") 69 | 70 | assert s2 >= s1 71 | assert s2a >= s1 72 | assert s3 >= s2 73 | assert s3 >= s1 74 | -------------------------------------------------------------------------------- /tests/test_cmdline.py: -------------------------------------------------------------------------------- 1 | """Test the commandline defined parameters correctly 2 | """ 3 | 4 | from fediblockhole import setup_argparse 5 | 6 | 7 | def test_cmdline_no_configfile(): 8 | """Test bare command with no configfile""" 9 | ap = setup_argparse() 10 | args = ap.parse_args([]) 11 | 12 | assert args.config == "/etc/default/fediblockhole.conf.toml" 13 | assert args.mergeplan is None 14 | assert args.blocklist_savefile is None 15 | assert args.save_intermediate is False 16 | assert args.savedir is None 17 | assert args.import_fields is None 18 | assert args.export_fields is None 19 | 20 | assert args.no_fetch_url is False 21 | assert args.no_fetch_instance is False 22 | assert args.no_push_instance is False 23 | assert args.dryrun is False 24 | 25 | assert args.loglevel is None 26 | 27 | 28 | def test_cmdline_mergeplan_min(): 29 | """Test setting mergeplan min""" 30 | ap = setup_argparse() 31 | args = ap.parse_args(["-m", "min"]) 32 | 33 | assert args.mergeplan == "min" 34 | 35 | 36 | def test_set_allow_domain(): 37 | """Set a single allow domain on commandline""" 38 | ap = setup_argparse() 39 | args = ap.parse_args(["-A", "example.org"]) 40 | 41 | assert args.allow_domains == ["example.org"] 42 | 43 | 44 | def test_set_multiple_allow_domains(): 45 | """Set multiple allow domains on commandline""" 46 | ap = setup_argparse() 47 | args = ap.parse_args( 48 | ["-A", "example.org", "-A", "example2.org", "-A", "example3.org"] 49 | ) 50 | 51 | assert args.allow_domains == ["example.org", "example2.org", "example3.org"] 52 | -------------------------------------------------------------------------------- /tests/test_configfile.py: -------------------------------------------------------------------------------- 1 | """Test the config file is loading parameters correctly 2 | """ 3 | 4 | from textwrap import dedent 5 | 6 | from util import shim_argparse 7 | 8 | from fediblockhole import augment_args, setup_argparse 9 | 10 | 11 | def test_parse_tomldata(): 12 | tomldata = """ 13 | # Test TOML config for FediBlockHole 14 | 15 | blocklist_instance_sources = [] 16 | 17 | blocklist_url_sources = [] 18 | 19 | save_intermediate = true 20 | 21 | import_fields = ['public_comment'] 22 | """ 23 | ap = setup_argparse() 24 | args = ap.parse_args([]) 25 | args = augment_args(args, tomldata) 26 | 27 | assert args.blocklist_instance_sources == [] 28 | assert args.blocklist_url_sources == [] 29 | assert args.save_intermediate is True 30 | assert args.import_fields == ["public_comment"] 31 | 32 | 33 | def test_set_mergeplan_max(): 34 | tomldata = """mergeplan = 'max' 35 | """ 36 | args = shim_argparse([], tomldata) 37 | 38 | assert args.mergeplan == "max" 39 | 40 | 41 | def test_set_mergeplan_min(): 42 | tomldata = """mergeplan = 'min' 43 | """ 44 | args = shim_argparse([], tomldata) 45 | 46 | assert args.mergeplan == "min" 47 | 48 | 49 | def test_set_allowlists(): 50 | tomldata = """# Comment on config 51 | allowlist_url_sources = [ { url='file:///path/to/allowlist', 
format='csv'} ] 52 | """ 53 | args = shim_argparse([], tomldata) 54 | 55 | assert args.mergeplan == "max" 56 | assert args.allowlist_url_sources == [ 57 | { 58 | "url": "file:///path/to/allowlist", 59 | "format": "csv", 60 | } 61 | ] 62 | 63 | 64 | def test_set_merge_thresold_default(): 65 | tomldata = """ 66 | """ 67 | args = shim_argparse([], tomldata) 68 | 69 | assert args.mergeplan == "max" 70 | assert args.merge_threshold_type == "count" 71 | 72 | 73 | def test_set_merge_thresold_count(): 74 | tomldata = """# Add a merge threshold 75 | merge_threshold_type = 'count' 76 | merge_threshold = 2 77 | """ 78 | args = shim_argparse([], tomldata) 79 | 80 | assert args.mergeplan == "max" 81 | assert args.merge_threshold_type == "count" 82 | assert args.merge_threshold == 2 83 | 84 | 85 | def test_set_merge_thresold_pct(): 86 | tomldata = """# Add a merge threshold 87 | merge_threshold_type = 'pct' 88 | merge_threshold = 35 89 | """ 90 | args = shim_argparse([], tomldata) 91 | 92 | assert args.mergeplan == "max" 93 | assert args.merge_threshold_type == "pct" 94 | assert args.merge_threshold == 35 95 | 96 | 97 | def test_destination_token_from_environment(monkeypatch): 98 | tomldata = dedent( 99 | """\ 100 | blocklist_instance_destinations = [ 101 | { domain='example.com', token='raw-token'}, 102 | { domain='example2.com', token_env_var='TOKEN_ENV_VAR' }, 103 | { domain='env-token.com' }, 104 | { domain='www.env-token.com' }, 105 | ] 106 | """ 107 | ) 108 | 109 | monkeypatch.setenv("TOKEN_ENV_VAR", "env-token") 110 | monkeypatch.setenv("ENV-TOKEN_COM_TOKEN", "env-token") 111 | monkeypatch.setenv("WWW_ENV-TOKEN_COM_TOKEN", "www-env-token") 112 | 113 | args = shim_argparse([], tomldata) 114 | 115 | assert args.blocklist_instance_destinations[0]["token"] == "raw-token" 116 | assert args.blocklist_instance_destinations[1]["token"] == "env-token" 117 | assert args.blocklist_instance_destinations[2]["token"] == "env-token" 118 | assert args.blocklist_instance_destinations[3]["token"] == "www-env-token" 119 | 120 | 121 | def test_instance_sources_token_from_environment(monkeypatch): 122 | tomldata = dedent( 123 | """\ 124 | blocklist_instance_sources = [ 125 | { domain='example.com', token='raw-token'}, 126 | { domain='example2.com', token_env_var='TOKEN_ENV_VAR' }, 127 | { domain='env-token.com' }, 128 | { domain='www.env-token.com' }, 129 | ] 130 | """ 131 | ) 132 | 133 | monkeypatch.setenv("TOKEN_ENV_VAR", "env-token") 134 | monkeypatch.setenv("ENV-TOKEN_COM_TOKEN", "env-token") 135 | monkeypatch.setenv("WWW_ENV-TOKEN_COM_TOKEN", "www-env-token") 136 | 137 | args = shim_argparse([], tomldata) 138 | 139 | assert args.blocklist_instance_sources[0]["token"] == "raw-token" 140 | assert args.blocklist_instance_sources[1]["token"] == "env-token" 141 | assert args.blocklist_instance_sources[2]["token"] == "env-token" 142 | assert args.blocklist_instance_sources[3]["token"] == "www-env-token" 143 | -------------------------------------------------------------------------------- /tests/test_domainblock.py: -------------------------------------------------------------------------------- 1 | """Test the DomainBlock structure 2 | """ 3 | 4 | import pytest 5 | 6 | from fediblockhole.const import BlockSeverity, DomainBlock, SeverityLevel 7 | 8 | 9 | def test_blocksev_blankstring(): 10 | a = BlockSeverity("") 11 | assert a.level == SeverityLevel.NONE 12 | 13 | 14 | def test_blocksev_string_noop(): 15 | a = BlockSeverity("noop") 16 | assert a.level == SeverityLevel.NONE 17 | 18 | 19 | def test_blocksev_none(): 20 | 
a = BlockSeverity(None) 21 | assert a.level == SeverityLevel.NONE 22 | 23 | 24 | def test_empty_domainblock_fails(): 25 | with pytest.raises(TypeError): 26 | a = DomainBlock() # noqa 27 | 28 | 29 | def test_default_suspend(): 30 | a = DomainBlock("example.org") 31 | assert a.domain == "example.org" 32 | assert a.severity.level == SeverityLevel.SUSPEND 33 | 34 | 35 | def test_severity_suspend(): 36 | a = DomainBlock("example.org", "suspend") 37 | assert a.domain == "example.org" 38 | assert a.severity.level == SeverityLevel.SUSPEND 39 | 40 | 41 | def test_severity_silence(): 42 | a = DomainBlock("example.org", "silence") 43 | assert a.domain == "example.org" 44 | assert a.severity.level == SeverityLevel.SILENCE 45 | 46 | 47 | def test_severity_noop_string(): 48 | a = DomainBlock("example.org", "noop") 49 | assert a.domain == "example.org" 50 | assert a.severity.level == SeverityLevel.NONE 51 | 52 | 53 | def test_severity_none(): 54 | a = DomainBlock("example.org", None) 55 | assert a.domain == "example.org" 56 | assert a.severity.level == SeverityLevel.NONE 57 | 58 | 59 | def test_compare_equal_blocks(): 60 | 61 | a = DomainBlock("example1.org", "suspend") 62 | b = DomainBlock("example1.org", "suspend") 63 | 64 | assert a == b 65 | 66 | 67 | def test_compare_diff_domains(): 68 | 69 | a = DomainBlock("example1.org", "suspend") 70 | b = DomainBlock("example2.org", "suspend") 71 | 72 | assert a != b 73 | 74 | 75 | def test_compare_diff_sevs(): 76 | 77 | a = DomainBlock("example1.org", "suspend") 78 | b = DomainBlock("example1.org", "silence") 79 | 80 | assert a != b 81 | 82 | 83 | def test_compare_diff_sevs_2(): 84 | 85 | a = DomainBlock("example1.org", "suspend") 86 | b = DomainBlock("example1.org", "noop") 87 | 88 | assert a != b 89 | -------------------------------------------------------------------------------- /tests/test_merge_comments.py: -------------------------------------------------------------------------------- 1 | """ Test merging of comments 2 | """ 3 | 4 | from fediblockhole import merge_comments 5 | 6 | 7 | def test_merge_blank_comments(): 8 | 9 | oldcomment = "" 10 | newcomment = "" 11 | 12 | merged_comment = merge_comments(oldcomment, newcomment) 13 | 14 | assert merged_comment == "" 15 | 16 | 17 | def test_merge_None_comments(): 18 | 19 | oldcomment = None 20 | newcomment = None 21 | 22 | merged_comment = merge_comments(oldcomment, newcomment) 23 | 24 | assert merged_comment == "" 25 | 26 | 27 | def test_merge_oldstr_newNone(): 28 | 29 | oldcomment = "fred, bibble" 30 | newcomment = None 31 | 32 | merged_comment = merge_comments(oldcomment, newcomment) 33 | 34 | assert merged_comment == "fred, bibble" 35 | 36 | 37 | def test_merge_oldempty_newcomment(): 38 | 39 | oldcomment = "" 40 | newcomment = "fred, bibble" 41 | 42 | merged_comment = merge_comments(oldcomment, newcomment) 43 | 44 | assert merged_comment == "fred, bibble" 45 | 46 | 47 | def test_merge_oldNone_newcomment(): 48 | 49 | oldcomment = None 50 | newcomment = "fred, bibble" 51 | 52 | merged_comment = merge_comments(oldcomment, newcomment) 53 | 54 | assert merged_comment == "fred, bibble" 55 | 56 | 57 | def test_merge_two_different(): 58 | 59 | oldcomment = "happy, medium, spinning" 60 | newcomment = "fred, bibble" 61 | 62 | merged_comment = merge_comments(oldcomment, newcomment) 63 | 64 | assert merged_comment == "happy, medium, spinning, fred, bibble" 65 | 66 | 67 | def test_merge_overlaps(): 68 | 69 | oldcomment = "happy, medium, spinning" 70 | newcomment = "fred, medium, bibble, spinning" 71 | 72 | 
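# Inferred from the assertion below: merge_comments appears to drop duplicate terms while keeping first-seen order, old terms first.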
merged_comment = merge_comments(oldcomment, newcomment) 73 | 74 | assert merged_comment == "happy, medium, spinning, fred, bibble" 75 | -------------------------------------------------------------------------------- /tests/test_merge_thresholds.py: -------------------------------------------------------------------------------- 1 | """Test merge with thresholds 2 | """ 3 | 4 | from fediblockhole import merge_blocklists 5 | from fediblockhole.blocklists import Blocklist, parse_blocklist 6 | from fediblockhole.const import DomainBlock 7 | 8 | import_fields = [ 9 | "domain", 10 | "severity", 11 | "public_comment", 12 | "private_comment", 13 | "reject_media", 14 | "reject_reports", 15 | "obfuscate", 16 | ] 17 | 18 | 19 | # FIXME: Deprecated data loader. Now using fixtures. 20 | def __load_test_blocklist_data(datafiles): 21 | 22 | blocklists = [] 23 | 24 | for df in datafiles: 25 | with open(df) as fp: 26 | data = fp.read() 27 | bl = parse_blocklist(data, df, "csv", import_fields) 28 | blocklists.append(bl) 29 | 30 | return blocklists 31 | 32 | 33 | def test_mergeplan_count_2(): 34 | """Only merge a block if present in 2 or more lists""" 35 | 36 | bl_1 = Blocklist( 37 | "test01", 38 | { 39 | "onemention.example.org": DomainBlock( 40 | "onemention.example.org", "suspend", "", "", True, True, True 41 | ), 42 | "twomention.example.org": DomainBlock( 43 | "twomention.example.org", "suspend", "", "", True, True, True 44 | ), 45 | "threemention.example.org": DomainBlock( 46 | "threemention.example.org", "suspend", "", "", True, True, True 47 | ), 48 | }, 49 | ) 50 | 51 | bl_2 = Blocklist( 52 | "test2", 53 | { 54 | "twomention.example.org": DomainBlock( 55 | "twomention.example.org", "suspend", "", "", True, True, True 56 | ), 57 | "threemention.example.org": DomainBlock( 58 | "threemention.example.org", "suspend", "", "", True, True, True 59 | ), 60 | }, 61 | ) 62 | 63 | bl_3 = Blocklist( 64 | "test3", 65 | { 66 | "threemention.example.org": DomainBlock( 67 | "threemention.example.org", "suspend", "", "", True, True, True 68 | ), 69 | # NB: a dict literal keeps only one entry per key, so listing 70 | # "threemention.example.org" twice here would not raise its 71 | # mention count; each list contributes at most one mention. 72 | }, 73 | ) 74 | 75 | ml = merge_blocklists([bl_1, bl_2, bl_3], "max", threshold=2) 76 | 77 | assert "onemention.example.org" not in ml 78 | assert "twomention.example.org" in ml 79 | assert "threemention.example.org" in ml 80 | 81 | 82 | def test_mergeplan_count_3(): 83 | """Only merge a block if present in 3 or more lists""" 84 | 85 | bl_1 = Blocklist( 86 | "test01", 87 | { 88 | "onemention.example.org": DomainBlock( 89 | "onemention.example.org", "suspend", "", "", True, True, True 90 | ), 91 | "twomention.example.org": DomainBlock( 92 | "twomention.example.org", "suspend", "", "", True, True, True 93 | ), 94 | "threemention.example.org": DomainBlock( 95 | "threemention.example.org", "suspend", "", "", True, True, True 96 | ), 97 | }, 98 | ) 99 | 100 | bl_2 = Blocklist( 101 | "test2", 102 | { 103 | "twomention.example.org": DomainBlock( 104 | "twomention.example.org", "suspend", "", "", True, True, True 105 | ), 106 | "threemention.example.org": DomainBlock( 107 | "threemention.example.org", "suspend", "", "", True, True, True 108 | ), 109 | }, 110 | ) 111 | 112 | bl_3 = Blocklist( 113 | "test3", 114 | { 115 | "threemention.example.org": DomainBlock( 116 | "threemention.example.org", "suspend", "", "", True, True, True 117 | ), 118 | # As above, a dict literal keeps one entry per key, so bl_3 119 | # contributes a single mention; the threshold of 3 is met
 120 | # because bl_1, bl_2 and bl_3 all mention the domain. 121 | }, 122 | ) 123 | 124 | ml = merge_blocklists([bl_1, bl_2, bl_3], "max", threshold=3) 125 | 126 | assert "onemention.example.org" not in ml 127 | assert "twomention.example.org" not in ml 128 | assert "threemention.example.org" in ml 129 | 130 | 131 | def test_mergeplan_pct_30(): 132 | """Only merge a block if present in at least 30% of the lists""" 133 | 134 | bl_1 = Blocklist( 135 | "test01", 136 | { 137 | "onemention.example.org": DomainBlock( 138 | "onemention.example.org", "suspend", "", "", True, True, True 139 | ), 140 | "twomention.example.org": DomainBlock( 141 | "twomention.example.org", "suspend", "", "", True, True, True 142 | ), 143 | "fourmention.example.org": DomainBlock( 144 | "fourmention.example.org", "suspend", "", "", True, True, True 145 | ), 146 | }, 147 | ) 148 | 149 | bl_2 = Blocklist( 150 | "test2", 151 | { 152 | "twomention.example.org": DomainBlock( 153 | "twomention.example.org", "suspend", "", "", True, True, True 154 | ), 155 | "threemention.example.org": DomainBlock( 156 | "threemention.example.org", "suspend", "", "", True, True, True 157 | ), 158 | "fourmention.example.org": DomainBlock( 159 | "fourmention.example.org", "suspend", "", "", True, True, True 160 | ), 161 | }, 162 | ) 163 | 164 | bl_3 = Blocklist( 165 | "test3", 166 | { 167 | "threemention.example.org": DomainBlock( 168 | "threemention.example.org", "suspend", "", "", True, True, True 169 | ), 170 | "fourmention.example.org": DomainBlock( 171 | "fourmention.example.org", "suspend", "", "", True, True, True 172 | ), 173 | }, 174 | ) 175 | 176 | bl_4 = Blocklist( 177 | "test4", 178 | { 179 | "threemention.example.org": DomainBlock( 180 | "threemention.example.org", "suspend", "", "", True, True, True 181 | ), 182 | "fourmention.example.org": DomainBlock( 183 | "fourmention.example.org", "suspend", "", "", True, True, True 184 | ), 185 | }, 186 | ) 187 | 188 | ml = merge_blocklists( 189 | [bl_1, bl_2, bl_3, bl_4], "max", threshold=30, threshold_type="pct" 190 | ) 191 | 192 | assert "onemention.example.org" not in ml 193 | assert "twomention.example.org" in ml 194 | assert "threemention.example.org" in ml 195 | assert "fourmention.example.org" in ml 196 | 197 | 198 | def test_mergeplan_pct_55(): 199 | """Only merge a block if present in at least 55% of the lists""" 200 | 201 | bl_1 = Blocklist( 202 | "test01", 203 | { 204 | "onemention.example.org": DomainBlock( 205 | "onemention.example.org", "suspend", "", "", True, True, True 206 | ), 207 | "twomention.example.org": DomainBlock( 208 | "twomention.example.org", "suspend", "", "", True, True, True 209 | ), 210 | "fourmention.example.org": DomainBlock( 211 | "fourmention.example.org", "suspend", "", "", True, True, True 212 | ), 213 | }, 214 | ) 215 | 216 | bl_2 = Blocklist( 217 | "test2", 218 | { 219 | "twomention.example.org": DomainBlock( 220 | "twomention.example.org", "suspend", "", "", True, True, True 221 | ), 222 | "threemention.example.org": DomainBlock( 223 | "threemention.example.org", "suspend", "", "", True, True, True 224 | ), 225 | "fourmention.example.org": DomainBlock( 226 | "fourmention.example.org", "suspend", "", "", True, True, True 227 | ), 228 | }, 229 | ) 230 | 231 | bl_3 = Blocklist( 232 | "test3", 233 | { 234 | "threemention.example.org": DomainBlock( 235 | "threemention.example.org", "suspend", "", "", True, True, True 236 | ), 237 | "fourmention.example.org": DomainBlock( 238 | "fourmention.example.org", "suspend", "", "", True, True, True 239 | ), 240 | }, 241 | ) 242 | 243 | bl_4 = Blocklist( 244 | 
"test4", 245 | { 246 | "threemention.example.org": DomainBlock( 247 | "threemention.example.org", "suspend", "", "", True, True, True 248 | ), 249 | "fourmention.example.org": DomainBlock( 250 | "fourmention.example.org", "suspend", "", "", True, True, True 251 | ), 252 | }, 253 | ) 254 | 255 | ml = merge_blocklists( 256 | [bl_1, bl_2, bl_3, bl_4], "max", threshold=55, threshold_type="pct" 257 | ) 258 | 259 | assert "onemention.example.org" not in ml 260 | assert "twomention.example.org" not in ml 261 | assert "threemention.example.org" in ml 262 | assert "fourmention.example.org" in ml 263 | -------------------------------------------------------------------------------- /tests/test_mergeplan.py: -------------------------------------------------------------------------------- 1 | """Various mergeplan tests 2 | """ 3 | 4 | from fediblockhole import apply_mergeplan, merge_blocklists, merge_comments 5 | from fediblockhole.blocklists import parse_blocklist 6 | from fediblockhole.const import DomainBlock, SeverityLevel 7 | 8 | import_fields = [ 9 | "domain", 10 | "severity", 11 | "public_comment", 12 | "private_comment", 13 | "reject_media", 14 | "reject_reports", 15 | "obfuscate", 16 | ] 17 | 18 | 19 | def load_test_blocklist_data(datafiles): 20 | 21 | blocklists = [] 22 | 23 | for data in datafiles: 24 | bl = parse_blocklist(data, "pytest", "csv", import_fields) 25 | blocklists.append(bl) 26 | 27 | return blocklists 28 | 29 | 30 | def test_mergeplan_max(data_suspends_01, data_silences_01): 31 | """Test 'max' mergeplan""" 32 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01]) 33 | bl = merge_blocklists(blocklists, "max") 34 | assert len(bl) == 13 35 | 36 | for key in bl: 37 | assert bl[key].severity.level == SeverityLevel.SUSPEND 38 | 39 | 40 | def test_mergeplan_min(data_suspends_01, data_silences_01): 41 | """Test 'max' mergeplan""" 42 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01]) 43 | 44 | bl = merge_blocklists(blocklists, "min") 45 | assert len(bl) == 13 46 | 47 | for key in bl: 48 | assert bl[key].severity.level == SeverityLevel.SILENCE 49 | 50 | 51 | def test_mergeplan_default(data_suspends_01, data_silences_01): 52 | """Default mergeplan is max, so see if it's chosen""" 53 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01]) 54 | 55 | bl = merge_blocklists(blocklists) 56 | assert len(bl) == 13 57 | 58 | for key in bl: 59 | assert bl[key].severity.level == SeverityLevel.SUSPEND 60 | 61 | 62 | def test_mergeplan_3_max(data_suspends_01, data_silences_01, data_noop_01): 63 | """3 datafiles and mergeplan of 'max'""" 64 | blocklists = load_test_blocklist_data( 65 | [data_suspends_01, data_silences_01, data_noop_01] 66 | ) 67 | 68 | bl = merge_blocklists(blocklists, "max") 69 | assert len(bl) == 13 70 | 71 | for key in bl: 72 | assert bl[key].severity.level == SeverityLevel.SUSPEND 73 | assert bl[key].reject_media is True 74 | assert bl[key].reject_reports is True 75 | assert bl[key].obfuscate is True 76 | 77 | 78 | def test_mergeplan_3_min(data_suspends_01, data_silences_01, data_noop_01): 79 | """3 datafiles and mergeplan of 'min'""" 80 | blocklists = load_test_blocklist_data( 81 | [data_suspends_01, data_silences_01, data_noop_01] 82 | ) 83 | 84 | bl = merge_blocklists(blocklists, "min") 85 | assert len(bl) == 13 86 | 87 | for key in bl: 88 | assert bl[key].severity.level == SeverityLevel.NONE 89 | assert bl[key].reject_media is False 90 | assert bl[key].reject_reports is False 91 | assert bl[key].obfuscate 
is False 92 | 93 | 94 | def test_mergeplan_noop_v_silence_max(data_silences_01, data_noop_01): 95 | """Mergeplan of max should choose silence over noop""" 96 | blocklists = load_test_blocklist_data([data_silences_01, data_noop_01]) 97 | 98 | bl = merge_blocklists(blocklists, "max") 99 | assert len(bl) == 13 100 | 101 | for key in bl: 102 | assert bl[key].severity.level == SeverityLevel.SILENCE 103 | 104 | 105 | def test_mergeplan_noop_v_silence_min(data_silences_01, data_noop_01): 106 | """Mergeplan of min should choose noop over silence""" 107 | blocklists = load_test_blocklist_data([data_silences_01, data_noop_01]) 108 | 109 | bl = merge_blocklists(blocklists, "min") 110 | assert len(bl) == 13 111 | 112 | for key in bl: 113 | assert bl[key].severity.level == SeverityLevel.NONE 114 | 115 | 116 | def test_merge_public_comment(data_suspends_01, data_silences_01, data_noop_01): 117 | blocklists = load_test_blocklist_data( 118 | [data_suspends_01, data_silences_01, data_noop_01] 119 | ) 120 | 121 | bl = merge_blocklists(blocklists, "min") 122 | assert len(bl) == 13 123 | 124 | assert bl["public-comment.example.org"].public_comment == "This is a public comment" 125 | 126 | 127 | def test_merge_private_comment(data_suspends_01, data_silences_01, data_noop_01): 128 | blocklists = load_test_blocklist_data( 129 | [data_suspends_01, data_silences_01, data_noop_01] 130 | ) 131 | 132 | bl = merge_blocklists(blocklists, "min") 133 | assert len(bl) == 13 134 | 135 | assert ( 136 | bl["private-comment.example.org"].private_comment == "This is a private comment" 137 | ) 138 | 139 | 140 | def test_merge_public_comments(data_suspends_01, data_silences_01, data_noop_01): 141 | blocklists = load_test_blocklist_data( 142 | [data_suspends_01, data_silences_01, data_noop_01] 143 | ) 144 | 145 | bl = merge_blocklists(blocklists, "min") 146 | assert len(bl) == 13 147 | 148 | assert ( 149 | bl["diff-comment.example.org"].public_comment 150 | == "Suspend public comment, Silence public comment, Noop public comment" 151 | ) 152 | 153 | 154 | def test_merge_duplicate_comments(data_suspends_01, data_silences_01, data_noop_01): 155 | """The same comment on multiple sources shouldn't get added""" 156 | blocklists = load_test_blocklist_data( 157 | [data_suspends_01, data_silences_01, data_noop_01] 158 | ) 159 | 160 | bl = merge_blocklists(blocklists, "min") 161 | assert len(bl) == 13 162 | 163 | 164 | def test_merge_comments_none(): 165 | 166 | a = None 167 | b = None 168 | 169 | r = merge_comments(a, b) 170 | 171 | assert r == "" 172 | 173 | 174 | def test_merge_comments_empty(): 175 | 176 | a = "" 177 | b = "" 178 | 179 | r = merge_comments(a, b) 180 | 181 | assert r == "" 182 | 183 | 184 | def test_merge_comments_left(): 185 | 186 | a = "comment to merge" 187 | b = "" 188 | 189 | r = merge_comments(a, b) 190 | 191 | assert r == "comment to merge" 192 | 193 | 194 | def test_merge_comments_right(): 195 | 196 | a = "" 197 | b = "comment to merge" 198 | 199 | r = merge_comments(a, b) 200 | 201 | assert r == "comment to merge" 202 | 203 | 204 | def test_merge_comments_same(): 205 | 206 | a = "comment to merge" 207 | b = "comment to merge" 208 | 209 | r = merge_comments(a, b) 210 | 211 | assert r == "comment to merge" 212 | 213 | 214 | def test_merge_comments_diff(): 215 | 216 | a = "comment A" 217 | b = "comment B" 218 | 219 | r = merge_comments(a, b) 220 | 221 | assert r == "comment A, comment B" 222 | 223 | 224 | def test_merge_comments_dups(): 225 | 226 | a = "boring, nazis, lack of moderation, flagged, special" 227 | b 
= "spoon, nazis, flagged, lack of moderation, happy, fork" 228 | 229 | r = merge_comments(a, b) 230 | 231 | assert ( 232 | r == "boring, nazis, lack of moderation, flagged, special, spoon, happy, fork" 233 | ) 234 | 235 | 236 | def test_mergeplan_same_min_bools_false(): 237 | """Test merging with mergeplan 'max' and False values doesn't change them""" 238 | a = DomainBlock("example.org", "noop", "", "", False, False, False) 239 | b = DomainBlock("example.org", "noop", "", "", False, False, False) 240 | 241 | r = apply_mergeplan(a, b, "max") 242 | 243 | assert r.reject_media is False 244 | assert r.reject_reports is False 245 | assert r.obfuscate is False 246 | 247 | 248 | def test_mergeplan_same_min_bools_true(): 249 | """Test merging with mergeplan 'max' and True values doesn't change them""" 250 | a = DomainBlock("example.org", "noop", "", "", True, False, True) 251 | b = DomainBlock("example.org", "noop", "", "", True, False, True) 252 | 253 | r = apply_mergeplan(a, b, "max") 254 | 255 | assert r.reject_media is True 256 | assert r.reject_reports is False 257 | assert r.obfuscate is True 258 | 259 | 260 | def test_mergeplan_max_bools(): 261 | a = DomainBlock("example.org", "suspend", "", "", True, True, True) 262 | b = DomainBlock("example.org", "noop", "", "", False, False, False) 263 | 264 | r = apply_mergeplan(a, b, "max") 265 | 266 | assert r.reject_media is True 267 | assert r.reject_reports is True 268 | assert r.obfuscate is True 269 | -------------------------------------------------------------------------------- /tests/test_parser_csv.py: -------------------------------------------------------------------------------- 1 | """Tests of the CSV parsing 2 | """ 3 | 4 | from fediblockhole.blocklists import BlocklistParserCSV 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_single_line(): 9 | csvdata = "example.org" 10 | origin = "csvfile" 11 | 12 | parser = BlocklistParserCSV() 13 | bl = parser.parse_blocklist(csvdata, origin) 14 | assert len(bl) == 0 15 | 16 | 17 | def test_header_only(): 18 | csvdata = "domain,severity,public_comment" 19 | origin = "csvfile" 20 | 21 | parser = BlocklistParserCSV() 22 | bl = parser.parse_blocklist(csvdata, origin) 23 | assert len(bl) == 0 24 | 25 | 26 | def test_2_blocks(): 27 | csvdata = """domain,severity 28 | example.org,silence 29 | example2.org,suspend 30 | """ 31 | origin = "csvfile" 32 | 33 | parser = BlocklistParserCSV() 34 | bl = parser.parse_blocklist(csvdata, origin) 35 | 36 | assert len(bl) == 2 37 | assert "example.org" in bl 38 | 39 | 40 | def test_4_blocks(): 41 | csvdata = """domain,severity,public_comment 42 | example.org,silence,"test 1" 43 | example2.org,suspend,"test 2" 44 | example3.org,noop,"test 3" 45 | example4.org,suspend,"test 4" 46 | """ 47 | origin = "csvfile" 48 | 49 | parser = BlocklistParserCSV() 50 | bl = parser.parse_blocklist(csvdata, origin) 51 | 52 | assert len(bl) == 4 53 | assert "example.org" in bl 54 | assert "example2.org" in bl 55 | assert "example3.org" in bl 56 | assert "example4.org" in bl 57 | 58 | assert bl["example.org"].severity.level == SeverityLevel.SILENCE 59 | assert bl["example2.org"].severity.level == SeverityLevel.SUSPEND 60 | assert bl["example3.org"].severity.level == SeverityLevel.NONE 61 | assert bl["example4.org"].severity.level == SeverityLevel.SUSPEND 62 | 63 | 64 | def test_ignore_comments(): 65 | csvdata = """domain,severity,public_comment,private_comment 66 | example.org,silence,"test 1","ignore me" 67 | example2.org,suspend,"test 2","ignote me also" 68 | 
example3.org,noop,"test 3","and me" 69 | example4.org,suspend,"test 4","also me" 70 | """ 71 | origin = "csvfile" 72 | 73 | parser = BlocklistParserCSV() 74 | bl = parser.parse_blocklist(csvdata, origin) 75 | 76 | assert len(bl) == 4 77 | assert "example.org" in bl 78 | assert "example2.org" in bl 79 | assert "example3.org" in bl 80 | assert "example4.org" in bl 81 | 82 | assert bl["example.org"].public_comment == "" 83 | assert bl["example.org"].private_comment == "" 84 | assert bl["example3.org"].public_comment == "" 85 | assert bl["example4.org"].private_comment == "" 86 | -------------------------------------------------------------------------------- /tests/test_parser_csv_mastodon.py: -------------------------------------------------------------------------------- 1 | """Tests of the Mastodon CSV parsing 2 | """ 3 | 4 | from fediblockhole.blocklists import BlocklistParserMastodonCSV 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_single_line(): 9 | csvdata = "example.org" 10 | origin = "csvfile" 11 | 12 | parser = BlocklistParserMastodonCSV() 13 | bl = parser.parse_blocklist(csvdata, origin) 14 | assert len(bl) == 0 15 | 16 | 17 | def test_header_only(): 18 | csvdata = "#domain,#severity,#public_comment" 19 | origin = "csvfile" 20 | 21 | parser = BlocklistParserMastodonCSV() 22 | bl = parser.parse_blocklist(csvdata, origin) 23 | assert len(bl) == 0 24 | 25 | 26 | def test_2_blocks(): 27 | csvdata = """domain,severity 28 | example.org,silence 29 | example2.org,suspend 30 | """ 31 | origin = "csvfile" 32 | 33 | parser = BlocklistParserMastodonCSV() 34 | bl = parser.parse_blocklist(csvdata, origin) 35 | 36 | assert len(bl) == 2 37 | assert "example.org" in bl 38 | 39 | 40 | def test_4_blocks(): 41 | csvdata = """domain,severity,public_comment 42 | example.org,silence,"test 1" 43 | example2.org,suspend,"test 2" 44 | example3.org,noop,"test 3" 45 | example4.org,suspend,"test 4" 46 | """ 47 | origin = "csvfile" 48 | 49 | parser = BlocklistParserMastodonCSV() 50 | bl = parser.parse_blocklist(csvdata, origin) 51 | 52 | assert len(bl) == 4 53 | assert "example.org" in bl 54 | assert "example2.org" in bl 55 | assert "example3.org" in bl 56 | assert "example4.org" in bl 57 | 58 | assert bl["example.org"].severity.level == SeverityLevel.SILENCE 59 | assert bl["example2.org"].severity.level == SeverityLevel.SUSPEND 60 | assert bl["example3.org"].severity.level == SeverityLevel.NONE 61 | assert bl["example4.org"].severity.level == SeverityLevel.SUSPEND 62 | 63 | 64 | def test_ignore_comments(): 65 | csvdata = """domain,severity,public_comment,private_comment 66 | example.org,silence,"test 1","ignore me" 67 | example2.org,suspend,"test 2","ignore me also" 68 | example3.org,noop,"test 3","and me" 69 | example4.org,suspend,"test 4","also me" 70 | """ 71 | origin = "csvfile" 72 | 73 | parser = BlocklistParserMastodonCSV() 74 | bl = parser.parse_blocklist(csvdata, origin) 75 | 76 | assert len(bl) == 4 77 | assert "example.org" in bl 78 | assert "example2.org" in bl 79 | assert "example3.org" in bl 80 | assert "example4.org" in bl 81 | 82 | assert bl["example.org"].public_comment == "" 83 | assert bl["example.org"].private_comment == "" 84 | assert bl["example3.org"].public_comment == "" 85 | assert bl["example4.org"].private_comment == "" 86 | -------------------------------------------------------------------------------- /tests/test_parser_json.py: -------------------------------------------------------------------------------- 1 | """Tests of the JSON parsing 2 | """ 3 | 4 | from 
fediblockhole.blocklists import BlocklistParserJSON 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_json_parser(data_mastodon_json): 9 | 10 | parser = BlocklistParserJSON() 11 | bl = parser.parse_blocklist(data_mastodon_json, "test_json") 12 | 13 | assert len(bl) == 10 14 | assert "example.org" in bl 15 | assert "example2.org" in bl 16 | assert "example3.org" in bl 17 | assert "example4.org" in bl 18 | 19 | assert bl["example.org"].severity.level == SeverityLevel.SUSPEND 20 | assert bl["example2.org"].severity.level == SeverityLevel.SILENCE 21 | assert bl["example3.org"].severity.level == SeverityLevel.SUSPEND 22 | assert bl["example4.org"].severity.level == SeverityLevel.NONE 23 | 24 | 25 | def test_ignore_comments(data_mastodon_json): 26 | 27 | parser = BlocklistParserJSON() 28 | bl = parser.parse_blocklist(data_mastodon_json, "test_json") 29 | 30 | assert len(bl) == 10 31 | assert "example.org" in bl 32 | assert "example2.org" in bl 33 | assert "example3.org" in bl 34 | assert "example4.org" in bl 35 | 36 | assert bl["example.org"].public_comment == "" 37 | assert bl["example.org"].private_comment == "" 38 | assert bl["example3.org"].public_comment == "" 39 | assert bl["example4.org"].private_comment == "" 40 | -------------------------------------------------------------------------------- /tests/test_parser_rapidblockcsv.py: -------------------------------------------------------------------------------- 1 | """Tests of the Rapidblock CSV parsing 2 | """ 3 | 4 | from fediblockhole.blocklists import RapidBlockParserCSV 5 | from fediblockhole.const import SeverityLevel 6 | 7 | csvdata = ( 8 | """example.org\r\nsubdomain.example.org\r\nanotherdomain.org\r\ndomain4.org\r\n""" 9 | ) 10 | parser = RapidBlockParserCSV() 11 | 12 | 13 | def test_basic_rapidblock(): 14 | 15 | bl = parser.parse_blocklist(csvdata) 16 | assert len(bl) == 4 17 | assert "example.org" in bl 18 | assert "subdomain.example.org" in bl 19 | assert "anotherdomain.org" in bl 20 | assert "domain4.org" in bl 21 | 22 | 23 | def test_severity_is_suspend(): 24 | bl = parser.parse_blocklist(csvdata) 25 | 26 | for block in bl.values(): 27 | assert block.severity.level == SeverityLevel.SUSPEND 28 | -------------------------------------------------------------------------------- /tests/test_parser_rapidblockjson.py: -------------------------------------------------------------------------------- 1 | """Test parsing the RapidBlock JSON format 2 | """ 3 | 4 | from fediblockhole.blocklists import parse_blocklist 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_parse_rapidblock_json(data_rapidblock_json): 9 | 10 | bl = parse_blocklist(data_rapidblock_json, "pytest", "rapidblock.json") 11 | 12 | assert "101010.pl" in bl 13 | assert bl["101010.pl"].severity.level == SeverityLevel.SUSPEND 14 | assert bl["101010.pl"].public_comment == "" 15 | 16 | assert "berserker.town" in bl 17 | assert bl["berserker.town"].severity.level == SeverityLevel.SUSPEND 18 | assert bl["berserker.town"].public_comment == "" 19 | assert bl["berserker.town"].private_comment == "" 20 | 21 | 22 | def test_parse_with_comments(data_rapidblock_json): 23 | 24 | bl = parse_blocklist( 25 | data_rapidblock_json, 26 | "pytest", 27 | "rapidblock.json", 28 | ["domain", "severity", "public_comment", "private_comment"], 29 | ) 30 | 31 | assert "101010.pl" in bl 32 | assert bl["101010.pl"].severity.level == SeverityLevel.SUSPEND 33 | assert bl["101010.pl"].public_comment == "cryptomining javascript, white supremacy" 34 | 35 | assert 
"berserker.town" in bl 36 | assert bl["berserker.town"].severity.level == SeverityLevel.SUSPEND 37 | assert bl["berserker.town"].public_comment == "freeze peach" 38 | --------------------------------------------------------------------------------