├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── ci.yml ├── .python-version-default ├── CHANGELOG.md ├── LICENSE ├── README.md ├── chart ├── .helmignore ├── Chart.yaml ├── fediblockhole.conf.toml ├── templates │ ├── _helpers.tpl │ ├── configmap-conf-toml.yaml │ └── cronjob-fediblock-sync.yaml └── values.yaml ├── container ├── .dockerignore └── Dockerfile ├── etc └── sample.fediblockhole.conf.toml ├── pyproject.toml ├── requirements.txt ├── samples ├── demo-allowlist-01.csv ├── demo-allowlist-02.csv └── demo-blocklist-01.csv ├── src └── fediblockhole │ ├── __init__.py │ ├── blocklists.py │ └── const.py ├── tests ├── conftest.py ├── fixtures │ ├── __init__.py │ ├── data-mastodon.json │ ├── data-noop-01.csv │ ├── data-rapidblock.json │ ├── data-silences-01.csv │ └── data-suspends-01.csv ├── helpers │ ├── __init__.py │ └── util.py ├── test_allowlist.py ├── test_blockseverity.py ├── test_cmdline.py ├── test_configfile.py ├── test_domainblock.py ├── test_merge_comments.py ├── test_merge_thresholds.py ├── test_mergeplan.py ├── test_parser_csv.py ├── test_parser_csv_mastodon.py ├── test_parser_json.py ├── test_parser_rapidblockcsv.py └── test_parser_rapidblockjson.py └── uv.lock /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length: 88 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behaviour: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behaviour** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Environment** 24 | Help us understand where the code is running. 25 | - OS/Distribution: [e.g. Linux/Ubuntu/Debian] 26 | - Python version [e.g. Python 3.10] 27 | - Mastodon version [e.g. Mastodon 4.10] 28 | 29 | **Additional context** 30 | Add any other context about the problem here that could help us find and fix the bug. 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. E.g. "I am frustrated when [...]" 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 4 | on: 5 | merge_group: 6 | push: 7 | branches: [main, "ci-*"] 8 | tags: ["*"] 9 | pull_request: 10 | branches: [main, "ci-*"] 11 | workflow_dispatch: 12 | 13 | env: 14 | FORCE_COLOR: "1" 15 | PIP_DISABLE_PIP_VERSION_CHECK: "1" 16 | PIP_NO_PYTHON_VERSION_WARNING: "1" 17 | 18 | permissions: {} 19 | 20 | jobs: 21 | build-package: 22 | name: Build & verify package 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - uses: actions/checkout@v4 27 | with: 28 | fetch-depth: 0 29 | 30 | - uses: hynek/build-and-inspect-python-package@v2 31 | id: baipp 32 | with: 33 | skip-wheel: true 34 | 35 | outputs: 36 | # Used to define the matrix for tests below. The value is based on 37 | # packaging metadata (trove classifiers). 38 | supported-python-versions: ${{ steps.baipp.outputs.supported_python_classifiers_json_array }} 39 | 40 | tests: 41 | name: Tests on ${{ matrix.python-version }} 42 | runs-on: ubuntu-latest 43 | needs: build-package 44 | 45 | strategy: 46 | fail-fast: false 47 | matrix: 48 | # Created by the build-and-inspect-python-package action above. 49 | python-version: ${{ fromJson(needs.build-package.outputs.supported-python-versions) }} 50 | 51 | steps: 52 | - name: Download pre-built packages 53 | uses: actions/download-artifact@v4 54 | with: 55 | name: Packages 56 | path: dist 57 | - run: tar xf dist/*.tar.gz --strip-components=1 58 | - uses: actions/setup-python@v5 59 | with: 60 | python-version: ${{ matrix.python-version }} 61 | allow-prereleases: true 62 | - uses: hynek/setup-cached-uv@v2 63 | - run: uv venv 64 | - run: uv pip install .[dev] 65 | - run: | 66 | cd tests 67 | uv run coverage run -p -m pytest 68 | 69 | - name: Upload coverage data 70 | uses: actions/upload-artifact@v4 71 | with: 72 | name: coverage-data-${{ matrix.python-version }} 73 | path: tests/.coverage.* 74 | include-hidden-files: true 75 | 76 | coverage: 77 | name: Combine & check coverage 78 | runs-on: ubuntu-latest 79 | needs: tests 80 | 81 | steps: 82 | - name: Download pre-built packages 83 | uses: actions/download-artifact@v4 84 | with: 85 | name: Packages 86 | path: dist 87 | - run: tar xf dist/*.tar.gz --strip-components=1 88 | - uses: actions/setup-python@v5 89 | with: 90 | python-version-file: .python-version-default 91 | - uses: hynek/setup-cached-uv@v2 92 | 93 | - name: Download coverage data 94 | uses: actions/download-artifact@v4 95 | with: 96 | pattern: coverage-data-* 97 | merge-multiple: true 98 | 99 | - run: uv venv 100 | - name: Combine coverage & fail if it's <70%. 101 | run: | 102 | uv pip install --system coverage[toml] 103 | 104 | uv run coverage combine 105 | uv run coverage html --skip-covered --skip-empty 106 | 107 | # Report and write to summary. 108 | uv run coverage report --format=markdown >> $GITHUB_STEP_SUMMARY 109 | 110 | # Report again and fail if under 70%. 111 | uv run coverage report --fail-under=70 112 | 113 | - name: Upload HTML report if check failed. 
114 | uses: actions/upload-artifact@v4 115 | with: 116 | name: html-report 117 | path: htmlcov 118 | if: ${{ failure() }} 119 | 120 | docs: 121 | name: Build docs & run doctests 122 | runs-on: ubuntu-latest 123 | needs: build-package 124 | steps: 125 | - name: Download pre-built packages 126 | uses: actions/download-artifact@v4 127 | with: 128 | name: Packages 129 | path: dist 130 | - run: tar xf dist/*.tar.gz --strip-components=1 131 | - uses: actions/setup-python@v5 132 | with: 133 | # Keep in sync with tox/docs and .readthedocs.yaml. 134 | python-version: "3.12" 135 | - uses: hynek/setup-cached-uv@v2 136 | - run: uv venv 137 | 138 | # pyright: 139 | # name: Check types using pyright 140 | # runs-on: ubuntu-latest 141 | # steps: 142 | # - uses: actions/checkout@v4 143 | # - uses: actions/setup-python@v5 144 | # with: 145 | # python-version-file: .python-version-default 146 | # - uses: hynek/setup-cached-uv@v2 147 | 148 | # - run: uv venv 149 | # - run: uv pip install .[dev] pyright typing 150 | # - run: uv run pyright src 151 | 152 | install-dev: 153 | name: Verify dev env 154 | runs-on: ubuntu-latest 155 | 156 | steps: 157 | - uses: actions/checkout@v4 158 | - uses: actions/setup-python@v5 159 | with: 160 | python-version-file: .python-version-default 161 | - uses: hynek/setup-cached-uv@v2 162 | 163 | - run: uv venv --python $(cat .python-version-default) 164 | - run: uv pip install -e .[dev] 165 | 166 | - name: Ensure we can import the fediblockhole package 167 | run: | 168 | source .venv/bin/activate 169 | 170 | python -Ic 'import fediblockhole; print(fediblockhole.__version__)' 171 | 172 | # Ensure everything required is passing for branch protection. 173 | required-checks-pass: 174 | if: always() 175 | 176 | needs: 177 | - coverage 178 | - docs 179 | - install-dev 180 | 181 | runs-on: ubuntu-latest 182 | 183 | steps: 184 | - name: Decide whether the needed jobs succeeded or failed 185 | uses: re-actors/alls-green@release/v1 186 | with: 187 | jobs: ${{ toJSON(needs) }} 188 | -------------------------------------------------------------------------------- /.python-version-default: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Notable changes to the project will be documented in this changelog. 4 | 5 | This project uses [Semantic Versioning] and generally follows the conventions of [Keep A Changelog]. 6 | 7 | ## [Unreleased] 8 | 9 | ## [v0.4.6] - 2024-11-01 10 | 11 | ### Added 12 | 13 | - Added use of Flake8 as linter to improve code consistency (59d306a) 14 | - Added use of the Black code formatting tool to improve code consistency (59d306a) 15 | - Cleaned up code lint and formatting (ffb8219) 16 | - Added issue templates to help new contributors get their issues addressed quicker (3b655d6) 17 | - Added ability to use environment variables to provide BearerTokens. 
Thanks to @offbyone (8c5761e)

## [v0.4.5] - 2023-12-30

### Added

- Added `override_private_comment` option for annotation of automated blocks (4d12bac)
- Added blocklist audit file option to debug and track blocklist operations (9200fc3)

## [v0.4.4] - 2023-07-09

### Added

- Added citation for creators of #Fediblock (a64875b)
- Added parser for Mastodon 4.1 blocklist CSV format (9f95f14)
- Added container support (76d5b61)

### Fixed

- Use __future__.annotations so type hints work with Python < 3.9 (8265639)
- Test util no longer tries to load default config file if conf tomldata is empty. (2da57b2)

## [v0.4.3] - 2023-02-13

### Added

- Added Mastodon public API parser type because #33 (9fe9342)
- Added ability to set scheme when talking to instances (9fe9342)
- Added tests of comment merging. (fb3a7ec)
- Added blocklist thresholds. (bb1d89e)
- Added logging to help debug threshold-based merging. (b67ff0c)
- Added extra documentation on configuring thresholds. (6c72af8)
- Updated documentation to reflect Mastodon v4.1.0 changes to the application scopes screen. (b92dd21)

### Changed

- Dropped minimum Python version to 3.6 (df3c16f)
- Don't merge comments if new comment is empty. (b8aa11e)
- Tweaked comment merging to pass tests. (fb3a7ec)

## [v0.4.2] - 2023-01-19

### Fixed

- Blockdata var already converted to _asdict() (8d3b9da)

## [v0.4.1] - 2023-01-15

Allowlist support.

### Added

- Allowlists just remove blocks from merged list before push. (a25773f)
- Added helper submodule for testing utils (bf48a96)
- Added basic tests of allowlist config args. (a3d3571)
- Added test cases for cmdline parsing. (11accf3)
- Added test cases for configfile parsing. (11accf3)
- Added documentation on allowlists. (26f5464)
- Fixed bug in how DomainBlock defaults handle reject_media, reject_reports. (6d4e18b)
- Added support for allowlists. Updated docstring for merge_blocklists() (7a31c33)
- Added DomainBlock type hint to update_known_block(). (69c28f1)
- Use ._asdict() to get info to pass to add block API call. (69c28f1)

### Changed

- Updated README to explain allowlist mechanism. (dc4bbd7)
- Edited sample config to better explain URL sources (9bd7914)
- Restructured argparsing for easier testing. (11accf3)
- str2bool() now converts '' to False. Added some extra debug logging of blocklist parsing. (894b133)
- Updated documentation to explain need for `admin:read` access to fetch followers stats. (2cec9e1)
- Aligned API call rate limit with server default. (55dad3f)

### Removed

- Removed implied setting of reject_media/reports if severity is set to 'suspend'. (3aa2e37)

### Fixed

- Fixed bug: mergeplan in config file was ignored. Reported in #22 (11accf3)
- Fixed bug in _asdict() of severity level. (9817c99)
- Fix DomainBlock.id usage during __iter__() (a718af5)

## [v0.4.0] - 2023-01-13

Substantial changes to better support multiple blocklist formats.

### Added

- Added support for RapidBlock blocklists, both CSV and JSON formats. (327a44d)
- Added support for per-instance-source import_fields. (327a44d)
- Updated sample config to include new formats. (327a44d)
- A BlockSeverity of 'suspend' implies reject_media and reject_reports. (327a44d)
- Added ability to limit max severity per-URL source. (10011a5)
- Added boolean fields like 'reject_reports' to mergeplan handling. (66f0373)
- Added tests for boolean merge situations. (66f0373)
- Various other test cases added.

### Changed

- Refactored to add a DomainBlock object. (10011a5)
- Refactored to use a BlockParser structure. (10011a5)
- Improved method for checking if changes are needed. (10011a5)
- Refactored fetch from URLs and instances. (327a44d)
- Improved check_followed_severity() behaviour. (327a44d)
- Changed API delay to be in calls per hour. (327a44d)
- Improved comment merging. (0a6eec4)
- Clarified logic in apply_mergeplan() for boolean fields. (66f0373)
- Updated README documentation. (ee9625d)
- Aligned API call rate limit with server default. (55dad3f)

### Removed

- Removed redundant global vars. (327a44d)

### Fixed

- Fixed bug in severity change detection. (e0d40b5)
- Fix DomainBlock.id usage during __iter__() (a718af5)

## [v0.3.0] - 2023-01-11

### Added

- Added args to show version information. (1d0649a)
- Added timeout to requests calls. (23b8833)
- Added CHANGELOG.md (ca9d958)

### Changed

- Changed min Python version to v3.10. (f37ab70)

## [v0.2.1] - 2023-01-10

### Added

- User-Agent is set to FediBlockHole to identify ourselves to remote servers. (04d9eea)
- Adding packaging to prepare for submission to PyPI. (4ab369f)
- Added ability to set max severity level if an instance has followers of accounts on a to-be-blocked domain. (5518421)
- Added ability to read domain_blocks from instances that make the list public. (4ef84b5)
- Skip obfuscated domains when building the merged blocklist. (4ef84b5)

### Changed

- Updated documentation in README and the sample config. (68a2c93)

### Fixed

- Fixed a bug in config enablement of intermediate blocklists saving. (5518421)

## Before 2023-01-10

- Initial rough versions that were not packaged.


[keep a changelog]: https://keepachangelog.com/en/1.0.0/
[semantic versioning]: https://semver.org/spec/v2.0.0.html


[unreleased]: https://github.com/eigenmagic/fediblockhole/compare/v0.4.6...HEAD
[v0.4.6]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.6
[v0.4.5]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.5
[v0.4.4]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.4
[v0.4.3]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.3
[v0.4.2]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.2
[v0.4.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.1
[v0.4.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.0
[v0.3.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.3.0
[v0.2.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.2.1
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.
7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 
76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 
134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 
197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. 
This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. 
But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 
375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. 
You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. 
"Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. 
This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 
  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published
    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# FediBlockHole

A tool for keeping a Mastodon instance blocklist synchronised with remote lists.

The broad design goal for FediBlockHole is to support pulling in a list of
blocklists from a set of trusted sources, merging them into a combined
blocklist, and then pushing that merged list to a set of managed instances.

Mastodon admins can choose who they think maintains quality lists and subscribe
to them, helping to distribute the load of maintaining blocklists across a
community of people. Control ultimately rests with the admins themselves, so
they can outsource as much, or as little, of the effort to others as they deem
appropriate.

FediBlockHole is inspired by the way PiHole maintains a set of blocklists of
adtech domains. It builds on the work of
[@CaribenxMarciaX@scholar.social](https://scholar.social/@CaribenxMarciaX) and
[@gingerrroot@kitty.town](https://kitty.town/@gingerrroot), who started the
#Fediblock hashtag and did a lot of advocacy around it, often at great personal
cost.

## Features

### Blocklist Sources

- Read domain block lists from other instances via the Mastodon API.
- Supports both public lists (no auth required) and 'admin' lists requiring
  authentication to an instance.
- Read domain block lists from arbitrary URLs, including local files.
- Supports CSV and JSON format blocklists.
- Supports RapidBlock CSV and JSON format blocklists.

### Blocklist Export/Push

- Push a merged blocklist to a set of Mastodon instances.
- Export per-source, unmerged block lists to local files, in CSV format.
- Export merged blocklists to local files, in CSV format.
- Read block lists from multiple remote instances.
- Read block lists from multiple URLs, including local files.
- Write a unified block list to a local CSV file.
- Push unified blocklist updates to multiple remote instances.
- Control import and export fields.

### Flexible Configuration

- Provides (hopefully) sensible defaults to minimise first-time setup.
- Global and fine-grained configuration options are available for those complex
  situations that crop up sometimes.
- Allowlists to override blocks in blocklists, ensuring you never block
  instances you want to keep.
- Blocklist thresholds if you only want to block when an instance shows up in
  multiple blocklists.

## Installing

Installable using `pip`:

```
python3 -m pip install fediblockhole
```

Install from source by cloning the repo, changing into the `fediblockhole`
directory, and running:

```
python3 -m pip install .
```

Installation adds a commandline tool: `fediblock-sync`.

Instance admins who want to use this tool for their instance will need to add
an Application at `https://<your-instance>/settings/applications/` so they can
authorize the tool to create and update domain blocks with an OAuth token.

More on authorization by token below.

### Reading remote instance blocklists

If a remote instance makes its domain blocks public, you don't need
a token to read them.

If a remote instance only shows its domain blocks to local accounts,
you'll need a token with `read:blocks` authorization set up.
If you have an account on that instance, you can get a token by setting up a new
Application at `https://<your-instance>/settings/applications/`.

To read admin blocks from a remote instance, you'll need to ask the instance
admin to add a new Application at
`https://<their-instance>/settings/applications/` and then tell you the access
token.

The application needs the `admin:read:domain_blocks` OAuth scope. You can allow
full `admin:read` access, but be aware that this authorizes someone to read all
the data in the instance. That's asking a lot of a remote instance admin who
just wants to share domain_blocks with you.

The `admin:read:domain_blocks` scope is available as of Mastodon v4.1.0, but for
earlier versions admins will need to use the manual method described below.
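
If you want to sanity-check a token before pointing FediBlockHole at an
instance, you can query the relevant Mastodon API endpoints by hand. A minimal
sketch using `curl`, assuming a hypothetical instance `example.social` and an
access token in the `TOKEN` environment variable:

```
# Public domain blocks (no token needed if the instance publishes them):
curl -s https://example.social/api/v1/instance/domain_blocks

# Admin domain blocks (requires the admin:read:domain_blocks scope):
curl -s -H "Authorization: Bearer $TOKEN" \
    https://example.social/api/v1/admin/domain_blocks
```

If the second call returns a `403`, the token is missing the scope, which is
where the manual database workaround comes in.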
95 | 
96 | You can update the scope for your application in the database directly like
97 | this:
98 | 
99 | ```
100 | UPDATE oauth_applications as app
101 | SET scopes = 'admin:read:domain_blocks'
102 | FROM oauth_access_tokens as tok
103 | WHERE app.id = tok.application_id
104 | AND app.name = '<your application name>'
105 | ;
106 | ```
107 | 
108 | When that's done, regenerate the token (so it has the new scopes) in the
109 | application screen in the instance GUI. FediBlockHole should then be able to use
110 | the app token to read domain blocks via the API, but nothing else.
111 | 
112 | Alternately, you could ask the remote instance admin to set up FediBlockHole and
113 | use it to dump out a CSV blocklist from their instance and then put it somewhere
114 | trusted parties can read it. Then you can define the blocklist as a URL source,
115 | as explained below.
116 | 
117 | ### Writing instance blocklists
118 | 
119 | To write domain blocks into an instance requires both the `admin:read` and
120 | `admin:write:domain_blocks` OAuth scopes.
121 | 
122 | The tool needs `admin:read:domain_blocks` scope to read the current list of
123 | domain blocks so we update ones that already exist, rather than trying to add
124 | all new ones and clutter up the instance.
125 | 
126 | `admin:read` access is needed to check if the instance has any accounts that
127 | follow accounts on a domain that is about to get `suspend`ed and automatically
128 | drop the block severity to `silence` level so people have time to migrate
129 | accounts before a full defederation takes effect. Unfortunately, the statistics
130 | measure used to learn this information requires `admin:read` scope.
131 | 
132 | You can add `admin:read` scope in the application admin screen. Please be aware
133 | that this grants full read access to all information in the instance to the
134 | application token, so make sure you keep it a secret. At least remove
135 | world-readable permission from any config file you put it in, e.g.:
136 | 
137 | ```
138 | chmod o-r <configfile>
139 | ```
140 | 
141 | You can also grant full `admin:write` scope to the application, but if you'd
142 | prefer to keep things more tightly secured, limit the scope to
143 | `admin:read:domain_blocks`.
144 | 
145 | Again, this scope is only available in the application config screen as of
146 | Mastodon v4.1.0. If your instance is on an earlier version, you'll need to use
147 | SQL to set the scopes in the database and then regenerate the token:
148 | 
149 | ```
150 | UPDATE oauth_applications as app
151 | SET scopes = 'admin:read admin:write:domain_blocks'
152 | FROM oauth_access_tokens as tok
153 | WHERE app.id = tok.application_id
154 | AND app.name = '<your application name>'
155 | ;
156 | ```
157 | 
158 | When that's done, FediBlockHole should be able to use its token to authorise
159 | adding or updating domain blocks via the API.
160 | 
161 | ## Using the tool
162 | 
163 | Run the tool like this:
164 | 
165 | ```
166 | fediblock-sync -c <configfile>
167 | ```
168 | 
169 | If you put the config file in `/etc/default/fediblockhole.conf.toml` you don't
170 | need to pass in the config file path.
171 | 
172 | For a list of possible configuration options, check the `--help` output.
173 | 
174 | You can also read the heavily commented sample configuration file in the repo at
175 | [etc/sample.fediblockhole.conf.toml](https://github.com/eigenmagic/fediblockhole/blob/main/etc/sample.fediblockhole.conf.toml).
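
For example, to see what the tool would do with your config without applying any
changes, you can combine a config file with the `--dryrun` option (described
further below); the path here is the default config location:

```
fediblock-sync -c /etc/default/fediblockhole.conf.toml --dryrun
```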
176 | 
177 | ## Configuring
178 | 
179 | Once you have your applications and tokens and scopes set up, create a
180 | configuration file for FediBlockHole to use. You can put it anywhere and use the
181 | `-c <configfile>` commandline parameter to tell FediBlockHole where it is.
182 | 
183 | Or you can use the default location of `/etc/default/fediblockhole.conf.toml`.
184 | 
185 | As the filename suggests, FediBlockHole uses TOML syntax.
186 | 
187 | There are 4 key sections:
188 | 
189 | 1. `blocklist_url_sources`: A list of URLs to read blocklists from
190 | 1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API
191 | 1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API
192 | 1. `allowlist_url_sources`: A list of URLs to read allowlists from
193 | 
194 | More detail on configuring the tool is provided below.
195 | 
196 | ### URL sources
197 | 
198 | The `blocklist_url_sources` setting is a list of URLs to fetch blocklists from.
199 | 
200 | Supported formats are currently:
201 | 
202 | - Comma-Separated Values (CSV)
203 | - JSON
204 | - Mastodon v4.1 flavoured CSV
205 | - RapidBlock CSV
206 | - RapidBlock JSON
207 | 
208 | Blocklists must provide a `domain` field, and should provide a `severity` field.
209 | 
210 | `domain` is the domain name of the instance to be blocked/limited.
211 | 
212 | `severity` is the severity level of the block/limit. Supported values are: `noop`, `silence`, and `suspend`.
213 | 
214 | Optional fields that the tool understands are `public_comment`, `private_comment`, `reject_media`, `reject_reports`, and `obfuscate`.
215 | 
216 | #### CSV format
217 | 
218 | A CSV format blocklist must contain a header row with at least a `domain` and `severity` field.
219 | 
220 | Optional fields, as listed above, may also be included.
221 | 
222 | #### Mastodon v4.1 CSV format
223 | 
224 | As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
225 | infinite wisdom, the Mastodon devs decided that field names should begin with a
226 | `#` character in the header, unlike the field names in the JSON output via the
227 | API… or in pretty much any other CSV file anywhere else.
228 | 
229 | Setting the format to `mastodon_csv` will strip off the `#` character when
230 | parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
231 | other CSV formatted blocklist.
232 | 
233 | #### JSON format
234 | 
235 | JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.
236 | 
237 | This is a list of dictionaries, with at minimum a `domain` field, and preferably
238 | a `severity` field. The other optional fields are, well, optional.
239 | 
240 | #### RapidBlock CSV format
241 | 
242 | The RapidBlock CSV format has no header and a single field, so it's not
243 | _strictly_ a CSV file as there are no commas separating values. It is basically
244 | just a list of domains to block, separated by '\r\n'.
245 | 
246 | When using this format, the tool assumes the `severity` level is `suspend`.
247 | 
248 | #### RapidBlock JSON format
249 | 
250 | The RapidBlock JSON format provides more detailed information about domain
251 | blocks, but is still somewhat limited.
252 | 
253 | It has a single `isBlocked` flag indicating if a domain should be blocked or
254 | not. There is no support for the 'silence' block level.
255 | 
256 | There is no support for 'reject_media' or 'reject_reports' or 'obfuscate'.
257 | 
258 | All comments are public, by virtue of the public nature of RapidBlock.
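
Putting the above together, a `blocklist_url_sources` list covering a couple of
these formats might look like the following sketch. The URLs are placeholders;
the `csv` and `mastodon_csv` format keys are the ones described above:

```
blocklist_url_sources = [
  { url = 'https://example.org/exported-blocklist.csv', format = 'csv' },
  { url = 'file:///etc/fediblockhole/mastodon-export.csv', format = 'mastodon_csv' },
]
```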
259 | 
260 | ### Instance sources
261 | 
262 | The tool can also read domain_blocks from instances directly.
263 | 
264 | The configuration is a list of dictionaries of the form:
265 | ```
266 | { domain = '<domain>', token = '<token>', admin = false }
267 | ```
268 | 
269 | The `domain` is the fully-qualified domain name of the API host for an instance
270 | you want to read domain blocks from.
271 | 
272 | The `token` is an optional OAuth token for the application that's configured in
273 | the instance to allow you to read domain blocks, as discussed above.
274 | 
275 | The `token` can also be specified using environment variables. This provides
276 | improved security compared to storing the OAuth token in a configuration file,
277 | but it will require the environment variable to be set so that FediBlockHole can
278 | access it. See below in [Instance destinations](#instance-destinations) for more
279 | detail on how to use environment variables to provide authentication tokens.
280 | 
281 | `admin` is an optional field that tells the tool to use the more detailed admin
282 | API endpoint for domain_blocks, rather than the more public API endpoint that
283 | doesn't provide as much detail. You will need a `token` that's been configured to
284 | permit access to the admin domain_blocks scope, as detailed above.
285 | 
286 | ### Instance destinations
287 | 
288 | The tool supports pushing a unified blocklist to multiple instances.
289 | 
290 | Configure the list of instances you want to push your blocklist to in the
291 | `blocklist_instance_destinations` list. Each entry is of the form:
292 | 
293 | ```
294 | { domain = '<domain>', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
295 | ```
296 | 
297 | The field `domain` is required. It is the fully-qualified domain name of the
298 | instance you want to push to.
299 | 
300 | A Bearer token is also required for authenticating with the instance. It can be provided in two ways:
301 | 
302 | 1. A token can be provided directly in the entry as a `token` field, like this:
303 | ```
304 | { domain = '<domain>', token = '<token>', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
305 | ```
306 | This was the only mechanism available up to version 0.4.5 of FediBlockHole.
307 | 
308 | 1. A token can be provided from the environment.
309 | 
310 | If a token is not directly provided with the `token` field, FediBlockHole will
311 | look for an environment variable that contains the token.
312 | 
313 | By default, the name of the environment variable will be the domain name
314 | converted to upper case and with dot/period characters converted to
315 | underscores, and the suffix `_TOKEN`. For example, the token variable for the
316 | domain `eigenmagic.net` would be `EIGENMAGIC_NET_TOKEN`.
317 | 
318 | You can also specify the environment variable to look for, using the
319 | `token_env_var` field, like this:
320 | ```
321 | { domain = '<domain>', token_env_var = 'MY_CUSTOM_DOMAIN_TOKEN', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
322 | ```
323 | 
324 | FediBlockHole will then look for a token in the `MY_CUSTOM_DOMAIN_TOKEN` environment variable.
325 | 
326 | If a specific `token_env_var` is provided, the default variable name will
327 | not be used. If both the `token` and `token_env_var` fields are provided,
328 | the token provided in the `token` field will be used, and a warning will be
329 | issued to notify you that you might have misconfigured things.
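
For example, to supply the token for the domain `eigenmagic.net` through the
default environment variable described above before a run:

```
export EIGENMAGIC_NET_TOKEN='<token>'
fediblock-sync -c /etc/default/fediblockhole.conf.toml
```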
330 | 
331 | 
332 | The Bearer token must be
333 | an application token with both `admin:read:domain_blocks` and
334 | `admin:write:domain_blocks` authorization.
335 | 
336 | The fields `import_fields`, `max_severity`, and `max_followed_severity` are optional.
337 | 
338 | The optional `import_fields` setting allows you to restrict which fields are
339 | imported from each instance. If you want to import the `reject_reports` settings
340 | from one instance, but no others, you can use the `import_fields` setting to do
341 | it. **Note:** The `domain` and `severity` fields are always imported.
342 | 
343 | The optional `max_severity` setting limits the maximum severity you will allow a
344 | remote blocklist to set. This lets you import a list from a remote instance but
345 | cap its blocks at the `silence` level, even if that remote instance has a block at
346 | `suspend` level. If not set, it defaults to `suspend`.
347 | 
348 | The optional `max_followed_severity` setting sets a per-instance limit on the
349 | severity of a domain_block if there are accounts on the instance that follow
350 | accounts on the domain to be blocked. If `max_followed_severity` isn't set, it
351 | defaults to `silence`.
352 | 
353 | This setting exists to give people time to move off an instance that is about to
354 | be defederated and bring their followers from your instance with them. Without
355 | it, if a new `suspend` block appears in any of the blocklists you subscribe to (or
356 | a block level increases from `silence` to `suspend`) and you're using the default
357 | `max` mergeplan, the tool would immediately suspend the instance, cutting
358 | everyone on the blocked instance off from their existing followers on your
359 | instance, even if they move to a new instance. If you actually want that
360 | outcome, you can set `max_followed_severity = 'suspend'` and use the `max`
361 | mergeplan.
362 | 
363 | Once the follow count drops to 0 on your instance, the tool will automatically
364 | use the highest severity it finds again (if you're using the `max` mergeplan).
365 | 
366 | ### Allowlists
367 | 
368 | Sometimes you might want to completely ignore the blocklist definitions for
369 | certain domains. That's what allowlists are for.
370 | 
371 | Allowlists remove any domain in the list from the merged list of blocks before
372 | the merged list is saved out to a file or pushed to any instance.
373 | 
374 | Allowlists can be in any format supported by `blocklist_url_sources` but ignore
375 | all fields that aren't `domain`.
376 | 
377 | You can also allow domains on the commandline by using the `-A` or `--allow`
378 | flag and providing the domain name to allow. You can use the flag multiple
379 | times to allow multiple domains.
380 | 
381 | It is probably wise to include your own instance domain in an allowlist so you
382 | don't accidentally defederate from yourself.
383 | 
384 | ## More advanced configuration
385 | 
386 | For a list of possible configuration options, check the `--help` output and read the
387 | sample configuration file in `etc/sample.fediblockhole.conf.toml`.
388 | 
389 | ### save_intermediate
390 | 
391 | This option tells the tool to save the unmerged blocklists it fetches from
392 | remote instances and URLs into separate files. This is handy for debugging, or
393 | just to have a non-unified set of blocklist files.
394 | 
395 | Works with the `savedir` setting to control where to save the files.
396 | 
397 | These are parsed blocklists, not the raw data, and so will be affected by `import_fields`.
398 | 399 | The filename is based on the URL or domain used so you can tell where each list came from. 400 | 401 | ### savedir 402 | 403 | Sets where to save intermediate blocklist files. Defaults to `/tmp`. 404 | 405 | ### blocklist_auditfile 406 | 407 | If provided, will save an audit file of counts and percentages by domain. Useful for debugging 408 | thresholds. Defaults to None. 409 | 410 | ### no_push_instance 411 | 412 | Defaults to False. 413 | 414 | When set, the tool won't actually try to push the unified blocklist to any 415 | configured instances. 416 | 417 | If you want to see what the tool would try to do, but not actually apply any 418 | updates, use `--dryrun`. 419 | 420 | ### no_fetch_url 421 | 422 | Skip the fetching of blocklists from any URLs that are configured. 423 | 424 | ### no_fetch_instance 425 | 426 | Skip the fetching of blocklists from any remote instances that are configured. 427 | 428 | ### override_private_comment 429 | 430 | Defaults to None. 431 | 432 | Stamp all *new* blocks pushed to a remote server with this comment or code. 433 | Helps to identify blocks you've created on a server via Fediblockhole versus ones that 434 | already existed. 435 | 436 | ### mergeplan 437 | 438 | If two (or more) blocklists define blocks for the same domain, but they're 439 | different, `mergeplan` tells the tool how to resolve the conflict. 440 | 441 | `max` is the default. It uses the _highest_ severity block it finds as the one 442 | that should be used in the unified blocklist. 443 | 444 | `min` does the opposite. It uses the _lowest_ severity block it finds as the one 445 | to use in the unified blocklist. 446 | 447 | A full discussion of severities is beyond the scope of this README, but here is 448 | a quick overview of how it works for this tool. 449 | 450 | The severities are: 451 | 452 | - **noop**, level 0: This is essentially an 'unblock' but you can include a 453 | comment. 454 | - **silence**, level 1: A silence adds friction to federation with an instance. 455 | - **suspend**, level 2: A full defederation with the instance. 456 | 457 | With `mergeplan` set to `max`, _silence_ would take precedence over _noop_, and 458 | _suspend_ would take precedence over both. 459 | 460 | With `mergeplan` set to `min`, _silence_ would take precedence over _suspend_, 461 | and _noop_ would take precedence over both. 462 | 463 | You would want to use `max` to ensure that you always block with whichever your 464 | harshest fellow admin thinks should happen. 465 | 466 | You would want to use `min` to ensure that your blocks do what your most lenient 467 | fellow admin thinks should happen. 468 | 469 | ### import_fields 470 | 471 | `import_fields` controls which fields will be imported from remote 472 | instances and URL blocklists, and which fields are pushed to instances from the 473 | unified blocklist. 474 | 475 | The fields `domain` and `severity` are always included, so only define extra 476 | fields, if you want them. 477 | 478 | You can't export fields you haven't imported, so `export_fields` should be a 479 | subset of `import_fields`, but you can run the tool multiple times. You could, 480 | for example, include lots of fields for an initial import to build up a 481 | comprehensive list for export, combined with the `--no-push-instances` option so 482 | you don't actually apply the full list to anywhere. 483 | 484 | Then you could use a different set of options when importing so you have all the 485 | detail in a file, but only push `public_comment` to instances. 
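
A sketch of that two-pass idea, using settings that all appear in the sample
configuration file (these would be two separate config files, shown together
here for brevity):

```
# Pass 1: import lots of detail, save it to a file, push nothing.
import_fields = ['public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate']
export_fields = ['public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate']
blocklist_savefile = '/tmp/merged_blocklist.csv'
no_push_instance = true

# Pass 2 (a separate config file): push only public comments to instances.
import_fields = ['public_comment']
export_fields = ['public_comment']
```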
486 | 487 | ### export_fields 488 | 489 | `export_fields` controls which fields will get saved to the unified blocklist 490 | file, if you export one. 491 | 492 | The fields `domain` and `severity` are always included, so only define extra 493 | fields, if you want them. -------------------------------------------------------------------------------- /chart/.helmignore: -------------------------------------------------------------------------------- 1 | # A helm chart's templates and default values can be packaged into a .tgz file. 2 | # When doing that, not everything should be bundled into the .tgz file. This 3 | # file describes what to not bundle. 4 | # 5 | # Manually added by us 6 | # -------------------- 7 | # 8 | 9 | # Boilerplate .helmignore from `helm create mastodon` 10 | # --------------------------------------------------- 11 | # 12 | # Patterns to ignore when building packages. 13 | # This supports shell glob matching, relative path matching, and 14 | # negation (prefixed with !). Only one pattern per line. 15 | .DS_Store 16 | # Common VCS dirs 17 | .git/ 18 | .gitignore 19 | .bzr/ 20 | .bzrignore 21 | .hg/ 22 | .hgignore 23 | .svn/ 24 | # Common backup files 25 | *.swp 26 | *.bak 27 | *.tmp 28 | *.orig 29 | *~ 30 | # Various IDEs 31 | .project 32 | .idea/ 33 | *.tmproj 34 | .vscode/ 35 | -------------------------------------------------------------------------------- /chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: fediblockhole 3 | description: FediBlockHole is a tool for keeping a Mastodon instance blocklist synchronised with remote lists. 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 1.1.0 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | appVersion: 0.4.2 24 | -------------------------------------------------------------------------------- /chart/fediblockhole.conf.toml: -------------------------------------------------------------------------------- 1 | # List of instances to read blocklists from. 2 | # If the instance makes its blocklist public, no authorization token is needed. 3 | # Otherwise, `token` is a Bearer token authorised to read domain_blocks. 4 | # If `admin` = True, use the more detailed admin API, which requires a token with a 5 | # higher level of authorization. 6 | # If `import_fields` are provided, only import these fields from the instance. 7 | # Overrides the global `import_fields` setting. 
8 | blocklist_instance_sources = [ 9 | # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks 10 | # { domain = 'jorts.horse', token = '' }, # user accessible block list 11 | # { domain = 'eigenmagic.net', token = '', admin = true }, # admin access required 12 | ] 13 | 14 | # List of URLs to read csv blocklists from 15 | # Format tells the parser which format to use when parsing the blocklist 16 | # max_severity tells the parser to override any severities that are higher than this value 17 | # import_fields tells the parser to only import that set of fields from a specific source 18 | blocklist_url_sources = [ 19 | # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, 20 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' }, 21 | 22 | ] 23 | 24 | ## These global allowlists override blocks from blocklists 25 | # These are the same format and structure as blocklists, but they take precedence 26 | allowlist_url_sources = [ 27 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-01.csv', format = 'csv' }, 28 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-02.csv', format = 'csv' }, 29 | ] 30 | 31 | # List of instances to write blocklist to 32 | blocklist_instance_destinations = [ 33 | # { domain = 'eigenmagic.net', token = '', max_followed_severity = 'silence'}, 34 | ] 35 | 36 | ## Store a local copy of the remote blocklists after we fetch them 37 | #save_intermediate = true 38 | 39 | ## Directory to store the local blocklist copies 40 | # savedir = '/tmp' 41 | 42 | ## File to save the fully merged blocklist into 43 | # blocklist_savefile = '/tmp/merged_blocklist.csv' 44 | 45 | ## Don't push blocklist to instances, even if they're defined above 46 | # no_push_instance = false 47 | 48 | ## Don't fetch blocklists from URLs, even if they're defined above 49 | # no_fetch_url = false 50 | 51 | ## Don't fetch blocklists from instances, even if they're defined above 52 | # no_fetch_instance = false 53 | 54 | ## Set the mergeplan to use when dealing with overlaps between blocklists 55 | # The default 'max' mergeplan will use the harshest severity block found for a domain. 56 | # The 'min' mergeplan will use the lightest severity block found for a domain. 57 | # mergeplan = 'max' 58 | 59 | ## Set which fields we import 60 | ## 'domain' and 'severity' are always imported, these are additional 61 | ## 62 | import_fields = ['public_comment', 'reject_media', 'reject_reports', 'obfuscate'] 63 | 64 | ## Set which fields we export 65 | ## 'domain' and 'severity' are always exported, these are additional 66 | ## 67 | export_fields = ['public_comment'] 68 | -------------------------------------------------------------------------------- /chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "fediblockhole.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 7 | {{- end }} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "fediblockhole.fullname" -}} 15 | {{- if .Values.fullnameOverride }} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 17 | {{- else }} 18 | {{- $name := default .Chart.Name .Values.nameOverride }} 19 | {{- if contains $name .Release.Name }} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 21 | {{- else }} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 23 | {{- end }} 24 | {{- end }} 25 | {{- end }} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "fediblockhole.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 32 | {{- end }} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "fediblockhole.labels" -}} 38 | helm.sh/chart: {{ include "fediblockhole.chart" . }} 39 | {{ include "fediblockhole.selectorLabels" . }} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- end }} 45 | 46 | {{/* 47 | Selector labels 48 | */}} 49 | {{- define "fediblockhole.selectorLabels" -}} 50 | app.kubernetes.io/name: {{ include "fediblockhole.name" . }} 51 | app.kubernetes.io/instance: {{ .Release.Name }} 52 | {{- end }} 53 | 54 | {{/* 55 | Rolling pod annotations 56 | */}} 57 | {{- define "fediblockhole.rollingPodAnnotations" -}} 58 | rollme: {{ .Release.Revision | quote }} 59 | checksum/config-configmap: {{ include ( print $.Template.BasePath "/configmap-conf-toml.yaml" ) . | sha256sum | quote }} 60 | {{- end }} 61 | 62 | {{/* 63 | Create the default conf file path and filename 64 | */}} 65 | {{- define "fediblockhole.conf_file_path" -}} 66 | {{- default "/etc/default/" .Values.fediblockhole.conf_file.path }} 67 | {{- end }} 68 | {{- define "fediblockhole.conf_file_filename" -}} 69 | {{- default "fediblockhole.conf.toml" .Values.fediblockhole.conf_file.filename }} 70 | {{- end }} 71 | -------------------------------------------------------------------------------- /chart/templates/configmap-conf-toml.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ include "fediblockhole.fullname" . }}-conf-toml 5 | labels: 6 | {{- include "fediblockhole.labels" . | nindent 4 }} 7 | data: 8 | {{ (.Files.Glob "fediblockhole.conf.toml").AsConfig | nindent 4 }} 9 | -------------------------------------------------------------------------------- /chart/templates/cronjob-fediblock-sync.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.fediblockhole.cron.sync.enabled -}} 2 | apiVersion: batch/v1 3 | kind: CronJob 4 | metadata: 5 | name: {{ include "fediblockhole.fullname" . }}-sync 6 | labels: 7 | {{- include "fediblockhole.labels" . | nindent 4 }} 8 | spec: 9 | schedule: {{ .Values.fediblockhole.cron.sync.schedule }} 10 | failedJobsHistoryLimit: {{ .Values.fediblockhole.cron.sync.failedJobsHistoryLimit }} 11 | successfulJobsHistoryLimit: {{ .Values.fediblockhole.cron.sync.successfulJobsHistoryLimit }} 12 | jobTemplate: 13 | spec: 14 | template: 15 | metadata: 16 | name: {{ include "fediblockhole.fullname" . }}-sync 17 | {{- with .Values.jobAnnotations }} 18 | annotations: 19 | {{- toYaml . | nindent 12 }} 20 | {{- end }} 21 | spec: 22 | restartPolicy: OnFailure 23 | containers: 24 | - name: {{ include "fediblockhole.fullname" . 
}}-sync 25 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 26 | imagePullPolicy: {{ .Values.image.pullPolicy }} 27 | command: 28 | - fediblock-sync 29 | - -c 30 | - "{{- include "fediblockhole.conf_file_path" . -}}{{- include "fediblockhole.conf_file_filename" . -}}" 31 | volumeMounts: 32 | - name: config 33 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- include "fediblockhole.conf_file_filename" . -}}" 34 | subPath: "{{- include "fediblockhole.conf_file_filename" . -}}" 35 | {{ if .Values.fediblockhole.allow_file.filename }} 36 | - name: allowfile 37 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- .Values.fediblockhole.allow_file.filename -}}" 38 | subPath: "{{- .Values.fediblockhole.allow_file.filename -}}" 39 | {{ end }} 40 | {{ if .Values.fediblockhole.block_file.filename }} 41 | - name: blockfile 42 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- .Values.fediblockhole.block_file.filename -}}" 43 | subPath: "{{- .Values.fediblockhole.block_file.filename -}}" 44 | {{ end }} 45 | volumes: 46 | - name: config 47 | configMap: 48 | name: {{ include "fediblockhole.fullname" . }}-conf-toml 49 | items: 50 | - key: {{ include "fediblockhole.conf_file_filename" . | quote }} 51 | path: {{ include "fediblockhole.conf_file_filename" . | quote }} 52 | {{ if .Values.fediblockhole.allow_file.filename }} 53 | - name: allowfile 54 | configMap: 55 | name: {{ include "fediblockhole.fullname" . }}-allow-csv 56 | items: 57 | - key: {{ .Values.fediblockhole.allow_file.filename | quote }} 58 | path: {{ .Values.fediblockhole.allow_file.filename | quote }} 59 | {{ end }} 60 | {{ if .Values.fediblockhole.block_file.filename }} 61 | - name: blockfile 62 | configMap: 63 | name: {{ include "fediblockhole.fullname" . }}-block-csv 64 | items: 65 | - key: {{ .Values.fediblockhole.block_file.filename | quote }} 66 | path: {{ .Values.fediblockhole.block_file.filename | quote }} 67 | {{ end }} 68 | {{- end }} 69 | -------------------------------------------------------------------------------- /chart/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: ghcr.io/cunningpike/fediblockhole 3 | # https://github.com/cunningpike/fediblockhole/pkgs/container/fediblockhole/versions 4 | # 5 | # alternatively, use `latest` for the latest release or `edge` for the image 6 | # built from the most recent commit 7 | # 8 | # tag: latest 9 | tag: "" 10 | # use `Always` when using `latest` tag 11 | pullPolicy: IfNotPresent 12 | 13 | fediblockhole: 14 | # location of the configuration file. Default is /etc/default/fediblockhole.conf.toml 15 | conf_file: 16 | path: "" 17 | filename: "" 18 | # Location of a local allowlist file. It is recommended that this file should at a 19 | # minimum contain the web_domain of your own instance. 20 | allow_file: 21 | # Optionally, set the name of the file. This should match the data key in the 22 | # associated ConfigMap 23 | filename: "" 24 | # Location of a local blocklist file. 25 | block_file: 26 | # Optionally, set the name of the file. 
This should match the data key in the 27 | # associated ConfigMap 28 | filename: "" 29 | cron: 30 | # -- run `fediblock-sync` every hour 31 | sync: 32 | # @ignored 33 | enabled: false 34 | # @ignored 35 | schedule: "0 * * * *" 36 | failedJobsHistoryLimit: 1 37 | successfulJobsHistoryLimit: 3 38 | 39 | # if you manually change the UID/GID environment variables, ensure these values 40 | # match: 41 | podSecurityContext: 42 | runAsUser: 991 43 | runAsGroup: 991 44 | fsGroup: 991 45 | 46 | # @ignored 47 | securityContext: {} 48 | 49 | # -- Kubernetes manages pods for jobs and pods for deployments differently, so you might 50 | # need to apply different annotations to the two different sets of pods. The annotations 51 | # set with podAnnotations will be added to all deployment-managed pods. 52 | podAnnotations: {} 53 | 54 | # -- The annotations set with jobAnnotations will be added to all job pods. 55 | jobAnnotations: {} 56 | 57 | # -- Default resources for all Deployments and jobs unless overwritten 58 | resources: {} 59 | # We usually recommend not to specify default resources and to leave this as a conscious 60 | # choice for the user. This also increases chances charts run on environments with little 61 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 62 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 63 | # limits: 64 | # cpu: 100m 65 | # memory: 128Mi 66 | # requests: 67 | # cpu: 100m 68 | # memory: 128Mi 69 | 70 | # @ignored 71 | nodeSelector: {} 72 | 73 | # @ignored 74 | tolerations: [] 75 | 76 | # -- Affinity for all pods unless overwritten 77 | affinity: {} 78 | -------------------------------------------------------------------------------- /container/.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | #README.md 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | __pycache__ 7 | -------------------------------------------------------------------------------- /container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official lightweight Python image. 2 | # https://hub.docker.com/_/python 3 | FROM python:slim 4 | 5 | # Copy local code to the container image. 6 | ENV APP_HOME /app 7 | WORKDIR $APP_HOME 8 | 9 | # Install production dependencies. 10 | RUN pip install fediblockhole 11 | 12 | USER 1001 13 | # Set the command on start to fediblock-sync. 14 | ENTRYPOINT ["fediblock-sync"] 15 | -------------------------------------------------------------------------------- /etc/sample.fediblockhole.conf.toml: -------------------------------------------------------------------------------- 1 | # List of instances to read blocklists from. 2 | # If the instance makes its blocklist public, no authorization token is needed. 3 | # Otherwise, `token` is a Bearer token authorised to read domain_blocks. 4 | # If `admin` = True, use the more detailed admin API, which requires a token with a 5 | # higher level of authorization. 6 | # If `import_fields` are provided, only import these fields from the instance. 7 | # Overrides the global `import_fields` setting. 
8 | blocklist_instance_sources = [
9 |   # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
10 |   # { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
11 |   # { domain = 'jorts.horse', token_env_var = 'FBH_JORTS_TOKEN' }, # use environment variable for token
12 |   # { domain = 'eigenmagic.net', token = '<a_token_with_read_auth>', admin = true }, # admin access required
13 |   # { domain = 'eigenmagic.net', token_env_var = 'FBH_EIGENMAGIC_READ_TOKEN', admin = true }, # same, but use environment variable for token
14 | 
15 | ]
16 | 
17 | # List of URLs to read csv blocklists from
18 | # Format tells the parser which format to use when parsing the blocklist
19 | # max_severity tells the parser to override any severities that are higher than this value
20 | # import_fields tells the parser to only import that set of fields from a specific source
21 | blocklist_url_sources = [
22 |   # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
23 |   { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
24 | 
25 | ]
26 | 
27 | ## These global allowlists override blocks from blocklists
28 | # These are the same format and structure as blocklists, but they take precedence
29 | allowlist_url_sources = [
30 |   { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-01.csv', format = 'csv' },
31 |   { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-02.csv', format = 'csv' },
32 | ]
33 | 
34 | # List of instances to write blocklist to
35 | blocklist_instance_destinations = [
36 |   # { domain = 'eigenmagic.net', token = '<a_token_with_write_auth>', max_followed_severity = 'silence'},
37 | 
38 |   # Alternate mechanism using environment variable for the token
39 |   # { domain = 'eigenmagic.net', token_env_var = 'FBH_EIGENMAGIC_TOKEN', max_followed_severity = 'silence'},
40 | 
41 | ]
42 | 
43 | ## Store a local copy of the remote blocklists after we fetch them
44 | #save_intermediate = true
45 | 
46 | ## Directory to store the local blocklist copies
47 | # savedir = '/tmp'
48 | 
49 | ## File to save the fully merged blocklist into
50 | # blocklist_savefile = '/tmp/merged_blocklist.csv'
51 | 
52 | ## File to save the audit log of counts across sources
53 | # blocklist_auditfile = '/tmp/domain_counts_list.csv'
54 | 
55 | ## Don't push blocklist to instances, even if they're defined above
56 | # no_push_instance = false
57 | 
58 | ## Don't fetch blocklists from URLs, even if they're defined above
59 | # no_fetch_url = false
60 | 
61 | ## Don't fetch blocklists from instances, even if they're defined above
62 | # no_fetch_instance = false
63 | 
64 | ## Set the mergeplan to use when dealing with overlaps between blocklists
65 | # The default 'max' mergeplan will use the harshest severity block found for a domain.
66 | # The 'min' mergeplan will use the lightest severity block found for a domain.
67 | # mergeplan = 'max'
68 | 
69 | ## Optional threshold-based merging.
70 | # Only merge in domain blocks if the domain is mentioned in
71 | # at least `merge_threshold` blocklists.
72 | # `merge_threshold` is an integer, with a default value of 0.
73 | # The `merge_threshold_type` can be `count` or `pct`.
74 | # If `count` type is selected, the threshold is reached when the domain
75 | # is mentioned in at least `merge_threshold` blocklists. The default value
76 | # of 0 means that every block in every list will be merged in.
77 | # If `pct` type is selected, `merge_threshold` is interpreted as a percentage,
78 | # i.e. if `merge_threshold` = 20, blocks will only be merged in if the domain
79 | # is present in at least 20% of blocklists.
80 | # Percentage calculated as number_of_mentions / total_number_of_blocklists.
81 | # The percentage method is more flexible, but also more complicated, so take care
82 | # when using it.
83 | #
84 | # merge_threshold_type = 'count'
85 | # merge_threshold = 0
86 | 
87 | ## set an override private comment to be added when pushing a NEW block to an instance
88 | # this does not require importing private comments
89 | # override_private_comment = 'Added by Fediblock Sync'
90 | 
91 | ## Set which fields we import
92 | ## 'domain' and 'severity' are always imported, these are additional
93 | ##
94 | import_fields = ['public_comment', 'reject_media', 'reject_reports', 'obfuscate']
95 | 
96 | ## Set which fields we export
97 | ## 'domain' and 'severity' are always exported, these are additional
98 | ##
99 | export_fields = ['public_comment']
100 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "fediblockhole"
3 | version = "0.4.6"
4 | description = "Federated blocklist management for Mastodon"
5 | readme = "README.md"
6 | license = {file = "LICENSE"}
7 | requires-python = ">=3.8"
8 | keywords = ["mastodon", "fediblock"]
9 | authors = [
10 |   {name = "Justin Warren"}, {email = "justin@eigenmagic.com"}
11 | ]
12 | classifiers = [
13 |   "Development Status :: 4 - Beta",
14 |   "Intended Audience :: Developers",
15 |   "Intended Audience :: System Administrators",
16 |   "License :: OSI Approved :: GNU Affero General Public License v3",
17 |   "Natural Language :: English",
18 |   "Programming Language :: Python :: 3",
19 |   "Programming Language :: Python :: 3.10",
20 |   "Programming Language :: Python :: 3.9",
21 |   "Programming Language :: Python :: 3.8",
22 | ]
23 | dependencies = [
24 |   "requests",
25 |   "toml"
26 | ]
27 | 
28 | [project.urls]
29 | homepage = "https://github.com/eigenmagic/fediblockhole"
30 | documentation = "https://github.com/eigenmagic/fediblockhole"
31 | repository = "https://github.com/eigenmagic/fediblockhole"
32 | 
33 | [project.scripts]
34 | fediblock-sync = "fediblockhole:main"
35 | 
36 | [build-system]
37 | requires = ["hatchling"]
38 | build-backend = "hatchling.build"
39 | 
40 | [tool.pytest.ini_options]
41 | addopts = [
42 |   "--import-mode=importlib",
43 | ]
44 | norecursedirs = [
45 |   "tests/helpers",
46 | ]
47 | 
48 | [tool.uv]
49 | dev-dependencies = [
50 |   "coverage[toml]>=7.6.1",
51 |   "pytest ~= 8.3",
52 | ]
53 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | toml
3 | pytest
--------------------------------------------------------------------------------
/samples/demo-allowlist-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","private_comment","public_comment","reject_media","reject_reports","obfuscate"
2 | "eigenmagic.net","noop","Never block me","Only the domain field matters for allowlists",False,False,False
3 | "example.org","noop","Never block me either","The severity is ignored in allowlists as are all other fields",False,False,False
4 | "demo01.example.org","noop","Never block me either","But you
can use them to leave yourself or others notes on why the item is here",False,False,False
5 | 
--------------------------------------------------------------------------------
/samples/demo-allowlist-02.csv:
--------------------------------------------------------------------------------
1 | "domain","private_comment"
2 | "example.org","The private comment won't get loaded, but can be handy to leave yourself a note."
3 | 
--------------------------------------------------------------------------------
/samples/demo-blocklist-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","reject_media","reject_reports","private_comment","public_comment","obfuscate"
2 | "qoto.org","suspend",True,True,,,True
3 | "sealion.club","suspend",True,True,,,True
4 | "develop.gab.com","suspend",True,True,,,True
5 | "gab.ai","suspend",True,True,,,True
6 | "gab.sleeck.eu","suspend",True,True,,,True
7 | "gab.com","suspend",True,True,,,True
8 | "kiwifarms.is","suspend",True,True,,,True
9 | "kiwifarms.net","suspend",True,True,,,True
10 | "gabfed.com","suspend",True,True,,,True
--------------------------------------------------------------------------------
/src/fediblockhole/__init__.py:
--------------------------------------------------------------------------------
1 | """A tool for managing federated Mastodon blocklists
2 | """
3 | 
4 | from __future__ import annotations
5 | 
6 | import argparse
7 | import csv
8 | import json
9 | import os.path
10 | import sys
11 | import time
12 | import urllib.request as urlr
13 | from importlib.metadata import version
14 | 
15 | import requests
16 | import toml
17 | 
18 | from .blocklists import BlockAuditList, Blocklist, parse_blocklist
19 | from .const import BlockAudit, BlockSeverity, DomainBlock
20 | 
21 | __version__ = version("fediblockhole")
22 | 
23 | import logging
24 | 
25 | logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
26 | log = logging.getLogger("fediblockhole")
27 | 
28 | # Max size of a URL-fetched blocklist
29 | URL_BLOCKLIST_MAXSIZE = 1024**3
30 | 
31 | # Wait at most this long for a remote server to respond
32 | REQUEST_TIMEOUT = 30
33 | 
34 | # Time to wait between instance API calls so we don't melt them
35 | # The default Mastodon rate limit is 300 calls per 5 minutes
36 | API_CALL_DELAY = 5 * 60 / 300  # 300 calls per 5 minutes
37 | 
38 | # We always import the domain and the severity
39 | IMPORT_FIELDS = ["domain", "severity"]
40 | 
41 | # Allowlists always import these fields
42 | ALLOWLIST_IMPORT_FIELDS = [
43 |     "domain",
44 |     "severity",
45 |     "public_comment",
46 |     "private_comment",
47 |     "reject_media",
48 |     "reject_reports",
49 |     "obfuscate",
50 | ]
51 | 
52 | # We always export the domain and the severity
53 | EXPORT_FIELDS = ["domain", "severity"]
54 | 
55 | 
56 | def sync_blocklists(conf: argparse.Namespace):
57 |     """Sync instance blocklists from remote sources.
58 | 
59 |     @param conf: A configuration dictionary
60 |     """
61 |     # Build a dict of blocklists we retrieve from remote sources.
62 |     # We will merge these later using a merge algorithm we choose.
63 | 
64 |     # Always import these fields (copy the default so we don't mutate the module-level list)
65 |     import_fields = list(IMPORT_FIELDS)
66 |     # Add extra import fields if defined in config
67 |     import_fields.extend(conf.import_fields)
68 | 
69 |     # Always export these fields (copy the default so we don't mutate the module-level list)
70 |     export_fields = list(EXPORT_FIELDS)
71 |     # Add extra export fields if defined in config
72 |     export_fields.extend(conf.export_fields)
73 | 
74 |     blocklists = []
75 |     # Fetch blocklists from URLs
76 |     if not conf.no_fetch_url:
77 |         blocklists.extend(
78 |             fetch_from_urls(
79 |                 conf.blocklist_url_sources,
80 |                 import_fields,
81 |                 conf.save_intermediate,
82 |                 conf.savedir,
83 |                 export_fields,
84 |             )
85 |         )
86 | 
87 |     # Fetch blocklists from remote instances
88 |     if not conf.no_fetch_instance:
89 |         blocklists.extend(
90 |             fetch_from_instances(
91 |                 conf.blocklist_instance_sources,
92 |                 import_fields,
93 |                 conf.save_intermediate,
94 |                 conf.savedir,
95 |                 export_fields,
96 |             )
97 |         )
98 | 
99 |     # Merge blocklists into an update dict
100 |     merged = merge_blocklists(
101 |         blocklists,
102 |         conf.mergeplan,
103 |         conf.merge_threshold,
104 |         conf.merge_threshold_type,
105 |         conf.blocklist_auditfile,
106 |     )
107 | 
108 |     # Remove items listed in allowlists, if any
109 |     allowlists = fetch_allowlists(conf)
110 |     merged = apply_allowlists(merged, conf, allowlists)
111 | 
112 |     # Save the final mergelist, if requested
113 |     if conf.blocklist_savefile:
114 |         log.info(f"Saving merged blocklist to {conf.blocklist_savefile}")
115 |         save_blocklist_to_file(merged, conf.blocklist_savefile, export_fields)
116 | 
117 |     # Push the blocklist to destination instances
118 |     if not conf.no_push_instance:
119 |         log.info("Pushing domain blocks to instances...")
120 |         for dest in conf.blocklist_instance_destinations:
121 |             target = dest["domain"]
122 |             token = dest["token"]
123 |             scheme = dest.get("scheme", "https")
124 |             max_followed_severity = BlockSeverity(
125 |                 dest.get("max_followed_severity", "silence")
126 |             )
127 |             push_blocklist(
128 |                 token,
129 |                 target,
130 |                 merged,
131 |                 conf.dryrun,
132 |                 import_fields,
133 |                 max_followed_severity,
134 |                 scheme,
135 |                 conf.override_private_comment,
136 |             )
137 | 
138 | 
139 | def apply_allowlists(merged: Blocklist, conf: argparse.Namespace, allowlists: list):
140 |     """Apply allowlists"""
141 |     # Apply allows specified on the commandline
142 |     for domain in conf.allow_domains:
143 |         log.info(f"'{domain}' allowed by commandline, removing any blocks...")
144 |         if domain in merged.blocks:
145 |             del merged.blocks[domain]
146 | 
147 |     # Apply allows from URLs lists
148 |     log.info("Removing domains from URL allowlists...")
149 |     for alist in allowlists:
150 |         log.debug(f"Processing allows from '{alist.origin}'...")
151 |         for allowed in alist.blocks.values():
152 |             domain = allowed.domain
153 |             log.debug(f"Removing allowlisted domain '{domain}' from merged list.")
154 |             if domain in merged.blocks:
155 |                 del merged.blocks[domain]
156 | 
157 |     return merged
158 | 
159 | 
160 | def fetch_allowlists(conf: argparse.Namespace) -> list[Blocklist]:
161 |     """Fetch allowlists from any configured URL sources."""
162 |     if conf.allowlist_url_sources:
163 |         allowlists = fetch_from_urls(
164 |             conf.allowlist_url_sources,
165 |             ALLOWLIST_IMPORT_FIELDS,
166 |             conf.save_intermediate,
167 |             conf.savedir,
168 |         )
169 |         return allowlists
170 |     return []
171 | 
172 | 
173 | def fetch_from_urls(
174 |     url_sources: list,
175 |     import_fields: list = IMPORT_FIELDS,
176 |     save_intermediate: bool = False,
177 |     savedir: str = None,
178 |     export_fields: list = EXPORT_FIELDS,
179 | ) -> list[Blocklist]:
180 |     """Fetch blocklists from URL sources
181 |     @param url_sources: A
list of configuration dicts for URL sources
182 |     @param import_fields: A list of fields to import from each source
183 |     @returns: A list of Blocklist objects, one per URL source
184 |     """
185 |     log.info("Fetching domain blocks from URLs...")
186 |     blocklists = []
187 |     for item in url_sources:
188 |         url = item["url"]
189 |         # If import fields are provided, they override the global ones passed in
190 |         source_import_fields = item.get("import_fields", None)
191 |         if source_import_fields:
192 |             # Build a new list including the default fields (list.extend() returns None)
193 |             import_fields = IMPORT_FIELDS + source_import_fields
194 | 
195 |         max_severity = item.get("max_severity", "suspend")
196 |         listformat = item.get("format", "csv")
197 |         with urlr.urlopen(url) as fp:
198 |             rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode("utf-8")
199 |             bl = parse_blocklist(rawdata, url, listformat, import_fields, max_severity)
200 |             blocklists.append(bl)
201 |             if save_intermediate:
202 |                 save_intermediate_blocklist(bl, savedir, export_fields)
203 | 
204 |     return blocklists
205 | 
206 | 
207 | def fetch_from_instances(
208 |     sources: list,
209 |     import_fields: list = IMPORT_FIELDS,
210 |     save_intermediate: bool = False,
211 |     savedir: str = None,
212 |     export_fields: list = EXPORT_FIELDS,
213 | ) -> list[Blocklist]:
214 |     """Fetch blocklists from other instances
215 |     @param sources: A list of configuration dicts for instance sources
216 |     @param import_fields: A list of fields to import from each instance
217 |     @returns: A list of Blocklist objects, one per instance
218 |     """
219 |     log.info("Fetching domain blocks from instances...")
220 |     blocklists = []
221 |     for item in sources:
222 |         domain = item["domain"]
223 |         admin = item.get("admin", False)
224 |         token = item.get("token", None)
225 |         scheme = item.get("scheme", "https")
226 |         # itemsrc = f"{scheme}://{domain}/api"
227 | 
228 |         # If import fields are provided, they override the global ones passed in
229 |         source_import_fields = item.get("import_fields", None)
230 |         if source_import_fields:
231 |             # Build a new list including the default fields (list.extend() returns None)
232 |             import_fields = IMPORT_FIELDS + source_import_fields
233 | 
234 |         bl = fetch_instance_blocklist(domain, token, admin, import_fields, scheme)
235 |         blocklists.append(bl)
236 |         if save_intermediate:
237 |             save_intermediate_blocklist(bl, savedir, export_fields)
238 |     return blocklists
239 | 
240 | 
241 | def merge_blocklists(
242 |     blocklists: list[Blocklist],
243 |     mergeplan: str = "max",
244 |     threshold: int = 0,
245 |     threshold_type: str = "count",
246 |     save_block_audit_file: str = None,
247 | ) -> Blocklist:
248 |     """Merge fetched remote blocklists into a bulk update
249 |     @param blocklists: A list of Blocklist objects
250 |         to merge into one unified blocklist
251 |     @param mergeplan: An optional method of merging overlapping block definitions
252 |         'max' (the default) uses the highest severity block found
253 |         'min' uses the lowest severity block found
254 |     @param threshold: An integer used in the threshold mechanism.
255 |         If a domain is not present in this number/pct or more of the blocklists,
256 |         it will not get merged into the final list.
257 |     @param threshold_type: choice of ['count', 'pct']
258 |         If `count`, threshold is met if block is present in `threshold`
259 |         or more blocklists.
260 |         If `pct`, threshold is met if block is present in
261 |         count_of_mentions / number_of_blocklists.
262 |     @returns: A Blocklist of merged DomainBlocks, keyed by domain
263 |     """
264 |     merged = Blocklist("fediblockhole.merge_blocklists")
265 |     audit = BlockAuditList("fediblockhole.merge_blocklists")
266 | 
267 |     num_blocklists = len(blocklists)
268 | 
269 |     # Create a domain keyed list of blocks for each domain
270 |     domain_blocks = {}
271 | 
272 |     for bl in blocklists:
273 |         for block in bl.values():
274 |             if "*" in block.domain:
275 |                 log.debug(f"Domain '{block.domain}' is obfuscated. Skipping it.")
276 |                 continue
277 |             elif block.domain in domain_blocks:
278 |                 domain_blocks[block.domain].append(block)
279 |             else:
280 |                 domain_blocks[block.domain] = [
281 |                     block,
282 |                 ]
283 | 
284 |     # Only merge items if `threshold` is met or exceeded
285 |     for domain in domain_blocks:
286 |         domain_matches_count = len(domain_blocks[domain])
287 |         domain_matches_percent = domain_matches_count / num_blocklists * 100
288 |         if threshold_type == "count":
289 |             domain_threshold_level = domain_matches_count
290 |         elif threshold_type == "pct":
291 |             domain_threshold_level = domain_matches_percent
292 |             # log.debug(f"domain threshold level: {domain_threshold_level}")
293 |         else:
294 |             raise ValueError(
295 |                 f"Unsupported threshold type '{threshold_type}'. Supported values are: 'count', 'pct'"  # noqa
296 |             )
297 | 
298 |         log.debug(f"Checking if {domain_threshold_level} >= {threshold} for {domain}")
299 |         if domain_threshold_level >= threshold:
300 |             # Add first block in the list to merged
301 |             block = domain_blocks[domain][0]
302 |             log.debug(f"Yes. Merging block: {block}")
303 | 
304 |             # Merge the others with this record
305 |             for newblock in domain_blocks[domain][1:]:
306 |                 block = apply_mergeplan(block, newblock, mergeplan)
307 |             merged.blocks[block.domain] = block
308 | 
309 |         if save_block_audit_file:
310 |             blockdata: BlockAudit = {
311 |                 "domain": domain,
312 |                 "count": domain_matches_count,
313 |                 "percent": domain_matches_percent,
314 |             }
315 |             audit.blocks[domain] = blockdata
316 | 
317 |     if save_block_audit_file:
318 |         log.info(f"Saving audit file to {save_block_audit_file}")
319 |         save_domain_block_audit_to_file(audit, save_block_audit_file)
320 | 
321 |     return merged
322 | 
323 | 
324 | def apply_mergeplan(
325 |     oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str = "max"
326 | ) -> DomainBlock:
327 |     """Use a mergeplan to decide how to merge two overlapping block definitions
328 | 
329 |     @param oldblock: The existing block definition.
330 |     @param newblock: The new block definition we want to merge in.
331 |     @param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
332 |     """
333 |     # Default to the existing block definition
334 |     blockdata = oldblock._asdict()
335 | 
336 |     # Merge comments
337 |     keylist = ["public_comment", "private_comment"]
338 |     for key in keylist:
339 |         try:
340 |             oldcomment = getattr(oldblock, key)
341 |             newcomment = getattr(newblock, key)
342 |             blockdata[key] = merge_comments(oldcomment, newcomment)
343 |         except (KeyError, AttributeError):
344 |             log.debug(
345 |                 f"Key '{key}' missing from block definition so cannot compare. Continuing..."  # noqa
346 |             )
347 |             continue
348 | 
349 |     # How do we override an earlier block definition?
350 |     if mergeplan in ["max", None]:
351 |         # Use the highest block level found (the default)
352 |         # log.debug(f"Using 'max' mergeplan.")
353 | 
354 |         if newblock.severity > oldblock.severity:
355 |             # log.debug(f"New block severity is higher.
Using that.") 356 | blockdata["severity"] = newblock.severity 357 | 358 | # For 'reject_media', 'reject_reports', and 'obfuscate' if 359 | # the value is set and is True for the domain in 360 | # any blocklist then the value is set to True. 361 | for key in ["reject_media", "reject_reports", "obfuscate"]: 362 | newval = getattr(newblock, key) 363 | if newval is True: 364 | blockdata[key] = True 365 | 366 | elif mergeplan in ["min"]: 367 | # Use the lowest block level found 368 | log.debug("Using 'min' mergeplan.") 369 | 370 | if newblock.severity < oldblock.severity: 371 | blockdata["severity"] = newblock.severity 372 | 373 | # For 'reject_media', 'reject_reports', and 'obfuscate' if 374 | # the value is set and is False for the domain in 375 | # any blocklist then the value is set to False. 376 | for key in ["reject_media", "reject_reports", "obfuscate"]: 377 | newval = getattr(newblock, key) 378 | if newval is False: 379 | blockdata[key] = False 380 | 381 | else: 382 | raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.") 383 | 384 | # log.debug(f"Block severity set to {blockdata['severity']}") 385 | 386 | return DomainBlock(**blockdata) 387 | 388 | 389 | def merge_comments(oldcomment: str, newcomment: str) -> str: 390 | """Merge two comments 391 | 392 | @param oldcomment: The original comment we're merging into 393 | @param newcomment: The new commment we want to merge in 394 | @returns: a new str of the merged comment 395 | """ 396 | # Don't merge if both comments are None or '' 397 | if oldcomment in ["", None] and newcomment in ["", None]: 398 | return "" 399 | 400 | # If both comments are the same, or new comment is empty, don't merge 401 | if oldcomment == newcomment or newcomment in ["", None]: 402 | return oldcomment 403 | 404 | # If old comment is empty, just return the new one 405 | if oldcomment in ["", None]: 406 | return newcomment 407 | 408 | # We want to skip duplicate fragments so we don't end up 409 | # re-concatenating the same strings every time there's an 410 | # update, causing the comment to grow without bound. 411 | # We tokenize the comments, splitting them on ', ', and comparing 412 | # the tokens, skipping duplicates. 
413 | # This means "boring, lack of moderation, nazis, scrapers" merging 414 | # with "lack of moderation, scrapers" should result in 415 | # "boring, lack of moderation, nazis, scrapers" 416 | old_tokens = oldcomment.split(", ") 417 | new_tokens = newcomment.split(", ") 418 | 419 | # Remove any empty string tokens that we get 420 | while "" in old_tokens: 421 | old_tokens.remove("") 422 | while "" in new_tokens: 423 | new_tokens.remove("") 424 | 425 | # Remove duplicate tokens 426 | for token in old_tokens: 427 | if token in new_tokens: 428 | new_tokens.remove(token) 429 | 430 | # Combine whatever tokens are left into one set 431 | tokenset = old_tokens 432 | tokenset.extend(new_tokens) 433 | 434 | # Return the merged string 435 | return ", ".join(tokenset) 436 | 437 | 438 | def requests_headers(token: str = None): 439 | """Set common headers for requests""" 440 | headers = {"User-Agent": f"FediBlockHole/{__version__}"} 441 | if token: 442 | headers["Authorization"] = f"Bearer {token}" 443 | 444 | return headers 445 | 446 | 447 | def fetch_instance_blocklist( 448 | host: str, 449 | token: str = None, 450 | admin: bool = False, 451 | import_fields: list = ["domain", "severity"], 452 | scheme: str = "https", 453 | ) -> Blocklist: 454 | """Fetch existing block list from server 455 | 456 | @param host: The remote host to connect to. 457 | @param token: The (optional) OAuth Bearer token to authenticate with. 458 | @param admin: Boolean flag to use the admin API if True. 459 | @param import_fields: A list of fields to import from the remote instance. 460 | @returns: A Blocklist of the domain blocks from the instance. 461 | """ 462 | log.info(f"Fetching instance blocklist from {host} ...") 463 | 464 | if admin: 465 | api_path = "/api/v1/admin/domain_blocks" 466 | parse_format = "json" 467 | else: 468 | api_path = "/api/v1/instance/domain_blocks" 469 | parse_format = "mastodon_api_public" 470 | 471 | headers = requests_headers(token) 472 | 473 | url = f"{scheme}://{host}{api_path}" 474 | 475 | blockdata = [] 476 | link = True 477 | while link: 478 | response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT) 479 | if response.status_code != 200: 480 | log.error(f"Cannot fetch remote blocklist: {response.content}") 481 | raise ValueError(f"Unable to fetch domain block list: {response}") 482 | 483 | # Each block of returned data is a JSON list of dicts 484 | # so we parse them and append them to the fetched list 485 | # of JSON data we need to parse. 
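# For illustration (URLs invented): the Link header this endpoint returns
# typically looks like
#   Link: <https://example.org/api/v1/admin/domain_blocks?max_id=123>; rel="next",
#         <https://example.org/api/v1/admin/domain_blocks?min_id=456>; rel="prev"
# which is what the pagination handling below picks apart.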
486 | 487 | blockdata.extend(json.loads(response.content.decode("utf-8"))) 488 | # Parse the link header to find the next url to fetch 489 | # This is a weird and janky way of doing pagination but 490 | # hey nothing we can do about it we just have to deal 491 | link = response.headers.get("Link", None) 492 | if link is None: 493 | break 494 | pagination = link.split(", ") 495 | if len(pagination) != 2: 496 | link = None 497 | break 498 | else: 499 | next = pagination[0] 500 | # prev = pagination[1] 501 | 502 | urlstring, rel = next.split("; ") 503 | url = urlstring.strip("<").rstrip(">") 504 | 505 | blocklist = parse_blocklist(blockdata, url, parse_format, import_fields) 506 | 507 | return blocklist 508 | 509 | 510 | def delete_block(token: str, host: str, id: int, scheme: str = "https"): 511 | """Remove a domain block""" 512 | log.debug(f"Removing domain block {id} at {host}...") 513 | api_path = "/api/v1/admin/domain_blocks/" 514 | 515 | url = f"{scheme}://{host}{api_path}{id}" 516 | 517 | response = requests.delete( 518 | url, headers=requests_headers(token), timeout=REQUEST_TIMEOUT 519 | ) 520 | if response.status_code != 200: 521 | if response.status_code == 404: 522 | log.warning(f"No such domain block: {id}") 523 | return 524 | 525 | raise ValueError( 526 | f"Something went wrong: {response.status_code}: {response.content}" 527 | ) 528 | 529 | 530 | def fetch_instance_follows( 531 | token: str, host: str, domain: str, scheme: str = "https" 532 | ) -> int: 533 | """Fetch the followers of the target domain at the instance 534 | 535 | @param token: the Bearer authentication token for OAuth access 536 | @param host: the instance API hostname/IP address 537 | @param domain: the domain to search for followers of 538 | @returns: int, number of local followers of remote instance accounts 539 | """ 540 | api_path = "/api/v1/admin/measures" 541 | url = f"{scheme}://{host}{api_path}" 542 | 543 | key = "instance_follows" 544 | 545 | # This data structure only allows us to request a single domain 546 | # at a time, which limits the load on the remote instance of each call 547 | data = { 548 | "keys": [key], 549 | key: {"domain": domain}, 550 | } 551 | 552 | # The Mastodon API only accepts JSON formatted POST data for measures 553 | response = requests.post( 554 | url, headers=requests_headers(token), json=data, timeout=REQUEST_TIMEOUT 555 | ) 556 | if response.status_code != 200: 557 | if response.status_code == 403: 558 | log.error( 559 | f"Cannot fetch follow information for {domain} from {host}: {response.content}" # noqa 560 | ) 561 | 562 | raise ValueError( 563 | f"Something went wrong: {response.status_code}: {response.content}" 564 | ) 565 | 566 | # Get the total returned 567 | follows = int(response.json()[0]["total"]) 568 | return follows 569 | 570 | 571 | def check_followed_severity( 572 | host: str, 573 | token: str, 574 | domain: str, 575 | severity: BlockSeverity, 576 | max_followed_severity: BlockSeverity = BlockSeverity("silence"), 577 | scheme: str = "https", 578 | ): 579 | """Check an instance to see if it has followers of a to-be-blocked instance""" 580 | 581 | log.debug("Checking followed severity...") 582 | # Return straight away if we're not increasing the severity 583 | if severity <= max_followed_severity: 584 | return severity 585 | 586 | # If the instance has accounts that follow people on the to-be-blocked domain, 587 | # limit the maximum severity to the configured `max_followed_severity`. 
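# Worked example (invented values): with max_followed_severity = 'silence',
# a proposed 'suspend' for a domain that local accounts still follow is
# capped at 'silence', so those follow relationships are not severed.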
588 | log.debug("checking for instance follows...") 589 | follows = fetch_instance_follows(token, host, domain, scheme) 590 | time.sleep(API_CALL_DELAY) 591 | if follows > 0: 592 | log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.") 593 | if severity > max_followed_severity: 594 | log.warning( 595 | f"Instance {host} has {follows} followers of accounts at {domain}. " 596 | f"Limiting block severity to {max_followed_severity}." 597 | ) 598 | return max_followed_severity 599 | return severity 600 | 601 | 602 | def is_change_needed(oldblock: dict, newblock: dict, import_fields: list): 603 | change_needed = oldblock.compare_fields(newblock, import_fields) 604 | return change_needed 605 | 606 | 607 | def update_known_block( 608 | token: str, host: str, block: DomainBlock, scheme: str = "https" 609 | ): 610 | """Update an existing domain block with information in blockdict""" 611 | api_path = "/api/v1/admin/domain_blocks/" 612 | 613 | id = block.id 614 | blockdata = block._asdict() 615 | del blockdata["id"] 616 | 617 | url = f"{scheme}://{host}{api_path}{id}" 618 | 619 | response = requests.put( 620 | url, headers=requests_headers(token), json=blockdata, timeout=REQUEST_TIMEOUT 621 | ) 622 | if response.status_code != 200: 623 | raise ValueError( 624 | f"Something went wrong: {response.status_code}: {response.content}" 625 | ) 626 | 627 | 628 | def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str = "https"): 629 | """Block a domain on Mastodon host""" 630 | log.debug(f"Adding block entry for {blockdata.domain} at {host}...") 631 | api_path = "/api/v1/admin/domain_blocks" 632 | 633 | url = f"{scheme}://{host}{api_path}" 634 | 635 | response = requests.post( 636 | url, 637 | headers=requests_headers(token), 638 | json=blockdata._asdict(), 639 | timeout=REQUEST_TIMEOUT, 640 | ) 641 | if response.status_code == 422: 642 | # A stricter block already exists. Probably for the base domain. 643 | err = json.loads(response.content) 644 | log.warning(err["error"]) 645 | 646 | elif response.status_code != 200: 647 | 648 | raise ValueError( 649 | f"Something went wrong: {response.status_code}: {response.content}" 650 | ) 651 | 652 | 653 | def push_blocklist( 654 | token: str, 655 | host: str, 656 | blocklist: list[DomainBlock], 657 | dryrun: bool = False, 658 | import_fields: list = ["domain", "severity"], 659 | max_followed_severity: BlockSeverity = BlockSeverity("silence"), 660 | scheme: str = "https", 661 | override_private_comment: str = None, 662 | ): 663 | """Push a blocklist to a remote instance. 664 | 665 | Updates existing entries if they exist, creates new blocks if they don't. 666 | 667 | @param token: The Bearer token for OAUTH API authentication 668 | @param host: The instance host, FQDN or IP 669 | @param blocklist: A list of block definitions. They must include the domain. 670 | @param import_fields: A list of fields to import to the instances. 
671 | """ 672 | log.info(f"Pushing blocklist to host {host} ...") 673 | # Fetch the existing blocklist from the instance 674 | # Force use of the admin API, and add 'id' to the list of fields 675 | if "id" not in import_fields: 676 | import_fields.append("id") 677 | serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme) 678 | 679 | # # Convert serverblocks to a dictionary keyed by domain name 680 | # knownblocks = {row.domain: row for row in serverblocks} 681 | 682 | for newblock in blocklist.values(): 683 | 684 | log.debug(f"Processing block: {newblock}") 685 | if newblock.domain in serverblocks: 686 | log.debug( 687 | f"Block already exists for {newblock.domain}, " 688 | f"checking for differences..." 689 | ) 690 | 691 | oldblock = serverblocks[newblock.domain] 692 | 693 | change_needed = is_change_needed(oldblock, newblock, import_fields) 694 | 695 | # Is the severity changing? 696 | if "severity" in change_needed: 697 | log.debug("Severity change requested, checking...") 698 | if newblock.severity > oldblock.severity: 699 | # Confirm if we really want to change the severity 700 | # If we still have followers of the remote domain, 701 | # we may not want to go all the way to full suspend, 702 | # depending on the configuration 703 | newseverity = check_followed_severity( 704 | host, 705 | token, 706 | oldblock.domain, 707 | newblock.severity, 708 | max_followed_severity, 709 | scheme, 710 | ) 711 | if newseverity != oldblock.severity: 712 | newblock.severity = newseverity 713 | else: 714 | log.info( 715 | "Keeping severity of block the same to avoid disrupting followers." # noqa 716 | ) 717 | change_needed.remove("severity") 718 | 719 | if change_needed: 720 | log.info( 721 | f"Change detected. Need to update {change_needed} " 722 | f"for domain block for {oldblock.domain}" 723 | ) 724 | log.info(f"Old block definition: {oldblock}") 725 | log.info(f"Pushing new block definition: {newblock}") 726 | blockdata = oldblock.copy() 727 | blockdata.update(newblock) 728 | log.debug(f"Block as dict: {blockdata._asdict()}") 729 | 730 | if not dryrun: 731 | update_known_block(token, host, blockdata, scheme) 732 | # add a pause here so we don't melt the instance 733 | time.sleep(API_CALL_DELAY) 734 | else: 735 | log.info("Dry run selected. Not applying changes.") 736 | 737 | else: 738 | log.debug("No differences detected. Not updating.") 739 | pass 740 | 741 | else: 742 | # stamp this record with a private comment, since we're the ones adding it 743 | if override_private_comment: 744 | newblock.private_comment = override_private_comment 745 | 746 | # This is a new block for the target instance, so we 747 | # need to add a block rather than update an existing one 748 | log.info(f"Adding new block: {newblock}...") 749 | log.debug(f"Block as dict: {newblock._asdict()}") 750 | 751 | # Make sure the new block doesn't clobber a domain with followers 752 | newblock.severity = check_followed_severity( 753 | host, 754 | token, 755 | newblock.domain, 756 | newblock.severity, 757 | max_followed_severity, 758 | scheme, 759 | ) 760 | if not dryrun: 761 | add_block(token, host, newblock, scheme) 762 | # add a pause here so we don't melt the instance 763 | time.sleep(API_CALL_DELAY) 764 | else: 765 | log.info("Dry run selected. 
Not adding block.") 766 | 767 | 768 | def load_config(configfile: str): 769 | """Augment commandline arguments with config file parameters 770 | 771 | Config file is expected to be in TOML format 772 | """ 773 | conf = toml.load(configfile) 774 | return conf 775 | 776 | 777 | def save_intermediate_blocklist( 778 | blocklist: Blocklist, filedir: str, export_fields: list = ["domain", "severity"] 779 | ): 780 | """Save a local copy of a blocklist we've downloaded""" 781 | # Invent a filename based on the remote source 782 | # If the source was a URL, convert it to something less messy 783 | # If the source was a remote domain, just use the name of the domain 784 | source = blocklist.origin 785 | log.debug(f"Saving intermediate blocklist from {source}") 786 | source = source.replace("/", "-") 787 | filename = f"{source}.csv" 788 | filepath = os.path.join(filedir, filename) 789 | save_blocklist_to_file(blocklist, filepath, export_fields) 790 | 791 | 792 | def save_blocklist_to_file( 793 | blocklist: Blocklist, filepath: str, export_fields: list = ["domain", "severity"] 794 | ): 795 | """Save a blocklist we've downloaded from a remote source 796 | 797 | @param blocklist: A dictionary of block definitions, keyed by domain 798 | @param filepath: The path to the file the list should be saved in. 799 | @param export_fields: Which fields to include in the export. 800 | """ 801 | try: 802 | sorted_list = sorted(blocklist.blocks.items()) 803 | except KeyError: 804 | log.error("Field 'domain' not found in blocklist.") 805 | log.debug(f"blocklist is: {sorted_list}") 806 | except AttributeError: 807 | log.error("Attribute error!") 808 | import pdb 809 | 810 | pdb.set_trace() 811 | 812 | log.debug(f"export fields: {export_fields}") 813 | 814 | with open(filepath, "w") as fp: 815 | writer = csv.DictWriter(fp, export_fields, extrasaction="ignore") 816 | writer.writeheader() 817 | for key, value in sorted_list: 818 | writer.writerow(value) 819 | 820 | 821 | def resolve_replacements(endpoints: list[dict]) -> list[dict]: 822 | """Resolve any replacement tokens in the list of endpoints""" 823 | 824 | resolved = [] 825 | for item in endpoints: 826 | item = dict(**item) 827 | if "token" in item and "token_env_var" in item: 828 | log.warning( 829 | f"Both `token` and `token_env_var` have been provided; using" 830 | f" the explicit token for {item.get('domain', 'the entry')}" 831 | ) 832 | 833 | # We take the token that's explicitly stated, 834 | # even if there's also an environment variable set. 835 | # Delete the token_env_var key 836 | del item["token_env_var"] 837 | 838 | elif "token" in item: 839 | pass 840 | 841 | elif "token_env_var" in item: 842 | value = os.getenv(item["token_env_var"]) 843 | if value is None: 844 | raise ValueError( 845 | f"Environment variable" f" '{item['token_env_var']}' not set." 846 | ) 847 | 848 | item["token"] = value 849 | 850 | else: 851 | # lastly, try look for a default token. 
852 | domain = item.get("domain") 853 | if domain is not None: 854 | domain_env_var_prefix = domain.upper().replace(".", "_") 855 | domain_env_var = f"{domain_env_var_prefix}_TOKEN" 856 | value = os.getenv(domain_env_var) 857 | if value is not None: 858 | item["token"] = value 859 | 860 | resolved.append(item) 861 | return resolved 862 | 863 | 864 | def save_domain_block_audit_to_file(blocklist: BlockAuditList, filepath: str): 865 | """Save an audit log of domains blocked 866 | 867 | @param blocklist: A dictionary of block definitions, keyed by domain 868 | @param filepath: The path to the file the list should be saved in. 869 | """ 870 | export_fields = ["domain", "count", "percent"] 871 | 872 | try: 873 | sorted_list = sorted(blocklist.blocks.items()) 874 | except KeyError: 875 | log.error("Field 'domain' not found in blocklist.") 876 | return 877 | except AttributeError: 878 | log.error(f"Expected a BlockAuditList object, got: {blocklist!r}") 879 | # Bail out here: carrying on would hit an unbound 880 | # `sorted_list` in the write loop below. 881 | return 882 | 883 | log.debug("exporting audit file") 884 | 885 | with open(filepath, "w") as fp: 886 | writer = csv.DictWriter(fp, export_fields, extrasaction="ignore") 887 | writer.writeheader() 888 | for key, value in sorted_list: 889 | writer.writerow(value) 890 | 891 | 892 | def augment_args(args, tomldata: str = None): 893 | """Augment commandline arguments with config file parameters 894 | 895 | If tomldata is provided, uses that data instead of loading 896 | from a config file. 897 | """ 898 | if tomldata: 899 | conf = toml.loads(tomldata) 900 | else: 901 | conf = toml.load(args.config) 902 | 903 | if not args.no_fetch_url: 904 | args.no_fetch_url = conf.get("no_fetch_url", False) 905 | 906 | if not args.no_fetch_instance: 907 | args.no_fetch_instance = conf.get("no_fetch_instance", False) 908 | 909 | if not args.no_push_instance: 910 | args.no_push_instance = conf.get("no_push_instance", False) 911 | 912 | if not args.blocklist_savefile: 913 | args.blocklist_savefile = conf.get("blocklist_savefile", None) 914 | 915 | if not args.save_intermediate: 916 | args.save_intermediate = conf.get("save_intermediate", False) 917 | 918 | if not args.override_private_comment: 919 | args.override_private_comment = conf.get("override_private_comment", None) 920 | 921 | if not args.savedir: 922 | args.savedir = conf.get("savedir", "/tmp") 923 | 924 | if not args.blocklist_auditfile: 925 | args.blocklist_auditfile = conf.get("blocklist_auditfile", None) 926 | 927 | if not args.export_fields: 928 | args.export_fields = conf.get("export_fields", []) 929 | 930 | if not args.import_fields: 931 | args.import_fields = conf.get("import_fields", []) 932 | 933 | if not args.mergeplan: 934 | args.mergeplan = conf.get("mergeplan", "max") 935 | 936 | if not args.merge_threshold: 937 | args.merge_threshold = conf.get("merge_threshold", 0) 938 | 939 | if not args.merge_threshold_type: 940 | args.merge_threshold_type = conf.get("merge_threshold_type", "count") 941 | 942 | args.blocklist_url_sources = conf.get("blocklist_url_sources", []) 943 | args.blocklist_instance_sources = resolve_replacements( 944 | conf.get("blocklist_instance_sources", []) 945 | ) 946 | args.allowlist_url_sources = conf.get("allowlist_url_sources", []) 947 | args.blocklist_instance_destinations = resolve_replacements( 948 | conf.get("blocklist_instance_destinations", []) 949 | ) 950 | 951 | return args 952 | 953 | 954 | def setup_argparse(): 955 | """Setup the commandline arguments""" 956 | ap = argparse.ArgumentParser( 957 | description="Bulk blocklist tool", 
958 | epilog=f"Part of FediBlockHole v{__version__}", 959 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 960 | ) 961 | ap.add_argument( 962 | "-c", 963 | "--config", 964 | default="/etc/default/fediblockhole.conf.toml", 965 | help="Config file", 966 | ) 967 | ap.add_argument( 968 | "-V", "--version", action="store_true", help="Show version and exit." 969 | ) 970 | 971 | ap.add_argument( 972 | "-o", 973 | "--outfile", 974 | dest="blocklist_savefile", 975 | help="Save merged blocklist to a local file.", 976 | ) 977 | ap.add_argument( 978 | "-S", 979 | "--save-intermediate", 980 | dest="save_intermediate", 981 | action="store_true", 982 | help="Save intermediate blocklists we fetch to local files.", 983 | ) 984 | ap.add_argument( 985 | "-D", 986 | "--savedir", 987 | dest="savedir", 988 | help="Directory path to save intermediate lists.", 989 | ) 990 | ap.add_argument("-m", "--mergeplan", choices=["min", "max"], help="Set mergeplan.") 991 | ap.add_argument( 992 | "-b", 993 | "--block-audit-file", 994 | dest="blocklist_auditfile", 995 | help="Save blocklist auditfile to this location.", 996 | ) 997 | ap.add_argument("--merge-threshold", type=int, help="Merge threshold value") 998 | ap.add_argument( 999 | "--merge-threshold-type", 1000 | choices=["count", "pct"], 1001 | help="Type of merge threshold to use.", 1002 | ) 1003 | ap.add_argument( 1004 | "--override-private-comment", 1005 | dest="override_private_comment", 1006 | help="Override private_comment with this string for new blocks when pushing blocklists.", # noqa 1007 | ) 1008 | 1009 | ap.add_argument( 1010 | "-I", 1011 | "--import-field", 1012 | dest="import_fields", 1013 | action="append", 1014 | help="Extra blocklist fields to import.", 1015 | ) 1016 | ap.add_argument( 1017 | "-E", 1018 | "--export-field", 1019 | dest="export_fields", 1020 | action="append", 1021 | help="Extra blocklist fields to export.", 1022 | ) 1023 | ap.add_argument( 1024 | "-A", 1025 | "--allow", 1026 | dest="allow_domains", 1027 | action="append", 1028 | default=[], 1029 | help="Override any blocks to allow this domain.", 1030 | ) 1031 | 1032 | ap.add_argument( 1033 | "--no-fetch-url", 1034 | dest="no_fetch_url", 1035 | action="store_true", 1036 | help="Don't fetch from URLs, even if configured.", 1037 | ) 1038 | ap.add_argument( 1039 | "--no-fetch-instance", 1040 | dest="no_fetch_instance", 1041 | action="store_true", 1042 | help="Don't fetch from instances, even if configured.", 1043 | ) 1044 | ap.add_argument( 1045 | "--no-push-instance", 1046 | dest="no_push_instance", 1047 | action="store_true", 1048 | help="Don't push to instances, even if configured.", 1049 | ) 1050 | 1051 | ap.add_argument( 1052 | "--loglevel", 1053 | choices=["debug", "info", "warning", "error", "critical"], 1054 | help="Set log output level.", 1055 | ) 1056 | ap.add_argument( 1057 | "--dryrun", 1058 | action="store_true", 1059 | help="Don't actually push updates, just show what would happen.", 1060 | ) 1061 | 1062 | return ap 1063 | 1064 | 1065 | def main(): 1066 | 1067 | ap = setup_argparse() 1068 | args = ap.parse_args() 1069 | 1070 | if args.loglevel is not None: 1071 | levelname = args.loglevel.upper() 1072 | log.setLevel(getattr(logging, levelname)) 1073 | 1074 | if args.version: 1075 | print(f"v{__version__}") 1076 | sys.exit(0) 1077 | 1078 | # Load the configuration file 1079 | args = augment_args(args) 1080 | 1081 | # Do the work of syncing 1082 | sync_blocklists(args) 1083 | -------------------------------------------------------------------------------- 
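A minimal sketch of how the entry points above chain together, mirroring what main() does but inline (the TOML snippet and the file: URL are invented for illustration; augment_args() accepting raw TOML text instead of a config file path is the same hook the test suite below relies on):

    from fediblockhole import augment_args, setup_argparse

    ap = setup_argparse()
    args = ap.parse_args(["-m", "min", "--dryrun"])

    # Hypothetical config text; normally toml.load(args.config) supplies this.
    tomldata = """
    blocklist_url_sources = [ { url = 'file:///tmp/demo-blocklist-01.csv', format = 'csv' } ]
    blocklist_instance_sources = []
    blocklist_instance_destinations = []
    """
    args = augment_args(args, tomldata)

    assert args.mergeplan == "min"  # explicit commandline flags win over config defaults
    assert args.dryrun is True  # sync_blocklists(args) would fetch and merge, but push nothing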
/src/fediblockhole/blocklists.py: -------------------------------------------------------------------------------- 1 | """Parse various blocklist data formats 2 | """ 3 | 4 | from __future__ import annotations 5 | 6 | import csv 7 | import json 8 | import logging 9 | from dataclasses import dataclass, field 10 | from typing import Iterable 11 | 12 | from .const import BlockAudit, BlockSeverity, DomainBlock 13 | 14 | log = logging.getLogger("fediblockhole") 15 | 16 | 17 | @dataclass 18 | class Blocklist: 19 | """A Blocklist object 20 | 21 | A Blocklist is a list of DomainBlocks from an origin 22 | """ 23 | 24 | origin: str = None 25 | blocks: dict[str, DomainBlock] = field(default_factory=dict) 26 | 27 | def __len__(self): 28 | return len(self.blocks) 29 | 30 | def __class_getitem__(cls, item): 31 | return dict[str, DomainBlock] 32 | 33 | def __getitem__(self, item): 34 | return self.blocks[item] 35 | 36 | def __iter__(self): 37 | return self.blocks.__iter__() 38 | 39 | def items(self): 40 | return self.blocks.items() 41 | 42 | def values(self): 43 | return self.blocks.values() 44 | 45 | 46 | @dataclass 47 | class BlockAuditList: 48 | """A BlockAuditlist object 49 | 50 | A BlockAuditlist is a list of BlockAudits from an origin 51 | """ 52 | 53 | origin: str = None 54 | blocks: dict[str, BlockAudit] = field(default_factory=dict) 55 | 56 | def __len__(self): 57 | return len(self.blocks) 58 | 59 | def __class_getitem__(cls, item): 60 | return dict[str, BlockAudit] 61 | 62 | def __getitem__(self, item): 63 | return self.blocks[item] 64 | 65 | def __iter__(self): 66 | return self.blocks.__iter__() 67 | 68 | def items(self): 69 | return self.blocks.items() 70 | 71 | def values(self): 72 | return self.blocks.values() 73 | 74 | 75 | class BlocklistParser(object): 76 | """ 77 | Base class for parsing blocklists 78 | """ 79 | 80 | do_preparse = False 81 | 82 | def __init__( 83 | self, 84 | import_fields: list = ["domain", "severity"], 85 | max_severity: str = "suspend", 86 | ): 87 | """Create a Parser 88 | 89 | @param import_fields: an optional list of fields to limit the parser to. 90 | Ignore any fields in a block item that aren't in import_fields. 
91 | """ 92 | self.import_fields = import_fields 93 | self.max_severity = BlockSeverity(max_severity) 94 | 95 | def preparse(self, blockdata) -> Iterable: 96 | """Some raw datatypes need to be converted into an iterable""" 97 | raise NotImplementedError 98 | 99 | def parse_blocklist(self, blockdata, origin: str = None) -> Blocklist: 100 | """Parse an iterable of blocklist items 101 | @param blocklist: An Iterable of blocklist items 102 | @returns: A dict of DomainBlocks, keyed by domain 103 | """ 104 | if self.do_preparse: 105 | blockdata = self.preparse(blockdata) 106 | 107 | parsed_list = Blocklist(origin) 108 | for blockitem in blockdata: 109 | block = self.parse_item(blockitem) 110 | parsed_list.blocks[block.domain] = block 111 | return parsed_list 112 | 113 | def parse_item(self, blockitem) -> DomainBlock: 114 | """Parse an individual block item 115 | 116 | @param blockitem: an individual block to be parsed 117 | @param import_fields: fields of a block we will import 118 | """ 119 | raise NotImplementedError 120 | 121 | 122 | class BlocklistParserJSON(BlocklistParser): 123 | """Parse a JSON formatted blocklist""" 124 | 125 | do_preparse = True 126 | 127 | def preparse(self, blockdata) -> Iterable: 128 | """Parse the blockdata as JSON if needed""" 129 | if type(blockdata) is type(""): 130 | return json.loads(blockdata) 131 | return blockdata 132 | 133 | def parse_item(self, blockitem: dict) -> DomainBlock: 134 | # Remove fields we don't want to import 135 | origitem = blockitem.copy() 136 | for key in origitem: 137 | if key not in self.import_fields: 138 | del blockitem[key] 139 | 140 | # Convert dict to NamedTuple with the double-star operator 141 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa 142 | block = DomainBlock(**blockitem) 143 | if block.severity > self.max_severity: 144 | block.severity = self.max_severity 145 | return block 146 | 147 | 148 | class BlocklistParserMastodonAPIPublic(BlocklistParserJSON): 149 | """The public blocklist API is slightly different to the admin one""" 150 | 151 | def parse_item(self, blockitem: dict) -> DomainBlock: 152 | # Remove fields we don't want to import 153 | origitem = blockitem.copy() 154 | for key in origitem: 155 | # The Mastodon public API uses the 'public' field 156 | # to mean 'public_comment' because what even is consistency? 157 | if key == "comment": 158 | key = "public_comment" 159 | blockitem["public_comment"] = blockitem["comment"] 160 | del blockitem["comment"] 161 | if key not in self.import_fields: 162 | del blockitem[key] 163 | 164 | # Convert dict to NamedTuple with the double-star operator 165 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa 166 | block = DomainBlock(**blockitem) 167 | if block.severity > self.max_severity: 168 | block.severity = self.max_severity 169 | return block 170 | 171 | 172 | class BlocklistParserCSV(BlocklistParser): 173 | """Parse CSV formatted blocklists 174 | 175 | The parser expects the CSV data to include a header with the field names. 176 | """ 177 | 178 | do_preparse = True 179 | 180 | def preparse(self, blockdata) -> Iterable: 181 | """Use a csv.DictReader to create an iterable from the blockdata""" 182 | return csv.DictReader(blockdata.split("\n")) 183 | 184 | def parse_item(self, blockitem: dict) -> DomainBlock: 185 | # Coerce booleans from string to Python bool 186 | # FIXME: Is this still necessary with the DomainBlock object? 
187 | for boolkey in ["reject_media", "reject_reports", "obfuscate"]: 188 | if boolkey in blockitem: 189 | blockitem[boolkey] = str2bool(blockitem[boolkey]) 190 | 191 | # Remove fields we don't want to import 192 | origitem = blockitem.copy() 193 | for key in origitem: 194 | if key not in self.import_fields: 195 | log.debug(f"ignoring field '{key}'") 196 | del blockitem[key] 197 | 198 | # Convert dict to DomainBlock with the double-star operator 199 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa 200 | block = DomainBlock(**blockitem) 201 | if block.severity > self.max_severity: 202 | block.severity = self.max_severity 203 | return block 204 | 205 | 206 | class BlocklistParserMastodonCSV(BlocklistParserCSV): 207 | """Parse Mastodon CSV formatted blocklists 208 | 209 | The Mastodon v4.1.x domain block CSV export prefixes its 210 | field names with a '#' character because… reasons? 211 | """ 212 | 213 | do_preparse = True 214 | 215 | def parse_item(self, blockitem: dict) -> DomainBlock: 216 | """Build a new blockitem dict with new un-#ed keys""" 217 | newdict = {} 218 | for key in blockitem: 219 | newkey = key.lstrip("#") 220 | newdict[newkey] = blockitem[key] 221 | 222 | return super().parse_item(newdict) 223 | 224 | 225 | class RapidBlockParserCSV(BlocklistParserCSV): 226 | """Parse RapidBlock CSV blocklists 227 | 228 | RapidBlock CSV blocklists are just a newline separated list of domains. 229 | """ 230 | 231 | def preparse(self, blockdata) -> Iterable: 232 | """Prepend a 'domain' field header to the data""" 233 | log.debug(f"blockdata: {blockdata[:100]}") 234 | blockdata = "".join(["domain\r\n", blockdata]) 235 | 236 | return csv.DictReader(blockdata.split("\r\n")) 237 | 238 | 239 | class RapidBlockParserJSON(BlocklistParserJSON): 240 | """Parse RapidBlock JSON formatted blocklists""" 241 | 242 | def preparse(self, blockdata) -> Iterable: 243 | rb_dict = json.loads(blockdata) 244 | # We want to iterate over all the dictionary items 245 | return rb_dict["blocks"].items() 246 | 247 | def parse_item(self, blockitem: tuple) -> DomainBlock: 248 | """Parse an individual item in a RapidBlock list""" 249 | # Each item is a tuple of: 250 | # (domain, {dictionary of attributes}) 251 | domain = blockitem[0] 252 | 253 | # RapidBlock has a binary block level which we map 254 | # to 'suspend' if True, and 'noop' if False. 
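# Worked example (shape assumed from the iteration above):
#   ("example.org", {"isBlocked": True, "reason": "spam"})
# becomes DomainBlock("example.org", "suspend", "spam") when
# 'public_comment' is in import_fields.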
255 | isblocked = blockitem[1]["isBlocked"] 256 | if isblocked: 257 | severity = "suspend" 258 | else: 259 | severity = "noop" 260 | 261 | if "public_comment" in self.import_fields: 262 | public_comment = blockitem[1]["reason"] 263 | else: 264 | public_comment = "" 265 | 266 | # There's a 'tags' field as well, but we can't 267 | # do much with that in Mastodon yet 268 | 269 | block = DomainBlock(domain, severity, public_comment) 270 | if block.severity > self.max_severity: 271 | block.severity = self.max_severity 272 | 273 | return block 274 | 275 | 276 | def str2bool(boolstring: str) -> bool: 277 | """Helper function to convert boolean strings to actual Python bools""" 278 | boolstring = boolstring.lower() 279 | if boolstring in ["true", "t", "1", "y", "yes"]: 280 | return True 281 | elif boolstring in ["", "false", "f", "0", "n", "no"]: 282 | return False 283 | else: 284 | raise ValueError(f"Cannot parse value '{boolstring}' as boolean") 285 | 286 | 287 | FORMAT_PARSERS = { 288 | "csv": BlocklistParserCSV, 289 | "mastodon_csv": BlocklistParserMastodonCSV, 290 | "json": BlocklistParserJSON, 291 | "mastodon_api_public": BlocklistParserMastodonAPIPublic, 292 | "rapidblock.csv": RapidBlockParserCSV, 293 | "rapidblock.json": RapidBlockParserJSON, 294 | } 295 | 296 | 297 | # helper function to select the appropriate Parser 298 | def parse_blocklist( 299 | blockdata, 300 | origin, 301 | format="csv", 302 | import_fields: list = ["domain", "severity"], 303 | max_severity: str = "suspend", 304 | ): 305 | """Parse a blocklist in the given format""" 306 | log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...") 307 | 308 | parser = FORMAT_PARSERS[format](import_fields, max_severity) 309 | return parser.parse_blocklist(blockdata, origin) 310 | -------------------------------------------------------------------------------- /src/fediblockhole/const.py: -------------------------------------------------------------------------------- 1 | """ Constant objects used by FediBlockHole 2 | """ 3 | 4 | from __future__ import annotations 5 | 6 | import enum 7 | import logging 8 | 9 | log = logging.getLogger("fediblockhole") 10 | 11 | 12 | class SeverityLevel(enum.IntEnum): 13 | """How severe should a block be? 
Higher is more severe.""" 14 | 15 | NONE = enum.auto() 16 | SILENCE = enum.auto() 17 | SUSPEND = enum.auto() 18 | 19 | 20 | class BlockSeverity(object): 21 | """A representation of a block severity 22 | 23 | We add some helpful functions rather than using a bare IntEnum 24 | """ 25 | 26 | def __init__(self, severity: str = None): 27 | self._level = self.str2level(severity) 28 | 29 | @property 30 | def level(self): 31 | return self._level 32 | 33 | @level.setter 34 | def level(self, value): 35 | if isinstance(value, SeverityLevel): 36 | self._level = value 37 | elif isinstance(value, str): 38 | self._level = self.str2level(value) 39 | else: 40 | raise ValueError(f"Invalid level value '{value}'") 41 | 42 | def str2level(self, severity: str = None): 43 | """Convert a string severity level to an internal enum""" 44 | 45 | if severity in [None, "", "noop"]: 46 | return SeverityLevel.NONE 47 | 48 | elif severity in ["silence"]: 49 | return SeverityLevel.SILENCE 50 | 51 | elif severity in ["suspend"]: 52 | return SeverityLevel.SUSPEND 53 | 54 | else: 55 | raise ValueError(f"Invalid severity value '{severity}'") 56 | 57 | def __repr__(self): 58 | return f"'{str(self)}'" 59 | 60 | def __str__(self): 61 | """A string version of the severity level""" 62 | levelmap = { 63 | SeverityLevel.NONE: "noop", 64 | SeverityLevel.SILENCE: "silence", 65 | SeverityLevel.SUSPEND: "suspend", 66 | } 67 | return levelmap[self.level] 68 | 69 | def __lt__(self, other): 70 | # Return a plain bool rather than True-or-None 71 | return self._level < other._level 72 | 73 | def __gt__(self, other): 74 | # Return a plain bool rather than True-or-None 75 | return self._level > other._level 76 | 77 | def __eq__(self, other): 78 | # None is never equal to a BlockSeverity 79 | return other is not None and self._level == other._level 80 | 81 | def __le__(self, other): 82 | # Return a plain bool rather than True-or-None 83 | return self._level <= other._level 84 | 85 | def __ge__(self, other): 86 | # Return a plain bool rather than True-or-None 87 | return self._level >= other._level 88 | 89 | 90 | class BlockAudit(object): 91 | 92 | fields = [ 93 | "domain", 94 | "count", 95 | "percent", 96 | ] 97 | 98 | all_fields = ["domain", "count", "percent", "id"] 99 | 100 | def __init__(self, domain: str, count: int = 0, percent: int = 0, id: int = None): 101 | """Initialize the BlockAudit""" 102 | self.domain = domain 103 | self.count = count 104 | self.percent = percent 105 | self.id = id 106 | 107 | def _asdict(self): 108 | """Return a dict version of this object""" 109 | dictval = { 110 | "domain": self.domain, 111 | "count": self.count, 112 | "percent": self.percent, 113 | } 114 | if self.id: 115 | dictval["id"] = self.id 116 | 117 | return dictval 118 | 119 | def __repr__(self): 120 | 121 | return f"<BlockAudit {self._asdict()}>" 122 | 123 | def copy(self): 124 | """Make a copy of this object and return it""" 125 | retval = BlockAudit(**self._asdict()) 126 | return retval 127 | 128 | def update(self, data): 129 | """Update my kwargs""" 130 | for key in data: 131 | setattr(self, key, data[key]) 132 | 133 | def __iter__(self): 134 | """Be iterable""" 135 | keys = list(self.fields)  # copy, so we don't mutate the class attribute 136 | 137 | if getattr(self, "id", False): 138 | keys.append("id") 139 | 140 | for k in keys: 141 | yield k 142 | 143 | def __getitem__(self, k, default=None): 144 | "Behave like a dict for getting values" 145 | if k not in self.all_fields: 146 | raise KeyError(f"Invalid key '{k}'") 147 | 148 | return getattr(self, k, default) 149 | 150 | def get(self, k, default=None): 151 | return self.__getitem__(k, default) 152 | 153 | 154 | # class _DomainBlock(NamedTuple): 155 | # domain: str # FIXME: Use an actual Domain object from somewhere? 
156 | # severity: BlockSeverity = BlockSeverity.SUSPEND 157 | # public_comment: str = '' 158 | # private_comment: str = '' 159 | # reject_media: bool = False 160 | # reject_reports: bool = False 161 | # obfuscate: bool = False 162 | 163 | 164 | class DomainBlock(object): 165 | 166 | fields = [ 167 | "domain", 168 | "severity", 169 | "public_comment", 170 | "private_comment", 171 | "reject_media", 172 | "reject_reports", 173 | "obfuscate", 174 | ] 175 | 176 | all_fields = [ 177 | "domain", 178 | "severity", 179 | "public_comment", 180 | "private_comment", 181 | "reject_media", 182 | "reject_reports", 183 | "obfuscate", 184 | "id", 185 | ] 186 | 187 | def __init__( 188 | self, 189 | domain: str, 190 | severity: BlockSeverity = BlockSeverity("suspend"), 191 | public_comment: str = "", 192 | private_comment: str = "", 193 | reject_media: bool = False, 194 | reject_reports: bool = False, 195 | obfuscate: bool = False, 196 | id: int = None, 197 | ): 198 | """Initialize the DomainBlock""" 199 | self.domain = domain 200 | self.severity = severity 201 | self.public_comment = public_comment 202 | self.private_comment = private_comment 203 | self.reject_media = reject_media 204 | self.reject_reports = reject_reports 205 | self.obfuscate = obfuscate 206 | self.id = id 207 | 208 | @property 209 | def severity(self): 210 | return self._severity 211 | 212 | @severity.setter 213 | def severity(self, sev): 214 | if isinstance(sev, BlockSeverity): 215 | self._severity = sev 216 | else: 217 | self._severity = BlockSeverity(sev) 218 | 219 | def _asdict(self): 220 | """Return a dict version of this object""" 221 | dictval = { 222 | "domain": self.domain, 223 | "severity": str(self.severity), 224 | "public_comment": self.public_comment, 225 | "private_comment": self.private_comment, 226 | "reject_media": self.reject_media, 227 | "reject_reports": self.reject_reports, 228 | "obfuscate": self.obfuscate, 229 | } 230 | if self.id: 231 | dictval["id"] = self.id 232 | 233 | return dictval 234 | 235 | def compare_fields(self, other, fields=None) -> list: 236 | """Compare two DomainBlocks on specific fields. 237 | If all the fields are equal, the DomainBlocks are equal. 
238 | 239 | @returns: a list of the fields that are different 240 | """ 241 | if not isinstance(other, DomainBlock): 242 | raise ValueError(f"Cannot compare DomainBlock to {type(other)}:{other}") 243 | 244 | if fields is None: 245 | fields = self.fields 246 | 247 | diffs = [] 248 | # Check if all the requested fields are equal 249 | for field in fields: 250 | if getattr(self, field) != getattr(other, field): 251 | diffs.append(field) 252 | return diffs 253 | 254 | def __eq__(self, other): 255 | diffs = self.compare_fields(other) 256 | # Return a plain bool, not True-or-None 257 | return len(diffs) == 0 258 | 259 | def __repr__(self): 260 | 261 | return f"<DomainBlock {self._asdict()}>" 262 | 263 | def copy(self): 264 | """Make a copy of this object and return it""" 265 | retval = DomainBlock(**self._asdict()) 266 | return retval 267 | 268 | def update(self, data): 269 | """Update my kwargs""" 270 | for key in data: 271 | setattr(self, key, data[key]) 272 | 273 | def __iter__(self): 274 | """Be iterable""" 275 | keys = list(self.fields)  # copy, so we don't mutate the class attribute 276 | 277 | if getattr(self, "id", False): 278 | keys.append("id") 279 | 280 | for k in keys: 281 | yield k 282 | 283 | def __getitem__(self, k, default=None): 284 | "Behave like a dict for getting values" 285 | if k not in self.all_fields: 286 | raise KeyError(f"Invalid key '{k}'") 287 | 288 | return getattr(self, k, default) 289 | 290 | def get(self, k, default=None): 291 | return self.__getitem__(k, default) 292 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import pytest 5 | 6 | sys.path.append(os.path.join(os.path.dirname(__file__), "helpers")) 7 | 8 | 9 | def load_data(datafile): 10 | """Load test data from a fixture datafile""" 11 | with open(os.path.join(os.path.dirname(__file__), "fixtures", datafile)) as fp: 12 | return fp.read() 13 | 14 | 15 | @pytest.fixture 16 | def data_mastodon_json(): 17 | return load_data("data-mastodon.json") 18 | 19 | 20 | @pytest.fixture 21 | def data_rapidblock_json(): 22 | return load_data("data-rapidblock.json") 23 | 24 | 25 | @pytest.fixture 26 | def data_suspends_01(): 27 | return load_data("data-suspends-01.csv") 28 | 29 | 30 | @pytest.fixture 31 | def data_silences_01(): 32 | return load_data("data-silences-01.csv") 33 | 34 | 35 | @pytest.fixture 36 | def data_noop_01(): 37 | return load_data("data-noop-01.csv") 38 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenmagic/fediblockhole/ba40084772a565f36af1290070e6c9bba14fb9e7/tests/fixtures/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/data-mastodon.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "234", 4 | "domain": "example.org", 5 | "created_at": "2023-01-09T05:17:50.614Z", 6 | "severity": "suspend", 7 | "reject_media": true, 8 | "reject_reports": true, 9 | "private_comment": "A private comment", 10 | "public_comment": "A public comment", 11 | "obfuscate": true 12 | }, 13 | { 14 | "id": "233", 15 | "domain": "example2.org", 16 | "created_at": "2023-01-09T05:09:01.859Z", 17 | "severity": "silence", 18 | "reject_media": true, 19 | "reject_reports": true, 20 | "private_comment": "Another private comment", 21 | "public_comment": "Another public comment", 22 | 
"obfuscate": true 23 | }, 24 | { 25 | "id": "232", 26 | "domain": "example3.org", 27 | "created_at": "2023-01-09T05:08:58.833Z", 28 | "severity": "suspend", 29 | "reject_media": true, 30 | "reject_reports": true, 31 | "private_comment": "More comments? What is this?", 32 | "public_comment": "Yes we love to comment", 33 | "obfuscate": true 34 | }, 35 | { 36 | "id": "231", 37 | "domain": "example4.org", 38 | "created_at": "2023-01-09T05:04:01.856Z", 39 | "severity": "noop", 40 | "reject_media": true, 41 | "reject_reports": true, 42 | "private_comment": "I cannot believe all the comments", 43 | "public_comment": "Look how many comments we can fit in here", 44 | "obfuscate": true 45 | }, 46 | { 47 | "id": "230", 48 | "domain": "example5.org", 49 | "created_at": "2023-01-08T21:37:22.665Z", 50 | "severity": "suspend", 51 | "reject_media": false, 52 | "reject_reports": false, 53 | "private_comment": "", 54 | "public_comment": "lack of moderation", 55 | "obfuscate": false 56 | }, 57 | { 58 | "id": "2308", 59 | "domain": "example6.org", 60 | "created_at": "2023-01-06T08:36:53.989Z", 61 | "severity": "suspend", 62 | "reject_media": false, 63 | "reject_reports": false, 64 | "private_comment": "", 65 | "public_comment": "anti-trans bigotry", 66 | "obfuscate": false 67 | }, 68 | { 69 | "id": "2306", 70 | "domain": "example7.org", 71 | "created_at": "2023-01-04T08:14:05.381Z", 72 | "severity": "suspend", 73 | "reject_media": false, 74 | "reject_reports": false, 75 | "private_comment": "", 76 | "public_comment": "lack of moderation", 77 | "obfuscate": false 78 | }, 79 | { 80 | "id": "2305", 81 | "domain": "example8.org", 82 | "created_at": "2023-01-04T08:13:48.891Z", 83 | "severity": "suspend", 84 | "reject_media": false, 85 | "reject_reports": false, 86 | "private_comment": "freeze peach", 87 | "public_comment": "lack of moderation, conspiracy weirdness", 88 | "obfuscate": false 89 | }, 90 | { 91 | "id": "2301", 92 | "domain": "example9.org", 93 | "created_at": "2023-01-04T08:11:32.904Z", 94 | "severity": "silence", 95 | "reject_media": false, 96 | "reject_reports": false, 97 | "private_comment": "", 98 | "public_comment": "alt-right conspiracies", 99 | "obfuscate": false 100 | }, 101 | { 102 | "id": "453", 103 | "domain": "example15.org", 104 | "created_at": "2022-12-05T08:26:59.920Z", 105 | "severity": "suspend", 106 | "reject_media": true, 107 | "reject_reports": true, 108 | "private_comment": "cryptocurrency", 109 | "public_comment": "cryptocurrency", 110 | "obfuscate": true 111 | } 112 | ] 113 | -------------------------------------------------------------------------------- /tests/fixtures/data-noop-01.csv: -------------------------------------------------------------------------------- 1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate" 2 | "public-comment.example.org","noop","This is a public comment","This is a private comment",FALSE,FALSE,FALSE 3 | "private-comment.example.org","noop",,"This is a private comment",FALSE,FALSE,FALSE 4 | "diff-comment.example.org","noop","Noop public comment","Noop private comment",FALSE,FALSE,FALSE 5 | "2diff-comment.example.org","noop","Public duplicate","Private duplicate",FALSE,FALSE,FALSE 6 | "qoto.org","noop",,,FALSE,FALSE,FALSE 7 | "sealion.club","noop",,,FALSE,FALSE,FALSE 8 | "develop.gab.com","noop",,,FALSE,FALSE,FALSE 9 | "gab.ai","noop",,,FALSE,FALSE,FALSE 10 | "gab.sleeck.eu","noop",,,FALSE,FALSE,FALSE 11 | "gab.com","noop",,,FALSE,FALSE,FALSE 12 | "kiwifarms.is","noop",,,FALSE,FALSE,FALSE 13 | 
"kiwifarms.net","noop",,,FALSE,FALSE,FALSE 14 | "gabfed.com","noop",,,FALSE,FALSE,FALSE 15 | -------------------------------------------------------------------------------- /tests/fixtures/data-silences-01.csv: -------------------------------------------------------------------------------- 1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate" 2 | "public-comment.example.org","silence","This is a public comment","This is a private comment",FALSE,FALSE,FALSE 3 | "private-comment.example.org","silence",,"This is a private comment",FALSE,FALSE,FALSE 4 | "diff-comment.example.org","silence","Silence public comment","Silence private comment",FALSE,FALSE,FALSE 5 | "2diff-comment.example.org","silence","Public duplicate","Private duplicate",FALSE,FALSE,FALSE 6 | "qoto.org","silence",,,FALSE,FALSE,FALSE 7 | "sealion.club","silence",,,FALSE,FALSE,FALSE 8 | "develop.gab.com","silence",,,FALSE,FALSE,FALSE 9 | "gab.ai","silence",,,FALSE,FALSE,FALSE 10 | "gab.sleeck.eu","silence",,,FALSE,FALSE,FALSE 11 | "gab.com","silence",,,FALSE,FALSE,FALSE 12 | "kiwifarms.is","silence",,,FALSE,FALSE,FALSE 13 | "kiwifarms.net","silence",,,FALSE,FALSE,FALSE 14 | "gabfed.com","silence",,,FALSE,FALSE,FALSE 15 | -------------------------------------------------------------------------------- /tests/fixtures/data-suspends-01.csv: -------------------------------------------------------------------------------- 1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate" 2 | "public-comment.example.org","suspend","This is a public comment","This is a private comment",TRUE,TRUE,TRUE 3 | "private-comment.example.org","suspend",,"This is a private comment",TRUE,TRUE,TRUE 4 | "diff-comment.example.org","suspend","Suspend public comment","Suspend private comment",TRUE,TRUE,TRUE 5 | "2diff-comment.example.org","suspend","Suspend comment 1","Suspend private 1",TRUE,TRUE,TRUE 6 | "qoto.org","suspend",,,TRUE,TRUE,TRUE 7 | "sealion.club","suspend",,,TRUE,TRUE,TRUE 8 | "develop.gab.com","suspend",,,TRUE,TRUE,TRUE 9 | "gab.ai","suspend",,,TRUE,TRUE,TRUE 10 | "gab.sleeck.eu","suspend",,,TRUE,TRUE,TRUE 11 | "gab.com","suspend",,,TRUE,TRUE,TRUE 12 | "kiwifarms.is","suspend",,,TRUE,TRUE,TRUE 13 | "kiwifarms.net","suspend",,,TRUE,TRUE,TRUE 14 | "gabfed.com","suspend",,,TRUE,TRUE,TRUE 15 | -------------------------------------------------------------------------------- /tests/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenmagic/fediblockhole/ba40084772a565f36af1290070e6c9bba14fb9e7/tests/helpers/__init__.py -------------------------------------------------------------------------------- /tests/helpers/util.py: -------------------------------------------------------------------------------- 1 | """ Utility functions for tests 2 | """ 3 | from fediblockhole import setup_argparse, augment_args 4 | 5 | 6 | def shim_argparse(testargv: list = [], tomldata: str = None): 7 | """Helper function to parse test args 8 | """ 9 | ap = setup_argparse() 10 | args = ap.parse_args(testargv) 11 | if tomldata is not None: 12 | args = augment_args(args, tomldata) 13 | return args 14 | -------------------------------------------------------------------------------- /tests/test_allowlist.py: -------------------------------------------------------------------------------- 1 | """ Test allowlists 2 | """ 3 | 4 | import pytest 5 | from util import shim_argparse 6 | 7 | from 
fediblockhole import apply_allowlists 8 | from fediblockhole.blocklists import Blocklist 9 | from fediblockhole.const import DomainBlock 10 | 11 | 12 | def test_cmdline_allow_removes_domain(): 13 | """Test that -A removes entries from merged""" 14 | conf = shim_argparse(["-A", "removeme.org"]) 15 | 16 | merged = Blocklist( 17 | "test_allowlist.merged", 18 | { 19 | "example.org": DomainBlock("example.org"), 20 | "example2.org": DomainBlock("example2.org"), 21 | "removeme.org": DomainBlock("removeme.org"), 22 | "keepblockingme.org": DomainBlock("keepblockingme.org"), 23 | }, 24 | ) 25 | 26 | merged = apply_allowlists(merged, conf, {}) 27 | 28 | with pytest.raises(KeyError): 29 | merged["removeme.org"] 30 | 31 | 32 | def test_allowlist_removes_domain(): 33 | """Test that an item in an allowlist removes entries from merged""" 34 | conf = shim_argparse() 35 | 36 | merged = Blocklist( 37 | "test_allowlist.merged", 38 | { 39 | "example.org": DomainBlock("example.org"), 40 | "example2.org": DomainBlock("example2.org"), 41 | "removeme.org": DomainBlock("removeme.org"), 42 | "keepblockingme.org": DomainBlock("keepblockingme.org"), 43 | }, 44 | ) 45 | 46 | allowlists = [ 47 | Blocklist( 48 | "test_allowlist", 49 | { 50 | "removeme.org": DomainBlock("removeme.org", "noop"), 51 | }, 52 | ) 53 | ] 54 | 55 | merged = apply_allowlists(merged, conf, allowlists) 56 | 57 | with pytest.raises(KeyError): 58 | merged["removeme.org"] 59 | 60 | 61 | def test_allowlist_removes_tld(): 62 | """Test that an item in an allowlist removes entries from merged""" 63 | conf = shim_argparse() 64 | 65 | merged = Blocklist( 66 | "test_allowlist.merged", 67 | { 68 | ".cf": DomainBlock(".cf"), 69 | "example.org": DomainBlock("example.org"), 70 | ".tk": DomainBlock(".tk"), 71 | "keepblockingme.org": DomainBlock("keepblockingme.org"), 72 | }, 73 | ) 74 | 75 | allowlists = [ 76 | Blocklist( 77 | "test_allowlist.list1", 78 | { 79 | ".cf": DomainBlock(".cf", "noop"), 80 | ".tk": DomainBlock(".tk", "noop"), 81 | }, 82 | ) 83 | ] 84 | 85 | merged = apply_allowlists(merged, conf, allowlists) 86 | 87 | with pytest.raises(KeyError): 88 | merged[".cf"] 89 | 90 | with pytest.raises(KeyError): 91 | merged[".tk"] 92 | -------------------------------------------------------------------------------- /tests/test_blockseverity.py: -------------------------------------------------------------------------------- 1 | from fediblockhole.const import BlockSeverity 2 | 3 | 4 | def test_severity_eq(): 5 | 6 | s1 = BlockSeverity("suspend") 7 | s2 = BlockSeverity("suspend") 8 | 9 | assert s1 == s2 10 | 11 | s3 = BlockSeverity("silence") 12 | s4 = BlockSeverity("silence") 13 | 14 | assert s3 == s4 15 | 16 | s5 = BlockSeverity("noop") 17 | s6 = BlockSeverity("noop") 18 | 19 | assert s5 == s6 20 | 21 | 22 | def test_severity_ne(): 23 | s1 = BlockSeverity("noop") 24 | s2 = BlockSeverity("silence") 25 | s3 = BlockSeverity("suspend") 26 | 27 | assert s1 != s2 28 | assert s2 != s3 29 | assert s1 != s3 30 | 31 | 32 | def test_severity_lt(): 33 | s1 = BlockSeverity("noop") 34 | s2 = BlockSeverity("silence") 35 | s3 = BlockSeverity("suspend") 36 | 37 | assert s1 < s2 38 | assert s2 < s3 39 | assert s1 < s3 40 | 41 | 42 | def test_severity_gt(): 43 | s1 = BlockSeverity("noop") 44 | s2 = BlockSeverity("silence") 45 | s3 = BlockSeverity("suspend") 46 | 47 | assert s2 > s1 48 | assert s3 > s2 49 | assert s3 > s1 50 | 51 | 52 | def test_severity_le(): 53 | s1 = BlockSeverity("noop") 54 | s2 = BlockSeverity("silence") 55 | s2a = BlockSeverity("silence") 56 | s3 = 
BlockSeverity("suspend") 57 | 58 | assert s1 <= s2 59 | assert s2a <= s2 60 | assert s2 <= s3 61 | assert s1 <= s3 62 | 63 | 64 | def test_severity_ge(): 65 | s1 = BlockSeverity("noop") 66 | s2 = BlockSeverity("silence") 67 | s2a = BlockSeverity("silence") 68 | s3 = BlockSeverity("suspend") 69 | 70 | assert s2 >= s1 71 | assert s2a >= s1 72 | assert s3 >= s2 73 | assert s3 >= s1 74 | -------------------------------------------------------------------------------- /tests/test_cmdline.py: -------------------------------------------------------------------------------- 1 | """Test the commandline defined parameters correctly 2 | """ 3 | 4 | from fediblockhole import setup_argparse 5 | 6 | 7 | def test_cmdline_no_configfile(): 8 | """Test bare command with no configfile""" 9 | ap = setup_argparse() 10 | args = ap.parse_args([]) 11 | 12 | assert args.config == "/etc/default/fediblockhole.conf.toml" 13 | assert args.mergeplan is None 14 | assert args.blocklist_savefile is None 15 | assert args.save_intermediate is False 16 | assert args.savedir is None 17 | assert args.import_fields is None 18 | assert args.export_fields is None 19 | 20 | assert args.no_fetch_url is False 21 | assert args.no_fetch_instance is False 22 | assert args.no_push_instance is False 23 | assert args.dryrun is False 24 | 25 | assert args.loglevel is None 26 | 27 | 28 | def test_cmdline_mergeplan_min(): 29 | """Test setting mergeplan min""" 30 | ap = setup_argparse() 31 | args = ap.parse_args(["-m", "min"]) 32 | 33 | assert args.mergeplan == "min" 34 | 35 | 36 | def test_set_allow_domain(): 37 | """Set a single allow domain on commandline""" 38 | ap = setup_argparse() 39 | args = ap.parse_args(["-A", "example.org"]) 40 | 41 | assert args.allow_domains == ["example.org"] 42 | 43 | 44 | def test_set_multiple_allow_domains(): 45 | """Set multiple allow domains on commandline""" 46 | ap = setup_argparse() 47 | args = ap.parse_args( 48 | ["-A", "example.org", "-A", "example2.org", "-A", "example3.org"] 49 | ) 50 | 51 | assert args.allow_domains == ["example.org", "example2.org", "example3.org"] 52 | -------------------------------------------------------------------------------- /tests/test_configfile.py: -------------------------------------------------------------------------------- 1 | """Test the config file is loading parameters correctly 2 | """ 3 | 4 | from textwrap import dedent 5 | 6 | from util import shim_argparse 7 | 8 | from fediblockhole import augment_args, setup_argparse 9 | 10 | 11 | def test_parse_tomldata(): 12 | tomldata = """ 13 | # Test TOML config for FediBlockHole 14 | 15 | blocklist_instance_sources = [] 16 | 17 | blocklist_url_sources = [] 18 | 19 | save_intermediate = true 20 | 21 | import_fields = ['public_comment'] 22 | """ 23 | ap = setup_argparse() 24 | args = ap.parse_args([]) 25 | args = augment_args(args, tomldata) 26 | 27 | assert args.blocklist_instance_sources == [] 28 | assert args.blocklist_url_sources == [] 29 | assert args.save_intermediate is True 30 | assert args.import_fields == ["public_comment"] 31 | 32 | 33 | def test_set_mergeplan_max(): 34 | tomldata = """mergeplan = 'max' 35 | """ 36 | args = shim_argparse([], tomldata) 37 | 38 | assert args.mergeplan == "max" 39 | 40 | 41 | def test_set_mergeplan_min(): 42 | tomldata = """mergeplan = 'min' 43 | """ 44 | args = shim_argparse([], tomldata) 45 | 46 | assert args.mergeplan == "min" 47 | 48 | 49 | def test_set_allowlists(): 50 | tomldata = """# Comment on config 51 | allowlist_url_sources = [ { url='file:///path/to/allowlist', 
format='csv'} ] 52 | """ 53 | args = shim_argparse([], tomldata) 54 | 55 | assert args.mergeplan == "max" 56 | assert args.allowlist_url_sources == [ 57 | { 58 | "url": "file:///path/to/allowlist", 59 | "format": "csv", 60 | } 61 | ] 62 | 63 | 64 | def test_set_merge_thresold_default(): 65 | tomldata = """ 66 | """ 67 | args = shim_argparse([], tomldata) 68 | 69 | assert args.mergeplan == "max" 70 | assert args.merge_threshold_type == "count" 71 | 72 | 73 | def test_set_merge_thresold_count(): 74 | tomldata = """# Add a merge threshold 75 | merge_threshold_type = 'count' 76 | merge_threshold = 2 77 | """ 78 | args = shim_argparse([], tomldata) 79 | 80 | assert args.mergeplan == "max" 81 | assert args.merge_threshold_type == "count" 82 | assert args.merge_threshold == 2 83 | 84 | 85 | def test_set_merge_thresold_pct(): 86 | tomldata = """# Add a merge threshold 87 | merge_threshold_type = 'pct' 88 | merge_threshold = 35 89 | """ 90 | args = shim_argparse([], tomldata) 91 | 92 | assert args.mergeplan == "max" 93 | assert args.merge_threshold_type == "pct" 94 | assert args.merge_threshold == 35 95 | 96 | 97 | def test_destination_token_from_environment(monkeypatch): 98 | tomldata = dedent( 99 | """\ 100 | blocklist_instance_destinations = [ 101 | { domain='example.com', token='raw-token'}, 102 | { domain='example2.com', token_env_var='TOKEN_ENV_VAR' }, 103 | { domain='env-token.com' }, 104 | { domain='www.env-token.com' }, 105 | ] 106 | """ 107 | ) 108 | 109 | monkeypatch.setenv("TOKEN_ENV_VAR", "env-token") 110 | monkeypatch.setenv("ENV-TOKEN_COM_TOKEN", "env-token") 111 | monkeypatch.setenv("WWW_ENV-TOKEN_COM_TOKEN", "www-env-token") 112 | 113 | args = shim_argparse([], tomldata) 114 | 115 | assert args.blocklist_instance_destinations[0]["token"] == "raw-token" 116 | assert args.blocklist_instance_destinations[1]["token"] == "env-token" 117 | assert args.blocklist_instance_destinations[2]["token"] == "env-token" 118 | assert args.blocklist_instance_destinations[3]["token"] == "www-env-token" 119 | 120 | 121 | def test_instance_sources_token_from_environment(monkeypatch): 122 | tomldata = dedent( 123 | """\ 124 | blocklist_instance_sources = [ 125 | { domain='example.com', token='raw-token'}, 126 | { domain='example2.com', token_env_var='TOKEN_ENV_VAR' }, 127 | { domain='env-token.com' }, 128 | { domain='www.env-token.com' }, 129 | ] 130 | """ 131 | ) 132 | 133 | monkeypatch.setenv("TOKEN_ENV_VAR", "env-token") 134 | monkeypatch.setenv("ENV-TOKEN_COM_TOKEN", "env-token") 135 | monkeypatch.setenv("WWW_ENV-TOKEN_COM_TOKEN", "www-env-token") 136 | 137 | args = shim_argparse([], tomldata) 138 | 139 | assert args.blocklist_instance_sources[0]["token"] == "raw-token" 140 | assert args.blocklist_instance_sources[1]["token"] == "env-token" 141 | assert args.blocklist_instance_sources[2]["token"] == "env-token" 142 | assert args.blocklist_instance_sources[3]["token"] == "www-env-token" 143 | -------------------------------------------------------------------------------- /tests/test_domainblock.py: -------------------------------------------------------------------------------- 1 | """Test the DomainBlock structure 2 | """ 3 | 4 | import pytest 5 | 6 | from fediblockhole.const import BlockSeverity, DomainBlock, SeverityLevel 7 | 8 | 9 | def test_blocksev_blankstring(): 10 | a = BlockSeverity("") 11 | assert a.level == SeverityLevel.NONE 12 | 13 | 14 | def test_blocksev_string_noop(): 15 | a = BlockSeverity("noop") 16 | assert a.level == SeverityLevel.NONE 17 | 18 | 19 | def test_blocksev_none(): 20 | 
a = BlockSeverity(None) 21 | assert a.level == SeverityLevel.NONE 22 | 23 | 24 | def test_empty_domainblock_fails(): 25 | with pytest.raises(TypeError): 26 | a = DomainBlock() # noqa 27 | 28 | 29 | def test_default_suspend(): 30 | a = DomainBlock("example.org") 31 | assert a.domain == "example.org" 32 | assert a.severity.level == SeverityLevel.SUSPEND 33 | 34 | 35 | def test_severity_suspend(): 36 | a = DomainBlock("example.org", "suspend") 37 | assert a.domain == "example.org" 38 | assert a.severity.level == SeverityLevel.SUSPEND 39 | 40 | 41 | def test_severity_silence(): 42 | a = DomainBlock("example.org", "silence") 43 | assert a.domain == "example.org" 44 | assert a.severity.level == SeverityLevel.SILENCE 45 | 46 | 47 | def test_severity_noop_string(): 48 | a = DomainBlock("example.org", "noop") 49 | assert a.domain == "example.org" 50 | assert a.severity.level == SeverityLevel.NONE 51 | 52 | 53 | def test_severity_none(): 54 | a = DomainBlock("example.org", None) 55 | assert a.domain == "example.org" 56 | assert a.severity.level == SeverityLevel.NONE 57 | 58 | 59 | def test_compare_equal_blocks(): 60 | 61 | a = DomainBlock("example1.org", "suspend") 62 | b = DomainBlock("example1.org", "suspend") 63 | 64 | assert a == b 65 | 66 | 67 | def test_compare_diff_domains(): 68 | 69 | a = DomainBlock("example1.org", "suspend") 70 | b = DomainBlock("example2.org", "suspend") 71 | 72 | assert a != b 73 | 74 | 75 | def test_compare_diff_sevs(): 76 | 77 | a = DomainBlock("example1.org", "suspend") 78 | b = DomainBlock("example1.org", "silence") 79 | 80 | assert a != b 81 | 82 | 83 | def test_compare_diff_sevs_2(): 84 | 85 | a = DomainBlock("example1.org", "suspend") 86 | b = DomainBlock("example1.org", "noop") 87 | 88 | assert a != b 89 | -------------------------------------------------------------------------------- /tests/test_merge_comments.py: -------------------------------------------------------------------------------- 1 | """ Test merging of comments 2 | """ 3 | 4 | from fediblockhole import merge_comments 5 | 6 | 7 | def test_merge_blank_comments(): 8 | 9 | oldcomment = "" 10 | newcomment = "" 11 | 12 | merged_comment = merge_comments(oldcomment, newcomment) 13 | 14 | assert merged_comment == "" 15 | 16 | 17 | def test_merge_None_comments(): 18 | 19 | oldcomment = None 20 | newcomment = None 21 | 22 | merged_comment = merge_comments(oldcomment, newcomment) 23 | 24 | assert merged_comment == "" 25 | 26 | 27 | def test_merge_oldstr_newNone(): 28 | 29 | oldcomment = "fred, bibble" 30 | newcomment = None 31 | 32 | merged_comment = merge_comments(oldcomment, newcomment) 33 | 34 | assert merged_comment == "fred, bibble" 35 | 36 | 37 | def test_merge_oldempty_newcomment(): 38 | 39 | oldcomment = "" 40 | newcomment = "fred, bibble" 41 | 42 | merged_comment = merge_comments(oldcomment, newcomment) 43 | 44 | assert merged_comment == "fred, bibble" 45 | 46 | 47 | def test_merge_oldNone_newcomment(): 48 | 49 | oldcomment = None 50 | newcomment = "fred, bibble" 51 | 52 | merged_comment = merge_comments(oldcomment, newcomment) 53 | 54 | assert merged_comment == "fred, bibble" 55 | 56 | 57 | def test_merge_two_different(): 58 | 59 | oldcomment = "happy, medium, spinning" 60 | newcomment = "fred, bibble" 61 | 62 | merged_comment = merge_comments(oldcomment, newcomment) 63 | 64 | assert merged_comment == "happy, medium, spinning, fred, bibble" 65 | 66 | 67 | def test_merge_overlaps(): 68 | 69 | oldcomment = "happy, medium, spinning" 70 | newcomment = "fred, medium, bibble, spinning" 71 | 72 | 
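# Inferred from the assertion below: merge_comments appears to drop duplicate terms while keeping first-seen order, old terms first.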
merged_comment = merge_comments(oldcomment, newcomment) 73 | 74 | assert merged_comment == "happy, medium, spinning, fred, bibble" 75 | -------------------------------------------------------------------------------- /tests/test_merge_thresholds.py: -------------------------------------------------------------------------------- 1 | """Test merge with thresholds 2 | """ 3 | 4 | from fediblockhole import merge_blocklists 5 | from fediblockhole.blocklists import Blocklist, parse_blocklist 6 | from fediblockhole.const import DomainBlock 7 | 8 | import_fields = [ 9 | "domain", 10 | "severity", 11 | "public_comment", 12 | "private_comment", 13 | "reject_media", 14 | "reject_reports", 15 | "obfuscate", 16 | ] 17 | 18 | 19 | # FIXME: Deprecated data loader. Now using fixtures. 20 | def __load_test_blocklist_data(datafiles): 21 | 22 | blocklists = [] 23 | 24 | for df in datafiles: 25 | with open(df) as fp: 26 | data = fp.read() 27 | bl = parse_blocklist(data, df, "csv", import_fields) 28 | blocklists.append(bl) 29 | 30 | return blocklists 31 | 32 | 33 | def test_mergeplan_count_2(): 34 | """Only merge a block if present in 2 or more lists""" 35 | 36 | bl_1 = Blocklist( 37 | "test01", 38 | { 39 | "onemention.example.org": DomainBlock( 40 | "onemention.example.org", "suspend", "", "", True, True, True 41 | ), 42 | "twomention.example.org": DomainBlock( 43 | "twomention.example.org", "suspend", "", "", True, True, True 44 | ), 45 | "threemention.example.org": DomainBlock( 46 | "threemention.example.org", "suspend", "", "", True, True, True 47 | ), 48 | }, 49 | ) 50 | 51 | bl_2 = Blocklist( 52 | "test2", 53 | { 54 | "twomention.example.org": DomainBlock( 55 | "twomention.example.org", "suspend", "", "", True, True, True 56 | ), 57 | "threemention.example.org": DomainBlock( 58 | "threemention.example.org", "suspend", "", "", True, True, True 59 | ), 60 | }, 61 | ) 62 | 63 | bl_3 = Blocklist( 64 | "test3", 65 | { 66 | "threemention.example.org": DomainBlock( 67 | "threemention.example.org", "suspend", "", "", True, True, True 68 | ), 69 | # NB: a dict literal keeps only one entry per key, so listing 70 | # "threemention.example.org" twice here would not raise its 71 | # mention count; each list contributes at most one mention. 72 | }, 73 | ) 74 | 75 | ml = merge_blocklists([bl_1, bl_2, bl_3], "max", threshold=2) 76 | 77 | assert "onemention.example.org" not in ml 78 | assert "twomention.example.org" in ml 79 | assert "threemention.example.org" in ml 80 | 81 | 82 | def test_mergeplan_count_3(): 83 | """Only merge a block if present in 3 or more lists""" 84 | 85 | bl_1 = Blocklist( 86 | "test01", 87 | { 88 | "onemention.example.org": DomainBlock( 89 | "onemention.example.org", "suspend", "", "", True, True, True 90 | ), 91 | "twomention.example.org": DomainBlock( 92 | "twomention.example.org", "suspend", "", "", True, True, True 93 | ), 94 | "threemention.example.org": DomainBlock( 95 | "threemention.example.org", "suspend", "", "", True, True, True 96 | ), 97 | }, 98 | ) 99 | 100 | bl_2 = Blocklist( 101 | "test2", 102 | { 103 | "twomention.example.org": DomainBlock( 104 | "twomention.example.org", "suspend", "", "", True, True, True 105 | ), 106 | "threemention.example.org": DomainBlock( 107 | "threemention.example.org", "suspend", "", "", True, True, True 108 | ), 109 | }, 110 | ) 111 | 112 | bl_3 = Blocklist( 113 | "test3", 114 | { 115 | "threemention.example.org": DomainBlock( 116 | "threemention.example.org", "suspend", "", "", True, True, True 117 | ), 118 | # As above, a dict literal keeps one entry per key, so bl_3 119 | # contributes a single mention; the threshold of 3 is met
 120 | # because bl_1, bl_2 and bl_3 all mention the domain. 121 | }, 122 | ) 123 | 124 | ml = merge_blocklists([bl_1, bl_2, bl_3], "max", threshold=3) 125 | 126 | assert "onemention.example.org" not in ml 127 | assert "twomention.example.org" not in ml 128 | assert "threemention.example.org" in ml 129 | 130 | 131 | def test_mergeplan_pct_30(): 132 | """Only merge a block if present in at least 30% of the lists""" 133 | 134 | bl_1 = Blocklist( 135 | "test01", 136 | { 137 | "onemention.example.org": DomainBlock( 138 | "onemention.example.org", "suspend", "", "", True, True, True 139 | ), 140 | "twomention.example.org": DomainBlock( 141 | "twomention.example.org", "suspend", "", "", True, True, True 142 | ), 143 | "fourmention.example.org": DomainBlock( 144 | "fourmention.example.org", "suspend", "", "", True, True, True 145 | ), 146 | }, 147 | ) 148 | 149 | bl_2 = Blocklist( 150 | "test2", 151 | { 152 | "twomention.example.org": DomainBlock( 153 | "twomention.example.org", "suspend", "", "", True, True, True 154 | ), 155 | "threemention.example.org": DomainBlock( 156 | "threemention.example.org", "suspend", "", "", True, True, True 157 | ), 158 | "fourmention.example.org": DomainBlock( 159 | "fourmention.example.org", "suspend", "", "", True, True, True 160 | ), 161 | }, 162 | ) 163 | 164 | bl_3 = Blocklist( 165 | "test3", 166 | { 167 | "threemention.example.org": DomainBlock( 168 | "threemention.example.org", "suspend", "", "", True, True, True 169 | ), 170 | "fourmention.example.org": DomainBlock( 171 | "fourmention.example.org", "suspend", "", "", True, True, True 172 | ), 173 | }, 174 | ) 175 | 176 | bl_4 = Blocklist( 177 | "test4", 178 | { 179 | "threemention.example.org": DomainBlock( 180 | "threemention.example.org", "suspend", "", "", True, True, True 181 | ), 182 | "fourmention.example.org": DomainBlock( 183 | "fourmention.example.org", "suspend", "", "", True, True, True 184 | ), 185 | }, 186 | ) 187 | 188 | ml = merge_blocklists( 189 | [bl_1, bl_2, bl_3, bl_4], "max", threshold=30, threshold_type="pct" 190 | ) 191 | 192 | assert "onemention.example.org" not in ml 193 | assert "twomention.example.org" in ml 194 | assert "threemention.example.org" in ml 195 | assert "fourmention.example.org" in ml 196 | 197 | 198 | def test_mergeplan_pct_55(): 199 | """Only merge a block if present in at least 55% of the lists""" 200 | 201 | bl_1 = Blocklist( 202 | "test01", 203 | { 204 | "onemention.example.org": DomainBlock( 205 | "onemention.example.org", "suspend", "", "", True, True, True 206 | ), 207 | "twomention.example.org": DomainBlock( 208 | "twomention.example.org", "suspend", "", "", True, True, True 209 | ), 210 | "fourmention.example.org": DomainBlock( 211 | "fourmention.example.org", "suspend", "", "", True, True, True 212 | ), 213 | }, 214 | ) 215 | 216 | bl_2 = Blocklist( 217 | "test2", 218 | { 219 | "twomention.example.org": DomainBlock( 220 | "twomention.example.org", "suspend", "", "", True, True, True 221 | ), 222 | "threemention.example.org": DomainBlock( 223 | "threemention.example.org", "suspend", "", "", True, True, True 224 | ), 225 | "fourmention.example.org": DomainBlock( 226 | "fourmention.example.org", "suspend", "", "", True, True, True 227 | ), 228 | }, 229 | ) 230 | 231 | bl_3 = Blocklist( 232 | "test3", 233 | { 234 | "threemention.example.org": DomainBlock( 235 | "threemention.example.org", "suspend", "", "", True, True, True 236 | ), 237 | "fourmention.example.org": DomainBlock( 238 | "fourmention.example.org", "suspend", "", "", True, True, True 239 | ), 240 | }, 241 | ) 242 | 243 | bl_4 = Blocklist( 244 | 
"test4", 245 | { 246 | "threemention.example.org": DomainBlock( 247 | "threemention.example.org", "suspend", "", "", True, True, True 248 | ), 249 | "fourmention.example.org": DomainBlock( 250 | "fourmention.example.org", "suspend", "", "", True, True, True 251 | ), 252 | }, 253 | ) 254 | 255 | ml = merge_blocklists( 256 | [bl_1, bl_2, bl_3, bl_4], "max", threshold=55, threshold_type="pct" 257 | ) 258 | 259 | assert "onemention.example.org" not in ml 260 | assert "twomention.example.org" not in ml 261 | assert "threemention.example.org" in ml 262 | assert "fourmention.example.org" in ml 263 | -------------------------------------------------------------------------------- /tests/test_mergeplan.py: -------------------------------------------------------------------------------- 1 | """Various mergeplan tests 2 | """ 3 | 4 | from fediblockhole import apply_mergeplan, merge_blocklists, merge_comments 5 | from fediblockhole.blocklists import parse_blocklist 6 | from fediblockhole.const import DomainBlock, SeverityLevel 7 | 8 | import_fields = [ 9 | "domain", 10 | "severity", 11 | "public_comment", 12 | "private_comment", 13 | "reject_media", 14 | "reject_reports", 15 | "obfuscate", 16 | ] 17 | 18 | 19 | def load_test_blocklist_data(datafiles): 20 | 21 | blocklists = [] 22 | 23 | for data in datafiles: 24 | bl = parse_blocklist(data, "pytest", "csv", import_fields) 25 | blocklists.append(bl) 26 | 27 | return blocklists 28 | 29 | 30 | def test_mergeplan_max(data_suspends_01, data_silences_01): 31 | """Test 'max' mergeplan""" 32 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01]) 33 | bl = merge_blocklists(blocklists, "max") 34 | assert len(bl) == 13 35 | 36 | for key in bl: 37 | assert bl[key].severity.level == SeverityLevel.SUSPEND 38 | 39 | 40 | def test_mergeplan_min(data_suspends_01, data_silences_01): 41 | """Test 'max' mergeplan""" 42 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01]) 43 | 44 | bl = merge_blocklists(blocklists, "min") 45 | assert len(bl) == 13 46 | 47 | for key in bl: 48 | assert bl[key].severity.level == SeverityLevel.SILENCE 49 | 50 | 51 | def test_mergeplan_default(data_suspends_01, data_silences_01): 52 | """Default mergeplan is max, so see if it's chosen""" 53 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01]) 54 | 55 | bl = merge_blocklists(blocklists) 56 | assert len(bl) == 13 57 | 58 | for key in bl: 59 | assert bl[key].severity.level == SeverityLevel.SUSPEND 60 | 61 | 62 | def test_mergeplan_3_max(data_suspends_01, data_silences_01, data_noop_01): 63 | """3 datafiles and mergeplan of 'max'""" 64 | blocklists = load_test_blocklist_data( 65 | [data_suspends_01, data_silences_01, data_noop_01] 66 | ) 67 | 68 | bl = merge_blocklists(blocklists, "max") 69 | assert len(bl) == 13 70 | 71 | for key in bl: 72 | assert bl[key].severity.level == SeverityLevel.SUSPEND 73 | assert bl[key].reject_media is True 74 | assert bl[key].reject_reports is True 75 | assert bl[key].obfuscate is True 76 | 77 | 78 | def test_mergeplan_3_min(data_suspends_01, data_silences_01, data_noop_01): 79 | """3 datafiles and mergeplan of 'min'""" 80 | blocklists = load_test_blocklist_data( 81 | [data_suspends_01, data_silences_01, data_noop_01] 82 | ) 83 | 84 | bl = merge_blocklists(blocklists, "min") 85 | assert len(bl) == 13 86 | 87 | for key in bl: 88 | assert bl[key].severity.level == SeverityLevel.NONE 89 | assert bl[key].reject_media is False 90 | assert bl[key].reject_reports is False 91 | assert bl[key].obfuscate 
is False 92 | 93 | 94 | def test_mergeplan_noop_v_silence_max(data_silences_01, data_noop_01): 95 | """Mergeplan of max should choose silence over noop""" 96 | blocklists = load_test_blocklist_data([data_silences_01, data_noop_01]) 97 | 98 | bl = merge_blocklists(blocklists, "max") 99 | assert len(bl) == 13 100 | 101 | for key in bl: 102 | assert bl[key].severity.level == SeverityLevel.SILENCE 103 | 104 | 105 | def test_mergeplan_noop_v_silence_min(data_silences_01, data_noop_01): 106 | """Mergeplan of min should choose noop over silence""" 107 | blocklists = load_test_blocklist_data([data_silences_01, data_noop_01]) 108 | 109 | bl = merge_blocklists(blocklists, "min") 110 | assert len(bl) == 13 111 | 112 | for key in bl: 113 | assert bl[key].severity.level == SeverityLevel.NONE 114 | 115 | 116 | def test_merge_public_comment(data_suspends_01, data_silences_01, data_noop_01): 117 | blocklists = load_test_blocklist_data( 118 | [data_suspends_01, data_silences_01, data_noop_01] 119 | ) 120 | 121 | bl = merge_blocklists(blocklists, "min") 122 | assert len(bl) == 13 123 | 124 | assert bl["public-comment.example.org"].public_comment == "This is a public comment" 125 | 126 | 127 | def test_merge_private_comment(data_suspends_01, data_silences_01, data_noop_01): 128 | blocklists = load_test_blocklist_data( 129 | [data_suspends_01, data_silences_01, data_noop_01] 130 | ) 131 | 132 | bl = merge_blocklists(blocklists, "min") 133 | assert len(bl) == 13 134 | 135 | assert ( 136 | bl["private-comment.example.org"].private_comment == "This is a private comment" 137 | ) 138 | 139 | 140 | def test_merge_public_comments(data_suspends_01, data_silences_01, data_noop_01): 141 | blocklists = load_test_blocklist_data( 142 | [data_suspends_01, data_silences_01, data_noop_01] 143 | ) 144 | 145 | bl = merge_blocklists(blocklists, "min") 146 | assert len(bl) == 13 147 | 148 | assert ( 149 | bl["diff-comment.example.org"].public_comment 150 | == "Suspend public comment, Silence public comment, Noop public comment" 151 | ) 152 | 153 | 154 | def test_merge_duplicate_comments(data_suspends_01, data_silences_01, data_noop_01): 155 | """The same comment on multiple sources shouldn't get added""" 156 | blocklists = load_test_blocklist_data( 157 | [data_suspends_01, data_silences_01, data_noop_01] 158 | ) 159 | 160 | bl = merge_blocklists(blocklists, "min") 161 | assert len(bl) == 13 162 | 163 | 164 | def test_merge_comments_none(): 165 | 166 | a = None 167 | b = None 168 | 169 | r = merge_comments(a, b) 170 | 171 | assert r == "" 172 | 173 | 174 | def test_merge_comments_empty(): 175 | 176 | a = "" 177 | b = "" 178 | 179 | r = merge_comments(a, b) 180 | 181 | assert r == "" 182 | 183 | 184 | def test_merge_comments_left(): 185 | 186 | a = "comment to merge" 187 | b = "" 188 | 189 | r = merge_comments(a, b) 190 | 191 | assert r == "comment to merge" 192 | 193 | 194 | def test_merge_comments_right(): 195 | 196 | a = "" 197 | b = "comment to merge" 198 | 199 | r = merge_comments(a, b) 200 | 201 | assert r == "comment to merge" 202 | 203 | 204 | def test_merge_comments_same(): 205 | 206 | a = "comment to merge" 207 | b = "comment to merge" 208 | 209 | r = merge_comments(a, b) 210 | 211 | assert r == "comment to merge" 212 | 213 | 214 | def test_merge_comments_diff(): 215 | 216 | a = "comment A" 217 | b = "comment B" 218 | 219 | r = merge_comments(a, b) 220 | 221 | assert r == "comment A, comment B" 222 | 223 | 224 | def test_merge_comments_dups(): 225 | 226 | a = "boring, nazis, lack of moderation, flagged, special" 227 | b 
= "spoon, nazis, flagged, lack of moderation, happy, fork" 228 | 229 | r = merge_comments(a, b) 230 | 231 | assert ( 232 | r == "boring, nazis, lack of moderation, flagged, special, spoon, happy, fork" 233 | ) 234 | 235 | 236 | def test_mergeplan_same_min_bools_false(): 237 | """Test merging with mergeplan 'max' and False values doesn't change them""" 238 | a = DomainBlock("example.org", "noop", "", "", False, False, False) 239 | b = DomainBlock("example.org", "noop", "", "", False, False, False) 240 | 241 | r = apply_mergeplan(a, b, "max") 242 | 243 | assert r.reject_media is False 244 | assert r.reject_reports is False 245 | assert r.obfuscate is False 246 | 247 | 248 | def test_mergeplan_same_min_bools_true(): 249 | """Test merging with mergeplan 'max' and True values doesn't change them""" 250 | a = DomainBlock("example.org", "noop", "", "", True, False, True) 251 | b = DomainBlock("example.org", "noop", "", "", True, False, True) 252 | 253 | r = apply_mergeplan(a, b, "max") 254 | 255 | assert r.reject_media is True 256 | assert r.reject_reports is False 257 | assert r.obfuscate is True 258 | 259 | 260 | def test_mergeplan_max_bools(): 261 | a = DomainBlock("example.org", "suspend", "", "", True, True, True) 262 | b = DomainBlock("example.org", "noop", "", "", False, False, False) 263 | 264 | r = apply_mergeplan(a, b, "max") 265 | 266 | assert r.reject_media is True 267 | assert r.reject_reports is True 268 | assert r.obfuscate is True 269 | -------------------------------------------------------------------------------- /tests/test_parser_csv.py: -------------------------------------------------------------------------------- 1 | """Tests of the CSV parsing 2 | """ 3 | 4 | from fediblockhole.blocklists import BlocklistParserCSV 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_single_line(): 9 | csvdata = "example.org" 10 | origin = "csvfile" 11 | 12 | parser = BlocklistParserCSV() 13 | bl = parser.parse_blocklist(csvdata, origin) 14 | assert len(bl) == 0 15 | 16 | 17 | def test_header_only(): 18 | csvdata = "domain,severity,public_comment" 19 | origin = "csvfile" 20 | 21 | parser = BlocklistParserCSV() 22 | bl = parser.parse_blocklist(csvdata, origin) 23 | assert len(bl) == 0 24 | 25 | 26 | def test_2_blocks(): 27 | csvdata = """domain,severity 28 | example.org,silence 29 | example2.org,suspend 30 | """ 31 | origin = "csvfile" 32 | 33 | parser = BlocklistParserCSV() 34 | bl = parser.parse_blocklist(csvdata, origin) 35 | 36 | assert len(bl) == 2 37 | assert "example.org" in bl 38 | 39 | 40 | def test_4_blocks(): 41 | csvdata = """domain,severity,public_comment 42 | example.org,silence,"test 1" 43 | example2.org,suspend,"test 2" 44 | example3.org,noop,"test 3" 45 | example4.org,suspend,"test 4" 46 | """ 47 | origin = "csvfile" 48 | 49 | parser = BlocklistParserCSV() 50 | bl = parser.parse_blocklist(csvdata, origin) 51 | 52 | assert len(bl) == 4 53 | assert "example.org" in bl 54 | assert "example2.org" in bl 55 | assert "example3.org" in bl 56 | assert "example4.org" in bl 57 | 58 | assert bl["example.org"].severity.level == SeverityLevel.SILENCE 59 | assert bl["example2.org"].severity.level == SeverityLevel.SUSPEND 60 | assert bl["example3.org"].severity.level == SeverityLevel.NONE 61 | assert bl["example4.org"].severity.level == SeverityLevel.SUSPEND 62 | 63 | 64 | def test_ignore_comments(): 65 | csvdata = """domain,severity,public_comment,private_comment 66 | example.org,silence,"test 1","ignore me" 67 | example2.org,suspend,"test 2","ignote me also" 68 | 
example3.org,noop,"test 3","and me" 69 | example4.org,suspend,"test 4","also me" 70 | """ 71 | origin = "csvfile" 72 | 73 | parser = BlocklistParserCSV() 74 | bl = parser.parse_blocklist(csvdata, origin) 75 | 76 | assert len(bl) == 4 77 | assert "example.org" in bl 78 | assert "example2.org" in bl 79 | assert "example3.org" in bl 80 | assert "example4.org" in bl 81 | 82 | assert bl["example.org"].public_comment == "" 83 | assert bl["example.org"].private_comment == "" 84 | assert bl["example3.org"].public_comment == "" 85 | assert bl["example4.org"].private_comment == "" 86 | -------------------------------------------------------------------------------- /tests/test_parser_csv_mastodon.py: -------------------------------------------------------------------------------- 1 | """Tests of the Mastodon CSV parsing 2 | """ 3 | 4 | from fediblockhole.blocklists import BlocklistParserMastodonCSV 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_single_line(): 9 | csvdata = "example.org" 10 | origin = "csvfile" 11 | 12 | parser = BlocklistParserMastodonCSV() 13 | bl = parser.parse_blocklist(csvdata, origin) 14 | assert len(bl) == 0 15 | 16 | 17 | def test_header_only(): 18 | csvdata = "#domain,#severity,#public_comment" 19 | origin = "csvfile" 20 | 21 | parser = BlocklistParserMastodonCSV() 22 | bl = parser.parse_blocklist(csvdata, origin) 23 | assert len(bl) == 0 24 | 25 | 26 | def test_2_blocks(): 27 | csvdata = """domain,severity 28 | example.org,silence 29 | example2.org,suspend 30 | """ 31 | origin = "csvfile" 32 | 33 | parser = BlocklistParserMastodonCSV() 34 | bl = parser.parse_blocklist(csvdata, origin) 35 | 36 | assert len(bl) == 2 37 | assert "example.org" in bl 38 | 39 | 40 | def test_4_blocks(): 41 | csvdata = """domain,severity,public_comment 42 | example.org,silence,"test 1" 43 | example2.org,suspend,"test 2" 44 | example3.org,noop,"test 3" 45 | example4.org,suspend,"test 4" 46 | """ 47 | origin = "csvfile" 48 | 49 | parser = BlocklistParserMastodonCSV() 50 | bl = parser.parse_blocklist(csvdata, origin) 51 | 52 | assert len(bl) == 4 53 | assert "example.org" in bl 54 | assert "example2.org" in bl 55 | assert "example3.org" in bl 56 | assert "example4.org" in bl 57 | 58 | assert bl["example.org"].severity.level == SeverityLevel.SILENCE 59 | assert bl["example2.org"].severity.level == SeverityLevel.SUSPEND 60 | assert bl["example3.org"].severity.level == SeverityLevel.NONE 61 | assert bl["example4.org"].severity.level == SeverityLevel.SUSPEND 62 | 63 | 64 | def test_ignore_comments(): 65 | csvdata = """domain,severity,public_comment,private_comment 66 | example.org,silence,"test 1","ignore me" 67 | example2.org,suspend,"test 2","ignore me also" 68 | example3.org,noop,"test 3","and me" 69 | example4.org,suspend,"test 4","also me" 70 | """ 71 | origin = "csvfile" 72 | 73 | parser = BlocklistParserMastodonCSV() 74 | bl = parser.parse_blocklist(csvdata, origin) 75 | 76 | assert len(bl) == 4 77 | assert "example.org" in bl 78 | assert "example2.org" in bl 79 | assert "example3.org" in bl 80 | assert "example4.org" in bl 81 | 82 | assert bl["example.org"].public_comment == "" 83 | assert bl["example.org"].private_comment == "" 84 | assert bl["example3.org"].public_comment == "" 85 | assert bl["example4.org"].private_comment == "" 86 | -------------------------------------------------------------------------------- /tests/test_parser_json.py: -------------------------------------------------------------------------------- 1 | """Tests of the JSON parsing 2 | """ 3 | 4 | from 
fediblockhole.blocklists import BlocklistParserJSON 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_json_parser(data_mastodon_json): 9 | 10 | parser = BlocklistParserJSON() 11 | bl = parser.parse_blocklist(data_mastodon_json, "test_json") 12 | 13 | assert len(bl) == 10 14 | assert "example.org" in bl 15 | assert "example2.org" in bl 16 | assert "example3.org" in bl 17 | assert "example4.org" in bl 18 | 19 | assert bl["example.org"].severity.level == SeverityLevel.SUSPEND 20 | assert bl["example2.org"].severity.level == SeverityLevel.SILENCE 21 | assert bl["example3.org"].severity.level == SeverityLevel.SUSPEND 22 | assert bl["example4.org"].severity.level == SeverityLevel.NONE 23 | 24 | 25 | def test_ignore_comments(data_mastodon_json): 26 | 27 | parser = BlocklistParserJSON() 28 | bl = parser.parse_blocklist(data_mastodon_json, "test_json") 29 | 30 | assert len(bl) == 10 31 | assert "example.org" in bl 32 | assert "example2.org" in bl 33 | assert "example3.org" in bl 34 | assert "example4.org" in bl 35 | 36 | assert bl["example.org"].public_comment == "" 37 | assert bl["example.org"].private_comment == "" 38 | assert bl["example3.org"].public_comment == "" 39 | assert bl["example4.org"].private_comment == "" 40 | -------------------------------------------------------------------------------- /tests/test_parser_rapidblockcsv.py: -------------------------------------------------------------------------------- 1 | """Tests of the Rapidblock CSV parsing 2 | """ 3 | 4 | from fediblockhole.blocklists import RapidBlockParserCSV 5 | from fediblockhole.const import SeverityLevel 6 | 7 | csvdata = ( 8 | """example.org\r\nsubdomain.example.org\r\nanotherdomain.org\r\ndomain4.org\r\n""" 9 | ) 10 | parser = RapidBlockParserCSV() 11 | 12 | 13 | def test_basic_rapidblock(): 14 | 15 | bl = parser.parse_blocklist(csvdata) 16 | assert len(bl) == 4 17 | assert "example.org" in bl 18 | assert "subdomain.example.org" in bl 19 | assert "anotherdomain.org" in bl 20 | assert "domain4.org" in bl 21 | 22 | 23 | def test_severity_is_suspend(): 24 | bl = parser.parse_blocklist(csvdata) 25 | 26 | for block in bl.values(): 27 | assert block.severity.level == SeverityLevel.SUSPEND 28 | -------------------------------------------------------------------------------- /tests/test_parser_rapidblockjson.py: -------------------------------------------------------------------------------- 1 | """Test parsing the RapidBlock JSON format 2 | """ 3 | 4 | from fediblockhole.blocklists import parse_blocklist 5 | from fediblockhole.const import SeverityLevel 6 | 7 | 8 | def test_parse_rapidblock_json(data_rapidblock_json): 9 | 10 | bl = parse_blocklist(data_rapidblock_json, "pytest", "rapidblock.json") 11 | 12 | assert "101010.pl" in bl 13 | assert bl["101010.pl"].severity.level == SeverityLevel.SUSPEND 14 | assert bl["101010.pl"].public_comment == "" 15 | 16 | assert "berserker.town" in bl 17 | assert bl["berserker.town"].severity.level == SeverityLevel.SUSPEND 18 | assert bl["berserker.town"].public_comment == "" 19 | assert bl["berserker.town"].private_comment == "" 20 | 21 | 22 | def test_parse_with_comments(data_rapidblock_json): 23 | 24 | bl = parse_blocklist( 25 | data_rapidblock_json, 26 | "pytest", 27 | "rapidblock.json", 28 | ["domain", "severity", "public_comment", "private_comment"], 29 | ) 30 | 31 | assert "101010.pl" in bl 32 | assert bl["101010.pl"].severity.level == SeverityLevel.SUSPEND 33 | assert bl["101010.pl"].public_comment == "cryptomining javascript, white supremacy" 34 | 35 | assert 
"berserker.town" in bl 36 | assert bl["berserker.town"].severity.level == SeverityLevel.SUSPEND 37 | assert bl["berserker.town"].public_comment == "freeze peach" 38 | --------------------------------------------------------------------------------