├── .flake8
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   └── workflows
│       └── ci.yml
├── .python-version-default
├── CHANGELOG.md
├── LICENSE
├── README.md
├── chart
│   ├── .helmignore
│   ├── Chart.yaml
│   ├── fediblockhole.conf.toml
│   ├── templates
│   │   ├── _helpers.tpl
│   │   ├── configmap-conf-toml.yaml
│   │   └── cronjob-fediblock-sync.yaml
│   └── values.yaml
├── container
│   ├── .dockerignore
│   └── Dockerfile
├── etc
│   └── sample.fediblockhole.conf.toml
├── pyproject.toml
├── requirements.txt
├── samples
│   ├── demo-allowlist-01.csv
│   ├── demo-allowlist-02.csv
│   └── demo-blocklist-01.csv
├── src
│   └── fediblockhole
│       ├── __init__.py
│       ├── blocklists.py
│       └── const.py
├── tests
│   ├── conftest.py
│   ├── fixtures
│   │   ├── __init__.py
│   │   ├── data-mastodon.json
│   │   ├── data-noop-01.csv
│   │   ├── data-rapidblock.json
│   │   ├── data-silences-01.csv
│   │   └── data-suspends-01.csv
│   ├── helpers
│   │   ├── __init__.py
│   │   └── util.py
│   ├── test_allowlist.py
│   ├── test_blockseverity.py
│   ├── test_cmdline.py
│   ├── test_configfile.py
│   ├── test_domainblock.py
│   ├── test_merge_comments.py
│   ├── test_merge_thresholds.py
│   ├── test_mergeplan.py
│   ├── test_parser_csv.py
│   ├── test_parser_csv_mastodon.py
│   ├── test_parser_json.py
│   ├── test_parser_rapidblockcsv.py
│   └── test_parser_rapidblockjson.py
└── uv.lock
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length: 88
3 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve.
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behaviour:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behaviour**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Environment**
24 | Help us understand where the code is running.
25 | - OS/Distribution: [e.g. Linux/Ubuntu/Debian]
26 | - Python version: [e.g. Python 3.10]
27 | - Mastodon version: [e.g. Mastodon 4.1.0]
28 |
29 | **Additional context**
30 | Add any other context about the problem here that could help us find and fix the bug.
31 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project.
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. E.g. "I am frustrated when [...]"
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | ---
2 | name: CI
3 |
4 | on:
5 | merge_group:
6 | push:
7 | branches: [main, "ci-*"]
8 | tags: ["*"]
9 | pull_request:
10 | branches: [main, "ci-*"]
11 | workflow_dispatch:
12 |
13 | env:
14 | FORCE_COLOR: "1"
15 | PIP_DISABLE_PIP_VERSION_CHECK: "1"
16 | PIP_NO_PYTHON_VERSION_WARNING: "1"
17 |
18 | permissions: {}
19 |
20 | jobs:
21 | build-package:
22 | name: Build & verify package
23 | runs-on: ubuntu-latest
24 |
25 | steps:
26 | - uses: actions/checkout@v4
27 | with:
28 | fetch-depth: 0
29 |
30 | - uses: hynek/build-and-inspect-python-package@v2
31 | id: baipp
32 | with:
33 | skip-wheel: true
34 |
35 | outputs:
36 | # Used to define the matrix for tests below. The value is based on
37 | # packaging metadata (trove classifiers).
38 | supported-python-versions: ${{ steps.baipp.outputs.supported_python_classifiers_json_array }}
39 |
40 | tests:
41 | name: Tests on ${{ matrix.python-version }}
42 | runs-on: ubuntu-latest
43 | needs: build-package
44 |
45 | strategy:
46 | fail-fast: false
47 | matrix:
48 | # Created by the build-and-inspect-python-package action above.
49 | python-version: ${{ fromJson(needs.build-package.outputs.supported-python-versions) }}
50 |
51 | steps:
52 | - name: Download pre-built packages
53 | uses: actions/download-artifact@v4
54 | with:
55 | name: Packages
56 | path: dist
57 | - run: tar xf dist/*.tar.gz --strip-components=1
58 | - uses: actions/setup-python@v5
59 | with:
60 | python-version: ${{ matrix.python-version }}
61 | allow-prereleases: true
62 | - uses: hynek/setup-cached-uv@v2
63 | - run: uv venv
64 | - run: uv pip install .[dev]
65 | - run: |
66 | cd tests
67 | uv run coverage run -p -m pytest
68 |
69 | - name: Upload coverage data
70 | uses: actions/upload-artifact@v4
71 | with:
72 | name: coverage-data-${{ matrix.python-version }}
73 | path: tests/.coverage.*
74 | include-hidden-files: true
75 |
76 | coverage:
77 | name: Combine & check coverage
78 | runs-on: ubuntu-latest
79 | needs: tests
80 |
81 | steps:
82 | - name: Download pre-built packages
83 | uses: actions/download-artifact@v4
84 | with:
85 | name: Packages
86 | path: dist
87 | - run: tar xf dist/*.tar.gz --strip-components=1
88 | - uses: actions/setup-python@v5
89 | with:
90 | python-version-file: .python-version-default
91 | - uses: hynek/setup-cached-uv@v2
92 |
93 | - name: Download coverage data
94 | uses: actions/download-artifact@v4
95 | with:
96 | pattern: coverage-data-*
97 | merge-multiple: true
98 |
99 | - run: uv venv
100 | - name: Combine coverage & fail if it's <70%.
101 | run: |
102 | uv pip install coverage[toml]
103 |
104 | uv run coverage combine
105 | uv run coverage html --skip-covered --skip-empty
106 |
107 | # Report and write to summary.
108 | uv run coverage report --format=markdown >> $GITHUB_STEP_SUMMARY
109 |
110 | # Report again and fail if under 70%.
111 | uv run coverage report --fail-under=70
112 |
113 | - name: Upload HTML report if check failed.
114 | uses: actions/upload-artifact@v4
115 | with:
116 | name: html-report
117 | path: htmlcov
118 | if: ${{ failure() }}
119 |
120 | docs:
121 | name: Build docs & run doctests
122 | runs-on: ubuntu-latest
123 | needs: build-package
124 | steps:
125 | - name: Download pre-built packages
126 | uses: actions/download-artifact@v4
127 | with:
128 | name: Packages
129 | path: dist
130 | - run: tar xf dist/*.tar.gz --strip-components=1
131 | - uses: actions/setup-python@v5
132 | with:
133 | # Keep in sync with tox/docs and .readthedocs.yaml.
134 | python-version: "3.12"
135 | - uses: hynek/setup-cached-uv@v2
136 | - run: uv venv
137 |
138 | # pyright:
139 | # name: Check types using pyright
140 | # runs-on: ubuntu-latest
141 | # steps:
142 | # - uses: actions/checkout@v4
143 | # - uses: actions/setup-python@v5
144 | # with:
145 | # python-version-file: .python-version-default
146 | # - uses: hynek/setup-cached-uv@v2
147 |
148 | # - run: uv venv
149 | # - run: uv pip install .[dev] pyright typing
150 | # - run: uv run pyright src
151 |
152 | install-dev:
153 | name: Verify dev env
154 | runs-on: ubuntu-latest
155 |
156 | steps:
157 | - uses: actions/checkout@v4
158 | - uses: actions/setup-python@v5
159 | with:
160 | python-version-file: .python-version-default
161 | - uses: hynek/setup-cached-uv@v2
162 |
163 | - run: uv venv --python $(cat .python-version-default)
164 | - run: uv pip install -e .[dev]
165 |
166 | - name: Ensure we can import the fediblockhole package
167 | run: |
168 | source .venv/bin/activate
169 |
170 | python -Ic 'import fediblockhole; print(fediblockhole.__version__)'
171 |
172 | # Ensure everything required is passing for branch protection.
173 | required-checks-pass:
174 | if: always()
175 |
176 | needs:
177 | - coverage
178 | - docs
179 | - install-dev
180 |
181 | runs-on: ubuntu-latest
182 |
183 | steps:
184 | - name: Decide whether the needed jobs succeeded or failed
185 | uses: re-actors/alls-green@release/v1
186 | with:
187 | jobs: ${{ toJSON(needs) }}
188 |
--------------------------------------------------------------------------------
/.python-version-default:
--------------------------------------------------------------------------------
1 | 3.12
2 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | Notable changes to the project will be documented in this changelog.
4 |
5 | This project uses [Semantic Versioning] and generally follows the conventions of [Keep A Changelog].
6 |
7 | ## [Unreleased]
8 |
9 | ## [v0.4.6] - 2024-11-01
10 |
11 | ### Added
12 |
13 | - Added use of Flake8 as linter to improve code consistency (59d306a)
14 | - Added use of the Black code formatting tool to improve code consistency (59d306a)
15 | - Cleaned up code lint and formatting (ffb8219)
16 | - Added issue templates to help new contributors get their issues addressed quicker (3b655d6)
17 | - Added ability to use environment variables to provide BearerTokens. Thanks to @offbyone (8c5761e)
18 |
19 | ## [v0.4.5] - 2023-12-30
20 |
21 | ### Added
22 |
23 | - Added `override_private_comment` option for annotation of automated blocks (4d12bac)
24 | - Added blocklist audit file option to debug and track blocklist operations (9200fc3)
25 |
26 | ## [v0.4.4] - 2023-07-09
27 |
28 | ### Added
29 |
30 | - Added citation for creators of #Fediblock (a64875b)
31 | - Added parser for Mastodon 4.1 blocklist CSV format (9f95f14)
32 | - Added container support (76d5b61)
33 |
34 | ### Fixed
35 |
36 | - Use `__future__.annotations` so type hints work with Python < 3.9 (8265639)
37 | - test util no longer tries to load default config file if conf tomldata is empty. (2da57b2)
38 |
39 | ## [v0.4.3] - 2023-02-13
40 |
41 | ### Added
42 |
43 | - Added Mastodon public API parser type because #33 (9fe9342)
44 | - Added ability to set scheme when talking to instances (9fe9342)
45 | - Added tests of comment merging. (fb3a7ec)
46 | - Added blocklist thresholds. (bb1d89e)
47 | - Added logging to help debug threshold-based merging. (b67ff0c)
48 | - Added extra documentation on configuring thresholds. (6c72af8)
49 | - Updated documentation to reflect Mastodon v4.1.0 changes to the application scopes screen. (b92dd21)
50 |
51 | ### Changed
52 |
53 | - Dropped minimum Python version to 3.6 (df3c16f)
54 | - Don't merge comments if new comment is empty. (b8aa11e)
55 | - Tweaked comment merging to pass tests. (fb3a7ec)
56 |
57 | ## [v0.4.2] - 2023-01-19
58 |
59 | ### Fixed
60 |
61 | - Blockdata var already converted to `_asdict()` (8d3b9da)
62 |
63 | ## [v0.4.1] - 2023-01-15
64 |
65 | Allowlist support.
66 |
67 | ### Added
68 |
69 | - Allowlists just remove blocks from merged list before push. (a25773f)
70 | - Added helper submodule for testing utils (bf48a96)
71 | - Added basic tests of allowlist config args. (a3d3571)
72 | - Added test cases for cmdline parsing. (11accf3)
73 | - Added test cases for configfile parsing. (11accf3)
74 | - Added documentation on allowlists. (26f5464)
75 | - Fixed bug in how DomainBlock defaults handle reject_media, reject_reports. (6d4e18b)
76 | - Added support for allowlists. Updated docstring for merge_blocklists() (7a31c33)
77 | - Added DomainBlock type hint to `update_known_block()`. (69c28f1)
78 | - Use `._asdict()` to get info to pass to add block API call. (69c28f1)
79 |
80 | ### Changed
81 |
82 | - Updated README to explain allowlist mechanism. (dc4bbd7)
83 | - Edited sample config to better explain URL source (9bd7914)
84 | - Restructured argparsing for easier testing. (11accf3)
85 | - str2bool() now converts '' to False. Added some extra debug logging of blocklist parsing. (894b133)
86 | - Updated documentation to explain need for `admin:read` access to fetch followers stats. (2cec9e1)
87 | - Aligned API call rate limit with server default. (55dad3f)
88 |
89 | ### Removed
90 |
91 | - Remove implied setting of reject_media/reports if severity is set to 'suspend'. (3aa2e37)
92 |
93 | ### Fixed
94 |
95 | - Fixed bug: mergeplan in config file was ignored. Reported in #22 (11accf3)
96 | - Fixed bug in `_asdict()` of severity level. (9817c99)
97 | - Fix `DomainBlock.id` usage during `__iter__()` (a718af5)
98 |
99 | ## [v0.4.0] - 2023-01-13
100 |
101 | Substantial changes to better support multiple blocklist formats
102 |
103 | ### Added
104 |
105 | - Added support for RapidBlock blocklists, both CSV and JSON formats. (327a44d)
106 | - Added support for per-instance-source import_fields. (327a44d)
107 | - Updated sample config to include new formats. (327a44d)
108 | - A BlockSeverity of 'suspend' implies reject_media and reject_reports. (327a44d)
109 | - Added ability to limit max severity per-URL source. (10011a5)
110 | - Added boolean fields like 'reject_reports' to mergeplan handling. (66f0373)
111 | - Added tests for boolean merge situations. (66f0373)
112 | - Various other test cases added.
113 |
114 | ### Changed
115 |
116 | - Refactored to add a DomainBlock object. (10011a5)
117 | - Refactored to use a BlockParser structure. (10011a5)
118 | - Improved method for checking if changes are needed. (10011a5)
119 | - Refactored fetch from URLs and instances. (327a44d)
120 | - Improved check_followed_severity() behaviour. (327a44d)
121 | - Changed API delay to be in calls per hour. (327a44d)
122 | - Improved comment merging. (0a6eec4)
123 | - Clarified logic in apply_mergeplan() for boolean fields. (66f0373)
124 | - Updated README documentation. (ee9625d)
125 | - Aligned API call rate limit with server default. (55dad3f)
126 |
127 | ### Removed
128 |
129 | - Removed redundant global vars. (327a44d)
130 |
131 | ### Fixed
132 |
133 | - Fixed bug in severity change detection. (e0d40b5)
134 | - Fix `DomainBlock.id` usage during `__iter__()` (a718af5)
135 |
136 | ## [v0.3.0] - 2023-01-11
137 |
138 | ### Added
139 |
140 | - Added args to show version information. (1d0649a)
141 | - Added timeout to requests calls. (23b8833)
142 | - Added CHANGELOG.md (ca9d958)
143 |
144 | ### Changed
145 |
146 | - Changed min Python version to v3.10. (f37ab70)
147 |
148 | ## [v0.2.1] - 2023-01-10
149 |
150 | ### Added
151 |
152 | - User-Agent is set to FediBlockHole to identify ourselves to remote servers. (04d9eea)
153 | - Adding packaging to prepare for submission to PyPI. (4ab369f)
154 | - Added ability to set max severity level if an instance has followers of accounts on a to-be-blocked domain. (5518421)
155 | - Added ability to read domain_blocks from instances that make the list public. (4ef84b5)
156 | - Skip obfuscated domains when building the merged blocklist. (4ef84b5)
157 |
158 | ### Changed
159 |
160 | - Updated documentation in README and the sample config. (68a2c93)
161 |
162 | ### Fixed
163 |
164 | - Fixed a bug in config enablement of intermediate blocklists saving. (5518421)
165 |
166 | ## Before 2023-01-10
167 |
168 | - Initial rough versions that were not packaged.
169 |
170 |
171 | [keep a changelog]: https://keepachangelog.com/en/1.0.0/
172 | [semantic versioning]: https://semver.org/spec/v2.0.0.html
173 |
174 |
175 | [unreleased]: https://github.com/eigenmagic/fediblockhole/compare/v0.4.6...HEAD
176 | [v0.4.6]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.6
177 | [v0.4.5]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.5
178 | [v0.4.4]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.4
179 | [v0.4.3]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.3
180 | [v0.4.2]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.2
181 | [v0.4.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.1
182 | [v0.4.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.0
183 | [v0.3.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.3.0
184 | [v0.2.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.2.1
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 |     <one line to give the program's name and a brief idea of what it does.>
633 |     Copyright (C) <year>  <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published
637 | by the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program.  If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # FediBlockHole
2 |
3 | A tool for keeping a Mastodon instance blocklist synchronised with remote lists.
4 |
5 | The broad design goal for FediBlockHole is to support pulling in a list of
6 | blocklists from a set of trusted sources, merge them into a combined blocklist,
7 | and then push that merged list to a set of managed instances.
8 |
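As a rough illustration of that flow, the sketch below reads two hypothetical
CSV blocklists, merges them by keeping the highest severity seen for each
domain, and writes a combined list. The file names, column names, and severity
ranking are assumptions for the example; the real tool drives all of this from
its TOML configuration.

```
import csv

# Severity ranking assumed for this sketch: 'suspend' outranks 'silence',
# which outranks 'noop'.
SEVERITY_RANK = {"noop": 0, "silence": 1, "suspend": 2}

def merge_blocklists(paths: list[str]) -> dict[str, dict]:
    """Merge CSV blocklists, keeping the highest severity per domain."""
    merged: dict[str, dict] = {}
    for path in paths:
        with open(path, newline="") as f:
            for row in csv.DictReader(f):
                domain = row["domain"]
                seen = merged.get(domain)
                if seen is None or (SEVERITY_RANK[row["severity"]]
                                    > SEVERITY_RANK[seen["severity"]]):
                    merged[domain] = row
    return merged

if __name__ == "__main__":
    # Hypothetical input files with 'domain' and 'severity' columns.
    merged = merge_blocklists(["list-a.csv", "list-b.csv"])
    with open("merged.csv", "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["domain", "severity"])
        writer.writeheader()
        for domain, row in sorted(merged.items()):
            writer.writerow({"domain": domain, "severity": row["severity"]})
```
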
9 | Mastodon admins can choose who they think maintains quality lists and
10 | subscribe to them, helping to distribute the load of maintaining blocklists
11 | across a community of people. Control ultimately rests with the admins
12 | themselves, so they can outsource as much, or as little, of the effort to
13 | others as they deem appropriate.
14 |
15 | Inspired by the way PiHole works for maintaining a set of blocklists of adtech
16 | domains. Builds on the work of
17 | [@CaribenxMarciaX@scholar.social](https://scholar.social/@CaribenxMarciaX) and
18 | [@gingerrroot@kitty.town](https://kitty.town/@gingerrroot) who started the
19 | #Fediblock hashtag and did a lot of advocacy around it, often at great personal
20 | cost.
21 |
22 | ## Features
23 |
24 | ### Blocklist Sources
25 |
26 | - Read domain block lists from other instances via the Mastodon API.
27 | - Supports both public lists (no auth required) and 'admin' lists requiring
28 | authentication to an instance.
29 | - Read domain block lists from arbitrary URLs, including local files.
30 | - Supports CSV and JSON format blocklists.
31 | - Supports RapidBlock CSV and JSON format blocklists.
32 |
33 | ### Blocklist Export/Push
34 |
35 | - Push a merged blocklist to a set of Mastodon instances.
36 | - Export per-source, unmerged block lists to local files, in CSV format.
37 | - Export merged blocklists to local files, in CSV format.
38 | - Read block lists from multiple remote instances.
39 | - Read block lists from multiple URLs, including local files.
40 | - Write a unified block list to a local CSV file.
41 | - Push unified blocklist updates to multiple remote instances.
42 | - Control import and export fields.
43 |
44 | ### Flexible Configuration
45 |
46 | - Provides (hopefully) sensible defaults to minimise first-time setup.
47 | - Global and fine-grained configuration options available for those complex situations that crop up sometimes.
48 | - Allowlists to override blocks in blocklists to ensure you never block instances you want to keep.
49 | - Blocklist thresholds if you want to block only when an instance shows up in multiple blocklists (see the sketch after this list).
50 |
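To make the threshold idea concrete: a domain is blocked only when enough of
the source lists agree on it. A minimal sketch, assuming each source has
already been parsed into a set of domains:

```
from collections import Counter

def apply_threshold(sources: list[set[str]], min_count: int) -> set[str]:
    """Keep a domain only if at least `min_count` source lists block it."""
    counts = Counter(domain for source in sources for domain in source)
    return {domain for domain, n in counts.items() if n >= min_count}

# Block only domains that appear on at least two of the three lists.
lists = [
    {"spam.example", "abuse.example"},
    {"spam.example"},
    {"spam.example", "abuse.example", "edge.example"},
]
print(apply_threshold(lists, min_count=2))
# -> {'spam.example', 'abuse.example'} (set ordering may vary)
```
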
51 | ## Installing
52 |
53 | Installable using `pip`.
54 |
55 | ```
56 | python3 -m pip install fediblockhole
57 | ```
58 |
59 | Install from source by cloning the repo, `cd fediblockhole` and run:
60 |
61 | ```
62 | python3 -m pip install .
63 | ```
64 |
65 | Installation adds a commandline tool: `fediblock-sync`
66 |
67 | Instance admins who want to use this tool for their instance will need to add an
68 | Application at `https://<instance-domain>/settings/applications/` so they can
69 | authorize the tool to create and update domain blocks with an OAuth token.
70 |
71 | More on authorization by token below.
72 |
73 | ### Reading remote instance blocklists
74 |
75 | If a remote instance makes its domain blocks public, you don't need
76 | a token to read them.
77 |
78 | If a remote instance only shows its domain blocks to local accounts
79 | you'll need to have a token with `read:blocks` authorization set up.
80 | If you have an account on that instance, you can get a token by setting up a new
81 | Application at `https://<instance-domain>/settings/applications/`.
82 |
83 | To read admin blocks from a remote instance, you'll need to ask the instance
84 | admin to add a new Application at
85 | `https://<instance-domain>/settings/applications/` and then tell you the access
86 | token.
87 |
88 | The application needs the `admin:read:domain_blocks` OAuth scope. You can allow
89 | full `admin:read` access, but be aware that this authorizes someone to read all
90 | the data in the instance. That's asking a lot of a remote instance admin who
91 | just wants to share domain_blocks with you.
92 |
93 | The `admin:read:domain_blocks` scope is available as of Mastodon v4.1.0, but for
94 | earlier versions admins will need to use the manual method described below.
95 |
96 | You can update the scope for your application in the database directly like
97 | this:
98 |
99 | ```
100 | UPDATE oauth_applications as app
101 | SET scopes = 'admin:read:domain_blocks'
102 | FROM oauth_access_tokens as tok
103 | WHERE app.id = tok.application_id
104 | AND app.name = '<your-app-name>'
105 | ;
106 | ```
107 |
108 | When that's done, regenerate the token (so it has the new scopes) in the
109 | application screen in the instance GUI. FediBlockHole should then be able to use
110 | the app token to read domain blocks via the API, but nothing else.
111 |
112 | Alternatively, you could ask the remote instance admin to set up FediBlockHole and
113 | use it to dump out a CSV blocklist from their instance and then put it somewhere
114 | trusted parties can read it. Then you can define the blocklist as a URL source,
115 | as explained below.
116 |
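117 | For example (this URL is a hypothetical location for such a dump), the shared
118 | CSV can then be consumed like any other URL source:
119 |
120 | ```
121 | blocklist_url_sources = [
122 |     { url = 'https://example.org/exported-blocklist.csv', format = 'csv' },
123 | ]
124 | ```
125 |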
117 | ### Writing instance blocklists
118 |
119 | Writing domain blocks to an instance requires both the `admin:read` and
120 | `admin:write:domain_blocks` OAuth scopes.
121 |
122 | The tool needs `admin:read:domain_blocks` scope to read the current list of
123 | domain blocks so we update ones that already exist, rather than trying to add
124 | all new ones and clutter up the instance.
125 |
126 | `admin:read` access is needed to check whether the instance has any accounts
127 | that follow accounts on a domain that is about to get `suspend`ed. If so, the
128 | tool automatically drops the block severity to `silence` level so people have
129 | time to migrate accounts before a full defederation takes effect. Unfortunately,
130 | the statistics measure used to learn this information requires `admin:read` scope.
131 |
132 | You can add `admin:read` scope in the application admin screen. Please be aware
133 | that this grants full read access to all information in the instance to the
134 | application token, so make sure you keep it a secret. At least remove
135 | world-readable permission from any config file you put it in, e.g.:
136 |
137 | ```
138 | chmod o-r <configfile>
139 | ```
140 |
141 | You can also grant full `admin:write` scope to the application, but if you'd
142 | prefer to keep things more tightly secured, limit the scope to
143 | `admin:write:domain_blocks`.
144 |
145 | Again, this scope is only available in the application config screen as of
146 | Mastodon v4.1.0. If your instance is on an earlier version, you'll need to use
147 | SQL to set the scopes in the database and then regenerate the token:
148 |
149 | ```
150 | UPDATE oauth_applications as app
151 | SET scopes = 'admin:read admin:write:domain_blocks'
152 | FROM oauth_access_tokens as tok
153 | WHERE app.id = tok.application_id
154 | AND app.name = '<your-app-name>'
155 | ;
156 | ```
157 |
158 | When that's done, FediBlockHole should be able to use its token to authorise
159 | adding or updating domain blocks via the API.
160 |
161 | ## Using the tool
162 |
163 | Run the tool like this:
164 |
165 | ```
166 | fediblock-sync -c <configfile>
167 | ```
168 |
169 | If you put the config file in `/etc/default/fediblockhole.conf.toml` you don't
170 | need to pass in the config file path.
171 |
172 | For a list of possible configuration options, check the `--help`.
173 |
174 | You can also read the heavily commented sample configuration file in the repo at
175 | [etc/sample.fediblockhole.conf.toml](https://github.com/eigenmagic/fediblockhole/blob/main/etc/sample.fediblockhole.conf.toml).
176 |
177 | ## Configuring
178 |
179 | Once you have your applications and tokens and scopes set up, create a
180 | configuration file for FediBlockHole to use. You can put it anywhere and use the
181 | `-c <configfile>` commandline parameter to tell FediBlockHole where it is.
182 |
183 | Or you can use the default location of `/etc/default/fediblockhole.conf.toml`.
184 |
185 | As the filename suggests, FediBlockHole uses TOML syntax.
186 |
187 | There are 4 key sections:
188 |
189 | 1. `blocklist_url_sources`: A list of URLs to read blocklists from
190 | 1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API
191 | 1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API
192 | 1. `allowlist_url_sources`: A list of URLs to read allowlists from
193 |
194 | More detail on configuring the tool is provided below.
195 |
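196 | As a rough sketch (the domains and URLs here are placeholders, not
197 | recommendations), a minimal configuration covering all four sections might look
198 | like this:
199 |
200 | ```
201 | blocklist_url_sources = [
202 |     { url = 'https://example.org/blocklist.csv', format = 'csv' },
203 | ]
204 | blocklist_instance_sources = [
205 |     { domain = 'other.example.org' },
206 | ]
207 | blocklist_instance_destinations = [
208 |     { domain = 'my.example.org', token = '<token>' },
209 | ]
210 | allowlist_url_sources = [
211 |     { url = 'file:///etc/fediblockhole/allowlist.csv', format = 'csv' },
212 | ]
213 | ```
214 |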
196 | ### URL sources
197 |
198 | The `blocklist_url_sources` setting is a list of URLs to fetch blocklists from.
199 |
200 | Supported formats are currently:
201 |
202 | - Comma-Separated Values (CSV)
203 | - JSON
204 | - Mastodon v4.1 flavoured CSV
205 | - RapidBlock CSV
206 | - RapidBlock JSON
207 |
208 | Blocklists must provide a `domain` field, and should provide a `severity` field.
209 |
210 | `domain` is the domain name of the instance to be blocked/limited.
211 |
212 | `severity` is the severity level of the block/limit. Supported values are: `noop`, `silence`, and `suspend`.
213 |
214 | Optional fields that the tool understands are `public_comment`, `private_comment`, `reject_media`, `reject_reports`, and `obfuscate`.
215 |
216 | #### CSV format
217 |
218 | A CSV format blocklist must contain a header row with at least a `domain` and `severity` field.
219 |
220 | Optional fields, as listed above, may also be included.
221 |
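222 | For example, a small CSV blocklist (these domains are purely illustrative)
223 | might look like this:
224 |
225 | ```
226 | "domain","severity","public_comment"
227 | "example.org","silence","Too much spam"
228 | "example2.org","suspend","No moderation"
229 | ```
230 |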
222 | #### Mastodon v4.1 CSV format
223 |
224 | As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
225 | infinite wisdom, the Mastodon devs decided that field names should begin with a
226 | `#` character in the header, unlike the field names in the JSON output via the
227 | API… or in pretty much any other CSV file anywhere else.
228 |
229 | Setting the format to `mastodon_csv` will strip off the `#` character when
230 | parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
231 | other CSV formatted blocklist.
232 |
233 | #### JSON format
234 |
235 | JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.
236 |
237 | This is a list of dictionaries, with at minimum a `domain` field, and preferably
238 | a `severity` field. The other optional fields are, well, optional.
239 |
240 | #### RapidBlock CSV format
241 |
242 | The RapidBlock CSV format has no header and a single field, so it's not
243 | _strictly_ a CSV file as there are no commas separating values. It is basically
244 | just a list of domains to block, separated by '\r\n'.
245 |
246 | When using this format, the tool assumes the `severity` level is `suspend`.
247 |
248 | #### RapidBlock JSON format
249 |
250 | The RapidBlock JSON format provides more detailed information about domain
251 | blocks, but is still somewhat limited.
252 |
253 | It has a single `isBlocked` flag indicating if a domain should be blocked or
254 | not. There is no support for the 'silence' block level.
255 |
256 | There is no support for 'reject_media' or 'reject_reports' or 'obfuscate'.
257 |
258 | All comments are public, by virtue of the public nature of RapidBlock.
259 |
260 | ### Instance sources
261 |
262 | The tool can also read domain_blocks from instances directly.
263 |
264 | The configuration is a list of dictionaries of the form:
265 | ```
266 | { domain = '<domain>', token = '<token>', admin = false }
267 | ```
268 |
269 | The `domain` is the fully-qualified domain name of the API host for an instance
270 | you want to read domain blocks from.
271 |
272 | The `token` is an optional OAuth token for the application that's configured in
273 | the instance to allow you to read domain blocks, as discussed above.
274 |
275 | The `token` can also be specified using environment variables. This provides
276 | improved security compared to storing the OAuth token in a configuration file,
277 | but it will require the environment variable to be set so that FediBlockHole can
278 | access it. See below in [Instance destinations](#instance-destinations) for more
279 | detail on how to use environment variables to provide authentication tokens.
280 |
281 | `admin` is an optional field that tells the tool to use the more detailed admin
282 | API endpoint for domain_blocks, rather than the more public API endpoint that
283 | doesn't provide as much detail. You will need a `token` that's been configured to
284 | permit access to the admin domain_blocks scope, as detailed above.
285 |
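286 | Putting those pieces together, a source list might look like this (the entries
287 | mirror the commented examples in the sample configuration file):
288 |
289 | ```
290 | blocklist_instance_sources = [
291 |     { domain = 'public.blocklist' },                      # public list, no token
292 |     { domain = 'jorts.horse', token = '<token>' },        # user-level token
293 |     { domain = 'eigenmagic.net', token = '<token>', admin = true },  # admin API
294 | ]
295 | ```
296 |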
286 | ### Instance destinations
287 |
288 | The tool supports pushing a unified blocklist to multiple instances.
289 |
290 | Configure the list of instances you want to push your blocklist to in the
291 | `blocklist_instance_destinations` list. Each entry is of the form:
292 |
293 | ```
294 | { domain = '<domain>', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
295 | ```
296 |
297 | The field `domain` is required. It is the fully-qualified domain name of the
298 | instance you want to push to.
299 |
300 | A Bearer token is also required for authenticating with the instance. It can be provided in two ways:
301 |
302 | 1. A token can be provided directly in the entry as a `token` field, like this:
303 | ```
304 | { domain = '<domain>', token = '<token>', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
305 | ```
306 | This was the only mechanism available up to version 0.4.5 of Fediblockhole.
307 |
308 | 1. A token can be provided from the environment.
309 |
310 | If a token is not directly provided with the `token` field, Fediblockhole will
311 | look for an environment variable that contains the token.
312 |
313 | By default, the name of the environment variable will be the domain name
314 | converted to upper case and with dot/period characters converted to
315 | underscores, and the suffix `_TOKEN`. For example, the token variable for the
316 | domain `eigenmagic.net` would be `EIGENMAGIC_NET_TOKEN`.
317 |
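318 | For example, an entry with no `token` field, like this:
319 |
320 | ```
321 | { domain = 'eigenmagic.net', max_followed_severity = 'silence' }
322 | ```
323 |
324 | would cause Fediblockhole to read the token from the `EIGENMAGIC_NET_TOKEN`
325 | environment variable.
326 |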
318 | You can also specify the environment variable to look for, using the
319 | `token_env_var` field, like this:
320 | ```
321 | { domain = '<domain>', token_env_var = 'MY_CUSTOM_DOMAIN_TOKEN', import_fields = ['public_comment'], max_severity = 'suspend', max_followed_severity = 'suspend' }
322 | ```
323 |
324 | Fediblockhole will then look for a token in the `MY_CUSTOM_DOMAIN_TOKEN` environment variable.
325 |
326 | If a specific `token_env_var` is provided, the default variable name will
327 | not be used. If both the `token` and `token_env_var` fields are provided,
328 | the token provided in the `token` field will be used, and a warning will be
329 | issued to notify you that you might have misconfigured things.
330 |
331 | The Bearer token must be an application token with both
332 | `admin:read:domain_blocks` and `admin:write:domain_blocks` authorization.
335 |
336 | The fields `max_severity`, `max_followed_severity`, and `import_fields` are optional.
337 |
338 | The optional `import_fields` setting allows you to restrict which fields are
339 | imported from each instance. If you want to import the `reject_reports` settings
340 | from one instance, but no others, you can use the `import_fields` setting to do
341 | it. **Note:** The `domain` and `severity` fields are always imported.
342 |
343 | The optional `max_severity` setting limits the maximum severity you will allow a
344 | remote blocklist to set. This helps you import a list from a remote instance but
345 | only at the `silence` level, even if that remote instance has a block at
346 | `suspend` level. If not set, defaults to `suspend`.
347 |
348 | The optional `max_followed_severity` setting sets a per-instance limit on the
349 | severity of a domain_block if there are accounts on the instance that follow
350 | accounts on the domain to be blocked. If `max_followed_severity` isn't set, it
351 | defaults to `silence`.
352 |
353 | This setting exists to give people time to move off an instance that is about to
354 | be defederated and bring their followers from your instance with them. Without
355 | it, if a new `suspend` block appears in any of the blocklists you subscribe to (or
356 | a block level increases from `silence` to `suspend`) and you're using the default
357 | `max` mergeplan, the tool would immediately suspend the instance, cutting
358 | everyone on the blocked instance off from their existing followers on your
359 | instance, even if they move to a new instance. If you actually want that
360 | outcome, you can set `max_followed_severity = 'suspend'` and use the `max`
361 | mergeplan.
362 |
363 | Once the follow count drops to 0 on your instance, the tool will automatically
364 | use the highest severity it finds again (if you're using the `max` mergeplan).
365 |
366 | ### Allowlists
367 |
368 | Sometimes you might want to completely ignore the blocklist definitions for
369 | certain domains. That's what allowlists are for.
370 |
371 | Allowlists remove any domain in the list from the merged list of blocks before
372 | the merged list is saved out to a file or pushed to any instance.
373 |
374 | Allowlists can be in any format supported by `blocklist_url_sources` but ignore
375 | all fields that aren't `domain`.
376 |
377 | You can also allow domains on the commandline by using the `-A` or `--allow`
378 | flag and providing the domain name to allow. You can use the flag multiple
379 | times to allow multiple domains.
380 |
381 | It is probably wise to include your own instance domain in an allowlist so you
382 | don't accidentally defederate from yourself.
383 |
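384 | For example (the domain is a placeholder), an allowlist CSV protecting your own
385 | instance only needs a `domain` column, though a comment column can be handy:
386 |
387 | ```
388 | "domain","private_comment"
389 | "my.example.org","Never block my own instance"
390 | ```
391 |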
384 | ## More advanced configuration
385 |
386 | For a list of possible configuration options, check the `--help` and read the
387 | sample configuration file in `etc/sample.fediblockhole.conf.toml`.
388 |
389 | ### save_intermediate
390 |
391 | This option tells the tool to save the unmerged blocklists it fetches from
392 | remote instances and URLs into separate files. This is handy for debugging, or
393 | just to have a non-unified set of blocklist files.
394 |
395 | Works with the `savedir` setting to control where to save the files.
396 |
397 | These are parsed blocklists, not the raw data, and so will be affected by `import_fields`.
398 |
399 | The filename is based on the URL or domain used so you can tell where each list came from.
400 |
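401 | A minimal sketch in the config file (the directory shown is the default):
402 |
403 | ```
404 | save_intermediate = true
405 | savedir = '/tmp'
406 | ```
407 |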
401 | ### savedir
402 |
403 | Sets where to save intermediate blocklist files. Defaults to `/tmp`.
404 |
405 | ### blocklist_auditfile
406 |
407 | If provided, the tool will save an audit file of counts and percentages by
408 | domain. Useful for debugging thresholds. Defaults to None.
409 |
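410 | For example, as in the sample configuration file:
411 |
412 | ```
413 | blocklist_auditfile = '/tmp/domain_counts_list.csv'
414 | ```
415 |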
410 | ### no_push_instance
411 |
412 | Defaults to False.
413 |
414 | When set, the tool won't actually try to push the unified blocklist to any
415 | configured instances.
416 |
417 | If you want to see what the tool would try to do, but not actually apply any
418 | updates, use `--dryrun`.
419 |
420 | ### no_fetch_url
421 |
422 | Skip the fetching of blocklists from any URLs that are configured.
423 |
424 | ### no_fetch_instance
425 |
426 | Skip the fetching of blocklists from any remote instances that are configured.
427 |
428 | ### override_private_comment
429 |
430 | Defaults to None.
431 |
432 | Stamp all *new* blocks pushed to a remote server with this comment or code.
433 | Helps to identify blocks you've created on a server via Fediblockhole versus ones that
434 | already existed.
435 |
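436 | For example, as in the sample configuration file:
437 |
438 | ```
439 | override_private_comment = 'Added by Fediblock Sync'
440 | ```
441 |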
436 | ### mergeplan
437 |
438 | If two (or more) blocklists define blocks for the same domain, but they're
439 | different, `mergeplan` tells the tool how to resolve the conflict.
440 |
441 | `max` is the default. It uses the _highest_ severity block it finds as the one
442 | that should be used in the unified blocklist.
443 |
444 | `min` does the opposite. It uses the _lowest_ severity block it finds as the one
445 | to use in the unified blocklist.
446 |
447 | A full discussion of severities is beyond the scope of this README, but here is
448 | a quick overview of how it works for this tool.
449 |
450 | The severities are:
451 |
452 | - **noop**, level 0: This is essentially an 'unblock' but you can include a
453 | comment.
454 | - **silence**, level 1: A silence adds friction to federation with an instance.
455 | - **suspend**, level 2: A full defederation with the instance.
456 |
457 | With `mergeplan` set to `max`, _silence_ would take precedence over _noop_, and
458 | _suspend_ would take precedence over both.
459 |
460 | With `mergeplan` set to `min`, _silence_ would take precedence over _suspend_,
461 | and _noop_ would take precedence over both.
462 |
463 | You would want to use `max` to ensure that you always block with whatever
464 | severity your harshest fellow admin thinks should happen.
465 |
466 | You would want to use `min` to ensure that your blocks do what your most lenient
467 | fellow admin thinks should happen.
468 |
469 | ### import_fields
470 |
471 | `import_fields` controls which fields will be imported from remote
472 | instances and URL blocklists, and which fields are pushed to instances from the
473 | unified blocklist.
474 |
475 | The fields `domain` and `severity` are always included, so only define extra
476 | fields if you want them.
477 |
478 | You can't export fields you haven't imported, so `export_fields` should be a
479 | subset of `import_fields`, but you can run the tool multiple times. You could,
480 | for example, include lots of fields for an initial import to build up a
481 | comprehensive list for export, combined with the `--no-push-instance` option so
482 | you don't actually apply the full list anywhere.
483 |
484 | Then you could use a different set of options when importing so you have all the
485 | detail in a file, but only push `public_comment` to instances.
486 |
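487 | For example, the sample configuration imports several extra fields but exports
488 | only the public comment:
489 |
490 | ```
491 | import_fields = ['public_comment', 'reject_media', 'reject_reports', 'obfuscate']
492 | export_fields = ['public_comment']
493 | ```
494 |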
487 | ### export_fields
488 |
489 | `export_fields` controls which fields will get saved to the unified blocklist
490 | file, if you export one.
491 |
492 | The fields `domain` and `severity` are always included, so only define extra
493 | fields if you want them.
--------------------------------------------------------------------------------
/chart/.helmignore:
--------------------------------------------------------------------------------
1 | # A helm chart's templates and default values can be packaged into a .tgz file.
2 | # When doing that, not everything should be bundled into the .tgz file. This
3 | # file describes what to not bundle.
4 | #
5 | # Manually added by us
6 | # --------------------
7 | #
8 |
9 | # Boilerplate .helmignore from `helm create mastodon`
10 | # ---------------------------------------------------
11 | #
12 | # Patterns to ignore when building packages.
13 | # This supports shell glob matching, relative path matching, and
14 | # negation (prefixed with !). Only one pattern per line.
15 | .DS_Store
16 | # Common VCS dirs
17 | .git/
18 | .gitignore
19 | .bzr/
20 | .bzrignore
21 | .hg/
22 | .hgignore
23 | .svn/
24 | # Common backup files
25 | *.swp
26 | *.bak
27 | *.tmp
28 | *.orig
29 | *~
30 | # Various IDEs
31 | .project
32 | .idea/
33 | *.tmproj
34 | .vscode/
35 |
--------------------------------------------------------------------------------
/chart/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: fediblockhole
3 | description: FediBlockHole is a tool for keeping a Mastodon instance blocklist synchronised with remote lists.
4 |
5 | # A chart can be either an 'application' or a 'library' chart.
6 | #
7 | # Application charts are a collection of templates that can be packaged into versioned archives
8 | # to be deployed.
9 | #
10 | # Library charts provide useful utilities or functions for the chart developer. They're included as
11 | # a dependency of application charts to inject those utilities and functions into the rendering
12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed.
13 | type: application
14 |
15 | # This is the chart version. This version number should be incremented each time you make changes
16 | # to the chart and its templates, including the app version.
17 | # Versions are expected to follow Semantic Versioning (https://semver.org/)
18 | version: 1.1.0
19 |
20 | # This is the version number of the application being deployed. This version number should be
21 | # incremented each time you make changes to the application. Versions are not expected to
22 | # follow Semantic Versioning. They should reflect the version the application is using.
23 | appVersion: 0.4.2
24 |
--------------------------------------------------------------------------------
/chart/fediblockhole.conf.toml:
--------------------------------------------------------------------------------
1 | # List of instances to read blocklists from.
2 | # If the instance makes its blocklist public, no authorization token is needed.
3 | # Otherwise, `token` is a Bearer token authorised to read domain_blocks.
4 | # If `admin` = True, use the more detailed admin API, which requires a token with a
5 | # higher level of authorization.
6 | # If `import_fields` are provided, only import these fields from the instance.
7 | # Overrides the global `import_fields` setting.
8 | blocklist_instance_sources = [
9 | # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
10 | # { domain = 'jorts.horse', token = '' }, # user accessible block list
11 | # { domain = 'eigenmagic.net', token = '', admin = true }, # admin access required
12 | ]
13 |
14 | # List of URLs to read csv blocklists from
15 | # Format tells the parser which format to use when parsing the blocklist
16 | # max_severity tells the parser to override any severities that are higher than this value
17 | # import_fields tells the parser to only import that set of fields from a specific source
18 | blocklist_url_sources = [
19 | # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
20 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
21 |
22 | ]
23 |
24 | ## These global allowlists override blocks from blocklists
25 | # These are the same format and structure as blocklists, but they take precedence
26 | allowlist_url_sources = [
27 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-01.csv', format = 'csv' },
28 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-02.csv', format = 'csv' },
29 | ]
30 |
31 | # List of instances to write blocklist to
32 | blocklist_instance_destinations = [
33 | # { domain = 'eigenmagic.net', token = '', max_followed_severity = 'silence'},
34 | ]
35 |
36 | ## Store a local copy of the remote blocklists after we fetch them
37 | #save_intermediate = true
38 |
39 | ## Directory to store the local blocklist copies
40 | # savedir = '/tmp'
41 |
42 | ## File to save the fully merged blocklist into
43 | # blocklist_savefile = '/tmp/merged_blocklist.csv'
44 |
45 | ## Don't push blocklist to instances, even if they're defined above
46 | # no_push_instance = false
47 |
48 | ## Don't fetch blocklists from URLs, even if they're defined above
49 | # no_fetch_url = false
50 |
51 | ## Don't fetch blocklists from instances, even if they're defined above
52 | # no_fetch_instance = false
53 |
54 | ## Set the mergeplan to use when dealing with overlaps between blocklists
55 | # The default 'max' mergeplan will use the harshest severity block found for a domain.
56 | # The 'min' mergeplan will use the lightest severity block found for a domain.
57 | # mergeplan = 'max'
58 |
59 | ## Set which fields we import
60 | ## 'domain' and 'severity' are always imported, these are additional
61 | ##
62 | import_fields = ['public_comment', 'reject_media', 'reject_reports', 'obfuscate']
63 |
64 | ## Set which fields we export
65 | ## 'domain' and 'severity' are always exported, these are additional
66 | ##
67 | export_fields = ['public_comment']
68 |
--------------------------------------------------------------------------------
/chart/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/* vim: set filetype=mustache: */}}
2 | {{/*
3 | Expand the name of the chart.
4 | */}}
5 | {{- define "fediblockhole.name" -}}
6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
7 | {{- end }}
8 |
9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "fediblockhole.fullname" -}}
15 | {{- if .Values.fullnameOverride }}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
17 | {{- else }}
18 | {{- $name := default .Chart.Name .Values.nameOverride }}
19 | {{- if contains $name .Release.Name }}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
21 | {{- else }}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
23 | {{- end }}
24 | {{- end }}
25 | {{- end }}
26 |
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "fediblockhole.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
32 | {{- end }}
33 |
34 | {{/*
35 | Common labels
36 | */}}
37 | {{- define "fediblockhole.labels" -}}
38 | helm.sh/chart: {{ include "fediblockhole.chart" . }}
39 | {{ include "fediblockhole.selectorLabels" . }}
40 | {{- if .Chart.AppVersion }}
41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
42 | {{- end }}
43 | app.kubernetes.io/managed-by: {{ .Release.Service }}
44 | {{- end }}
45 |
46 | {{/*
47 | Selector labels
48 | */}}
49 | {{- define "fediblockhole.selectorLabels" -}}
50 | app.kubernetes.io/name: {{ include "fediblockhole.name" . }}
51 | app.kubernetes.io/instance: {{ .Release.Name }}
52 | {{- end }}
53 |
54 | {{/*
55 | Rolling pod annotations
56 | */}}
57 | {{- define "fediblockhole.rollingPodAnnotations" -}}
58 | rollme: {{ .Release.Revision | quote }}
59 | checksum/config-configmap: {{ include ( print $.Template.BasePath "/configmap-conf-toml.yaml" ) . | sha256sum | quote }}
60 | {{- end }}
61 |
62 | {{/*
63 | Create the default conf file path and filename
64 | */}}
65 | {{- define "fediblockhole.conf_file_path" -}}
66 | {{- default "/etc/default/" .Values.fediblockhole.conf_file.path }}
67 | {{- end }}
68 | {{- define "fediblockhole.conf_file_filename" -}}
69 | {{- default "fediblockhole.conf.toml" .Values.fediblockhole.conf_file.filename }}
70 | {{- end }}
71 |
--------------------------------------------------------------------------------
/chart/templates/configmap-conf-toml.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: {{ include "fediblockhole.fullname" . }}-conf-toml
5 | labels:
6 | {{- include "fediblockhole.labels" . | nindent 4 }}
7 | data:
8 | {{ (.Files.Glob "fediblockhole.conf.toml").AsConfig | nindent 4 }}
9 |
--------------------------------------------------------------------------------
/chart/templates/cronjob-fediblock-sync.yaml:
--------------------------------------------------------------------------------
1 | {{ if .Values.fediblockhole.cron.sync.enabled -}}
2 | apiVersion: batch/v1
3 | kind: CronJob
4 | metadata:
5 | name: {{ include "fediblockhole.fullname" . }}-sync
6 | labels:
7 | {{- include "fediblockhole.labels" . | nindent 4 }}
8 | spec:
9 | schedule: {{ .Values.fediblockhole.cron.sync.schedule }}
10 | failedJobsHistoryLimit: {{ .Values.fediblockhole.cron.sync.failedJobsHistoryLimit }}
11 | successfulJobsHistoryLimit: {{ .Values.fediblockhole.cron.sync.successfulJobsHistoryLimit }}
12 | jobTemplate:
13 | spec:
14 | template:
15 | metadata:
16 | name: {{ include "fediblockhole.fullname" . }}-sync
17 | {{- with .Values.jobAnnotations }}
18 | annotations:
19 | {{- toYaml . | nindent 12 }}
20 | {{- end }}
21 | spec:
22 | restartPolicy: OnFailure
23 | containers:
24 | - name: {{ include "fediblockhole.fullname" . }}-sync
25 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
26 | imagePullPolicy: {{ .Values.image.pullPolicy }}
27 | command:
28 | - fediblock-sync
29 | - -c
30 | - "{{- include "fediblockhole.conf_file_path" . -}}{{- include "fediblockhole.conf_file_filename" . -}}"
31 | volumeMounts:
32 | - name: config
33 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- include "fediblockhole.conf_file_filename" . -}}"
34 | subPath: "{{- include "fediblockhole.conf_file_filename" . -}}"
35 | {{ if .Values.fediblockhole.allow_file.filename }}
36 | - name: allowfile
37 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- .Values.fediblockhole.allow_file.filename -}}"
38 | subPath: "{{- .Values.fediblockhole.allow_file.filename -}}"
39 | {{ end }}
40 | {{ if .Values.fediblockhole.block_file.filename }}
41 | - name: blockfile
42 | mountPath: "{{- include "fediblockhole.conf_file_path" . -}}{{- .Values.fediblockhole.block_file.filename -}}"
43 | subPath: "{{- .Values.fediblockhole.block_file.filename -}}"
44 | {{ end }}
45 | volumes:
46 | - name: config
47 | configMap:
48 | name: {{ include "fediblockhole.fullname" . }}-conf-toml
49 | items:
50 | - key: {{ include "fediblockhole.conf_file_filename" . | quote }}
51 | path: {{ include "fediblockhole.conf_file_filename" . | quote }}
52 | {{ if .Values.fediblockhole.allow_file.filename }}
53 | - name: allowfile
54 | configMap:
55 | name: {{ include "fediblockhole.fullname" . }}-allow-csv
56 | items:
57 | - key: {{ .Values.fediblockhole.allow_file.filename | quote }}
58 | path: {{ .Values.fediblockhole.allow_file.filename | quote }}
59 | {{ end }}
60 | {{ if .Values.fediblockhole.block_file.filename }}
61 | - name: blockfile
62 | configMap:
63 | name: {{ include "fediblockhole.fullname" . }}-block-csv
64 | items:
65 | - key: {{ .Values.fediblockhole.block_file.filename | quote }}
66 | path: {{ .Values.fediblockhole.block_file.filename | quote }}
67 | {{ end }}
68 | {{- end }}
69 |
--------------------------------------------------------------------------------
/chart/values.yaml:
--------------------------------------------------------------------------------
1 | image:
2 | repository: ghcr.io/cunningpike/fediblockhole
3 | # https://github.com/cunningpike/fediblockhole/pkgs/container/fediblockhole/versions
4 | #
5 | # alternatively, use `latest` for the latest release or `edge` for the image
6 | # built from the most recent commit
7 | #
8 | # tag: latest
9 | tag: ""
10 | # use `Always` when using `latest` tag
11 | pullPolicy: IfNotPresent
12 |
13 | fediblockhole:
14 | # location of the configuration file. Default is /etc/default/fediblockhole.conf.toml
15 | conf_file:
16 | path: ""
17 | filename: ""
18 | # Location of a local allowlist file. It is recommended that this file should at a
19 | # minimum contain the web_domain of your own instance.
20 | allow_file:
21 | # Optionally, set the name of the file. This should match the data key in the
22 | # associated ConfigMap
23 | filename: ""
24 | # Location of a local blocklist file.
25 | block_file:
26 | # Optionally, set the name of the file. This should match the data key in the
27 | # associated ConfigMap
28 | filename: ""
29 | cron:
30 | # -- run `fediblock-sync` every hour
31 | sync:
32 | # @ignored
33 | enabled: false
34 | # @ignored
35 | schedule: "0 * * * *"
36 | failedJobsHistoryLimit: 1
37 | successfulJobsHistoryLimit: 3
38 |
39 | # if you manually change the UID/GID environment variables, ensure these values
40 | # match:
41 | podSecurityContext:
42 | runAsUser: 991
43 | runAsGroup: 991
44 | fsGroup: 991
45 |
46 | # @ignored
47 | securityContext: {}
48 |
49 | # -- Kubernetes manages pods for jobs and pods for deployments differently, so you might
50 | # need to apply different annotations to the two different sets of pods. The annotations
51 | # set with podAnnotations will be added to all deployment-managed pods.
52 | podAnnotations: {}
53 |
54 | # -- The annotations set with jobAnnotations will be added to all job pods.
55 | jobAnnotations: {}
56 |
57 | # -- Default resources for all Deployments and jobs unless overwritten
58 | resources: {}
59 | # We usually recommend not to specify default resources and to leave this as a conscious
60 | # choice for the user. This also increases chances charts run on environments with little
61 | # resources, such as Minikube. If you do want to specify resources, uncomment the following
62 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
63 | # limits:
64 | # cpu: 100m
65 | # memory: 128Mi
66 | # requests:
67 | # cpu: 100m
68 | # memory: 128Mi
69 |
70 | # @ignored
71 | nodeSelector: {}
72 |
73 | # @ignored
74 | tolerations: []
75 |
76 | # -- Affinity for all pods unless overwritten
77 | affinity: {}
78 |
--------------------------------------------------------------------------------
/container/.dockerignore:
--------------------------------------------------------------------------------
1 | Dockerfile
2 | #README.md
3 | *.pyc
4 | *.pyo
5 | *.pyd
6 | __pycache__
7 |
--------------------------------------------------------------------------------
/container/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use the official lightweight Python image.
2 | # https://hub.docker.com/_/python
3 | FROM python:slim
4 |
5 | # Copy local code to the container image.
6 | ENV APP_HOME /app
7 | WORKDIR $APP_HOME
8 |
9 | # Install production dependencies.
10 | RUN pip install fediblockhole
11 |
12 | USER 1001
13 | # Set the command on start to fediblock-sync.
14 | ENTRYPOINT ["fediblock-sync"]
15 |
--------------------------------------------------------------------------------
/etc/sample.fediblockhole.conf.toml:
--------------------------------------------------------------------------------
1 | # List of instances to read blocklists from.
2 | # If the instance makes its blocklist public, no authorization token is needed.
3 | # Otherwise, `token` is a Bearer token authorised to read domain_blocks.
4 | # If `admin` = True, use the more detailed admin API, which requires a token with a
5 | # higher level of authorization.
6 | # If `import_fields` are provided, only import these fields from the instance.
7 | # Overrides the global `import_fields` setting.
8 | blocklist_instance_sources = [
9 | # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
10 | # { domain = 'jorts.horse', token = '' }, # user accessible block list
11 | # { domain = 'jorts.horse', token_env_var = 'FBH_JORTS_TOKEN' }, # use environment variable for token
12 | # { domain = 'eigenmagic.net', token = '', admin = true }, # admin access required
13 | # { domain = 'eigenmagic.net', token_env_var = 'FBH_EIGENMAGIC_READ_TOKEN', admin = true }, # same, but use environment variable for token
14 |
15 | ]
16 |
17 | # List of URLs to read csv blocklists from
18 | # Format tells the parser which format to use when parsing the blocklist
19 | # max_severity tells the parser to override any severities that are higher than this value
20 | # import_fields tells the parser to only import that set of fields from a specific source
21 | blocklist_url_sources = [
22 | # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
23 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
24 |
25 | ]
26 |
27 | ## These global allowlists override blocks from blocklists
28 | # These are the same format and structure as blocklists, but they take precedence
29 | allowlist_url_sources = [
30 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-01.csv', format = 'csv' },
31 | { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-allowlist-02.csv', format = 'csv' },
32 | ]
33 |
34 | # List of instances to write blocklist to
35 | blocklist_instance_destinations = [
36 | # { domain = 'eigenmagic.net', token = '', max_followed_severity = 'silence'},
37 |
38 | # Alternate mechanism using environment variable for the token
39 | # { domain = 'eigenmagic.net', token_env_var = 'FBH_EIGENMAGIC_TOKEN', max_followed_severity = 'silence'},
40 |
41 | ]
42 |
43 | ## Store a local copy of the remote blocklists after we fetch them
44 | #save_intermediate = true
45 |
46 | ## Directory to store the local blocklist copies
47 | # savedir = '/tmp'
48 |
49 | ## File to save the fully merged blocklist into
50 | # blocklist_savefile = '/tmp/merged_blocklist.csv'
51 |
52 | ## File to save the audit log of counts across sources
53 | # blocklist_auditfile = '/tmp/domain_counts_list.csv'
54 |
55 | ## Don't push blocklist to instances, even if they're defined above
56 | # no_push_instance = false
57 |
58 | ## Don't fetch blocklists from URLs, even if they're defined above
59 | # no_fetch_url = false
60 |
61 | ## Don't fetch blocklists from instances, even if they're defined above
62 | # no_fetch_instance = false
63 |
64 | ## Set the mergeplan to use when dealing with overlaps between blocklists
65 | # The default 'max' mergeplan will use the harshest severity block found for a domain.
66 | # The 'min' mergeplan will use the lightest severity block found for a domain.
67 | # mergeplan = 'max'
68 |
69 | ## Optional threshold-based merging.
70 | # Only merge in domain blocks if the domain is mentioned in
71 | # at least `threshold` blocklists.
72 | # `merge_threshold` is an integer, with a default value of 0.
73 | # The `merge_threshold_type` can be `count` or `pct`.
74 | # If `count` type is selected, the threshold is reached when the domain
75 | # is mentioned in at least `merge_threshold` blocklists. The default value
76 | # of 0 means that every block in every list will be merged in.
77 | # If `pct` type is selected, `merge_threshold` is interpreted as a percentage,
78 | # i.e. if `merge_threshold` = 20, blocks will only be merged in if the domain
79 | # is present in at least 20% of blocklists.
80 | # Percentage calculated as number_of_mentions / total_number_of_blocklists.
81 | # The percentage method is more flexible, but also more complicated, so take care
82 | # when using it.
83 | #
84 | # merge_threshold_type = 'count'
85 | # merge_threshold = 0
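86 | #
87 | # For example, to only merge blocks that appear in at least 20% of your
88 | # blocklists (an illustrative value), you could set:
89 | # merge_threshold_type = 'pct'
90 | # merge_threshold = 20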
86 |
87 | ## set an override private comment to be added when pushing a NEW block to an instance
88 | # this does not require importing private comments
89 | # override_private_comment = 'Added by Fediblock Sync'
90 |
91 | ## Set which fields we import
92 | ## 'domain' and 'severity' are always imported, these are additional
93 | ##
94 | import_fields = ['public_comment', 'reject_media', 'reject_reports', 'obfuscate']
95 |
96 | ## Set which fields we export
97 | ## 'domain' and 'severity' are always exported, these are additional
98 | ##
99 | export_fields = ['public_comment']
100 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "fediblockhole"
3 | version = "0.4.6"
4 | description = "Federated blocklist management for Mastodon"
5 | readme = "README.md"
6 | license = {file = "LICENSE"}
7 | requires-python = ">=3.8"
8 | keywords = ["mastodon", "fediblock"]
9 | authors = [
10 | {name = "Justin Warren"}, {email = "justin@eigenmagic.com"}
11 | ]
12 | classifiers = [
13 | "Development Status :: 4 - Beta",
14 | "Intended Audience :: Developers",
15 | "Intended Audience :: System Administrators",
16 | "License :: OSI Approved :: GNU Affero General Public License v3",
17 | "Natural Language :: English",
18 | "Programming Language :: Python :: 3",
19 | "Programming Language :: Python :: 3.10",
20 | "Programming Language :: Python :: 3.9",
21 | "Programming Language :: Python :: 3.8",
22 | ]
23 | dependencies = [
24 | "requests",
25 | "toml"
26 | ]
27 |
28 | [project.urls]
29 | homepage = "https://github.com/eigenmagic/fediblockhole"
30 | documentation = "https://github.com/eigenmagic/fediblockhole"
31 | repository = "https://github.com/eigenmagic/fediblockhole"
32 |
33 | [project.scripts]
34 | fediblock-sync = "fediblockhole:main"
35 |
36 | [build-system]
37 | requires = ["hatchling"]
38 | build-backend = "hatchling.build"
39 |
40 | [tool.pytest.ini_options]
41 | addopts = [
42 | "--import-mode=importlib",
43 | ]
44 | norecursedirs = [
45 | "tests/helpers",
46 | ]
47 |
48 | [tool.uv]
49 | dev-dependencies = [
50 | "coverage[toml]>=7.6.1",
51 | "pytest ~= 8.3",
52 | ]
53 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | toml
3 | pytest
--------------------------------------------------------------------------------
/samples/demo-allowlist-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","private_comment","public_comment","reject_media","reject_reports","obfuscate"
2 | "eigenmagic.net","noop","Never block me","Only the domain field matters for allowlists",False,False,False
3 | "example.org","noop","Never block me either","The severity is ignored in allowlists as are all other fields",False,False,False
4 | "demo01.example.org","noop","Never block me either","But you can use them to leave yourself or others notes on why the item is here",False,False,False
5 |
--------------------------------------------------------------------------------
/samples/demo-allowlist-02.csv:
--------------------------------------------------------------------------------
1 | "domain","private_comment"
2 | "example.org","The private comment won't get loaded, but can be handy to leave yourself a note."
3 |
--------------------------------------------------------------------------------
/samples/demo-blocklist-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","reject_media","reject_reports","private_comment","public_comment","obfuscate"
2 | "qoto.org","suspend",True,True,,,True
3 | "sealion.club","suspend",True,True,,,True
4 | "develop.gab.com","suspend",True,True,,,True
5 | "gab.ai","suspend",True,True,,,True
6 | "gab.sleeck.eu","suspend",True,True,,,True
7 | "gab.com","suspend",True,True,,,True
8 | "kiwifarms.is","suspend",True,True,,,True
9 | "kiwifarms.net","suspend",True,True,,,True
10 | "gabfed.com","suspend",True,True,,,True
--------------------------------------------------------------------------------
/src/fediblockhole/__init__.py:
--------------------------------------------------------------------------------
1 | """A tool for managing federated Mastodon blocklists
2 | """
3 |
4 | from __future__ import annotations
5 |
6 | import argparse
7 | import csv
8 | import json
9 | import os.path
10 | import sys
11 | import time
12 | import urllib.request as urlr
13 | from importlib.metadata import version
14 |
15 | import requests
16 | import toml
17 |
18 | from .blocklists import BlockAuditList, Blocklist, parse_blocklist
19 | from .const import BlockAudit, BlockSeverity, DomainBlock
20 |
21 | __version__ = version("fediblockhole")
22 |
23 | import logging
24 |
25 | logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
26 | log = logging.getLogger("fediblockhole")
27 |
28 | # Max size of a URL-fetched blocklist
29 | URL_BLOCKLIST_MAXSIZE = 1024**3
30 |
31 | # Wait at most this long for a remote server to respond
32 | REQUEST_TIMEOUT = 30
33 |
34 | # Time to wait between instance API calls so we don't melt them
35 | # The default Mastodon rate limit is 300 calls per 5 minutes
36 | API_CALL_DELAY = 5 * 60 / 300 # 300 calls per 5 minutes
37 |
38 | # We always import the domain and the severity
39 | IMPORT_FIELDS = ["domain", "severity"]
40 |
41 | # Allowlists always import these fields
42 | ALLOWLIST_IMPORT_FIELDS = [
43 | "domain",
44 | "severity",
45 | "public_comment",
46 | "private_comment",
47 | "reject_media",
48 | "reject_reports",
49 | "obfuscate",
50 | ]
51 |
52 | # We always export the domain and the severity
53 | EXPORT_FIELDS = ["domain", "severity"]
54 |
55 |
56 | def sync_blocklists(conf: argparse.Namespace):
57 | """Sync instance blocklists from remote sources.
58 |
59 |     @param conf: The parsed configuration namespace
60 | """
61 | # Build a dict of blocklists we retrieve from remote sources.
62 | # We will merge these later using a merge algorithm we choose.
63 |
64 |     # Always import these fields, plus any extras defined in config.
65 |     # Concatenate rather than extend() so the module-level default
66 |     # list isn't mutated as a side effect.
67 |     import_fields = IMPORT_FIELDS + conf.import_fields
68 |
69 |     # Always export these fields, plus any extras defined in config.
70 |     # Same approach, keeping EXPORT_FIELDS unchanged.
71 |     export_fields = EXPORT_FIELDS + conf.export_fields
73 |
74 | blocklists = []
75 | # Fetch blocklists from URLs
76 | if not conf.no_fetch_url:
77 | blocklists.extend(
78 | fetch_from_urls(
79 | conf.blocklist_url_sources,
80 | import_fields,
81 | conf.save_intermediate,
82 | conf.savedir,
83 | export_fields,
84 | )
85 | )
86 |
87 | # Fetch blocklists from remote instances
88 | if not conf.no_fetch_instance:
89 | blocklists.extend(
90 | fetch_from_instances(
91 | conf.blocklist_instance_sources,
92 | import_fields,
93 | conf.save_intermediate,
94 | conf.savedir,
95 | export_fields,
96 | )
97 | )
98 |
99 | # Merge blocklists into an update dict
100 | merged = merge_blocklists(
101 | blocklists,
102 | conf.mergeplan,
103 | conf.merge_threshold,
104 | conf.merge_threshold_type,
105 | conf.blocklist_auditfile,
106 | )
107 |
108 | # Remove items listed in allowlists, if any
109 | allowlists = fetch_allowlists(conf)
110 | merged = apply_allowlists(merged, conf, allowlists)
111 |
112 | # Save the final mergelist, if requested
113 | if conf.blocklist_savefile:
114 | log.info(f"Saving merged blocklist to {conf.blocklist_savefile}")
115 | save_blocklist_to_file(merged, conf.blocklist_savefile, export_fields)
116 |
117 | # Push the blocklist to destination instances
118 | if not conf.no_push_instance:
119 | log.info("Pushing domain blocks to instances...")
120 | for dest in conf.blocklist_instance_destinations:
121 | target = dest["domain"]
122 | token = dest["token"]
123 | scheme = dest.get("scheme", "https")
124 | max_followed_severity = BlockSeverity(
125 | dest.get("max_followed_severity", "silence")
126 | )
127 | push_blocklist(
128 | token,
129 | target,
130 | merged,
131 | conf.dryrun,
132 | import_fields,
133 | max_followed_severity,
134 | scheme,
135 | conf.override_private_comment,
136 | )
137 |
138 |
139 | def apply_allowlists(merged: Blocklist, conf: argparse.Namespace, allowlists: list[Blocklist]):
140 | """Apply allowlists"""
141 | # Apply allows specified on the commandline
142 | for domain in conf.allow_domains:
143 | log.info(f"'{domain}' allowed by commandline, removing any blocks...")
144 | if domain in merged.blocks:
145 | del merged.blocks[domain]
146 |
147 | # Apply allows from URLs lists
148 | log.info("Removing domains from URL allowlists...")
149 | for alist in allowlists:
150 | log.debug(f"Processing allows from '{alist.origin}'...")
151 | for allowed in alist.blocks.values():
152 | domain = allowed.domain
153 | log.debug(f"Removing allowlisted domain '{domain}' from merged list.")
154 | if domain in merged.blocks:
155 | del merged.blocks[domain]
156 |
157 | return merged
158 |
159 |
160 | def fetch_allowlists(conf: argparse.Namespace) -> list[Blocklist]:
161 |     """Fetch allowlists from any configured allowlist URL sources."""
162 |     if conf.allowlist_url_sources:
163 |         allowlists = fetch_from_urls(
164 |             conf.allowlist_url_sources,
165 |             ALLOWLIST_IMPORT_FIELDS,
166 |             conf.save_intermediate,
167 |             conf.savedir,
168 |         )
169 |         return allowlists
170 |     return []
171 |
172 |
173 | def fetch_from_urls(
174 |     url_sources: list,
175 | import_fields: list = IMPORT_FIELDS,
176 | save_intermediate: bool = False,
177 | savedir: str = None,
178 | export_fields: list = EXPORT_FIELDS,
179 | ) -> list[Blocklist]:
180 |     """Fetch blocklists from URL sources
181 |     @param url_sources: A list of configuration dicts for URL sources
182 |     @param import_fields: A list of fields to import from each source
183 |     @returns: A list of Blocklist objects parsed from the URL sources
184 |     """
185 | log.info("Fetching domain blocks from URLs...")
186 | blocklists = []
187 | for item in url_sources:
188 | url = item["url"]
189 | # If import fields are provided, they override the global ones passed in
190 | source_import_fields = item.get("import_fields", None)
191 | if source_import_fields:
192 | # Ensure we always use the default fields
193 |             import_fields = IMPORT_FIELDS + source_import_fields
194 |
195 | max_severity = item.get("max_severity", "suspend")
196 | listformat = item.get("format", "csv")
197 | with urlr.urlopen(url) as fp:
198 | rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode("utf-8")
199 | bl = parse_blocklist(rawdata, url, listformat, import_fields, max_severity)
200 | blocklists.append(bl)
201 | if save_intermediate:
202 | save_intermediate_blocklist(bl, savedir, export_fields)
203 |
204 | return blocklists
205 |
206 |
207 | def fetch_from_instances(
208 |     sources: list,
209 | import_fields: list = IMPORT_FIELDS,
210 | save_intermediate: bool = False,
211 | savedir: str = None,
212 | export_fields: list = EXPORT_FIELDS,
213 | ) -> list[Blocklist]:
214 |     """Fetch blocklists from other instances
215 |     @param sources: A list of configuration dicts for instance sources
216 |     @param import_fields: A list of fields to import from each instance
217 |     @returns: A list of Blocklist objects fetched from the instances
218 |     """
219 | log.info("Fetching domain blocks from instances...")
220 | blocklists = []
221 | for item in sources:
222 | domain = item["domain"]
223 | admin = item.get("admin", False)
224 | token = item.get("token", None)
225 | scheme = item.get("scheme", "https")
226 | # itemsrc = f"{scheme}://{domain}/api"
227 |
228 | # If import fields are provided, they override the global ones passed in
229 | source_import_fields = item.get("import_fields", None)
230 | if source_import_fields:
231 | # Ensure we always use the default fields
232 |             import_fields = IMPORT_FIELDS + source_import_fields
233 |
234 | bl = fetch_instance_blocklist(domain, token, admin, import_fields, scheme)
235 | blocklists.append(bl)
236 | if save_intermediate:
237 | save_intermediate_blocklist(bl, savedir, export_fields)
238 | return blocklists
239 |
240 |
241 | def merge_blocklists(
242 | blocklists: list[Blocklist],
243 | mergeplan: str = "max",
244 | threshold: int = 0,
245 | threshold_type: str = "count",
246 | save_block_audit_file: str = None,
247 | ) -> Blocklist:
248 | """Merge fetched remote blocklists into a bulk update
249 |     @param blocklists: A list of Blocklist objects to merge,
250 |         one per source
251 | @param mergeplan: An optional method of merging overlapping block definitions
252 | 'max' (the default) uses the highest severity block found
253 | 'min' uses the lowest severity block found
254 | @param threshold: An integer used in the threshold mechanism.
255 | If a domain is not present in this number/pct or more of the blocklists,
256 | it will not get merged into the final list.
257 | @param threshold_type: choice of ['count', 'pct']
258 | If `count`, threshold is met if block is present in `threshold`
259 | or more blocklists.
260 |         If `pct`, threshold is met if the block is present in at least
261 |         `threshold` percent of the blocklists.
262 |     @returns: A Blocklist of merged DomainBlocks, keyed by domain
263 | """
264 | merged = Blocklist("fediblockhole.merge_blocklists")
265 | audit = BlockAuditList("fediblockhole.merge_blocklists")
266 |
267 | num_blocklists = len(blocklists)
268 |
269 | # Create a domain keyed list of blocks for each domain
270 | domain_blocks = {}
271 |
272 | for bl in blocklists:
273 | for block in bl.values():
274 | if "*" in block.domain:
275 | log.debug(f"Domain '{block.domain}' is obfuscated. Skipping it.")
276 | continue
277 | elif block.domain in domain_blocks:
278 | domain_blocks[block.domain].append(block)
279 | else:
280 | domain_blocks[block.domain] = [
281 | block,
282 | ]
283 |
284 | # Only merge items if `threshold` is met or exceeded
285 | for domain in domain_blocks:
286 | domain_matches_count = len(domain_blocks[domain])
287 | domain_matches_percent = domain_matches_count / num_blocklists * 100
288 | if threshold_type == "count":
289 | domain_threshold_level = domain_matches_count
290 | elif threshold_type == "pct":
291 | domain_threshold_level = domain_matches_percent
292 | # log.debug(f"domain threshold level: {domain_threshold_level}")
293 | else:
294 | raise ValueError(
295 | f"Unsupported threshold type '{threshold_type}'. Supported values are: 'count', 'pct'" # noqa
296 | )
297 |
298 | log.debug(f"Checking if {domain_threshold_level} >= {threshold} for {domain}")
299 | if domain_threshold_level >= threshold:
300 | # Add first block in the list to merged
301 | block = domain_blocks[domain][0]
302 | log.debug(f"Yes. Merging block: {block}")
303 |
304 | # Merge the others with this record
305 | for newblock in domain_blocks[domain][1:]:
306 | block = apply_mergeplan(block, newblock, mergeplan)
307 | merged.blocks[block.domain] = block
308 |
309 | if save_block_audit_file:
310 | blockdata: BlockAudit = {
311 | "domain": domain,
312 | "count": domain_matches_count,
313 | "percent": domain_matches_percent,
314 | }
315 | audit.blocks[domain] = blockdata
316 |
317 | if save_block_audit_file:
318 | log.info(f"Saving audit file to {save_block_audit_file}")
319 | save_domain_block_audit_to_file(audit, save_block_audit_file)
320 |
321 | return merged
322 |
323 |
324 | def apply_mergeplan(
325 | oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str = "max"
326 | ) -> DomainBlock:
327 | """Use a mergeplan to decide how to merge two overlapping block definitions
328 |
329 | @param oldblock: The existing block definition.
330 | @param newblock: The new block definition we want to merge in.
331 | @param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
332 | """
333 | # Default to the existing block definition
334 | blockdata = oldblock._asdict()
335 |
336 | # Merge comments
337 | keylist = ["public_comment", "private_comment"]
338 | for key in keylist:
339 | try:
340 | oldcomment = getattr(oldblock, key)
341 | newcomment = getattr(newblock, key)
342 | blockdata[key] = merge_comments(oldcomment, newcomment)
343 | except KeyError:
344 | log.debug(
345 | f"Key '{key}' missing from block definition so cannot compare. Continuing..." # noqa
346 | )
347 | continue
348 |
349 | # How do we override an earlier block definition?
350 | if mergeplan in ["max", None]:
351 | # Use the highest block level found (the default)
352 | # log.debug(f"Using 'max' mergeplan.")
353 |
354 | if newblock.severity > oldblock.severity:
355 | # log.debug(f"New block severity is higher. Using that.")
356 | blockdata["severity"] = newblock.severity
357 |
358 | # For 'reject_media', 'reject_reports', and 'obfuscate' if
359 | # the value is set and is True for the domain in
360 | # any blocklist then the value is set to True.
361 | for key in ["reject_media", "reject_reports", "obfuscate"]:
362 | newval = getattr(newblock, key)
363 | if newval is True:
364 | blockdata[key] = True
365 |
366 | elif mergeplan in ["min"]:
367 | # Use the lowest block level found
368 | log.debug("Using 'min' mergeplan.")
369 |
370 | if newblock.severity < oldblock.severity:
371 | blockdata["severity"] = newblock.severity
372 |
373 | # For 'reject_media', 'reject_reports', and 'obfuscate' if
374 | # the value is set and is False for the domain in
375 | # any blocklist then the value is set to False.
376 | for key in ["reject_media", "reject_reports", "obfuscate"]:
377 | newval = getattr(newblock, key)
378 | if newval is False:
379 | blockdata[key] = False
380 |
381 | else:
382 | raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")
383 |
384 | # log.debug(f"Block severity set to {blockdata['severity']}")
385 |
386 | return DomainBlock(**blockdata)
387 |
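# A quick illustration of the mergeplan semantics above (a sketch):
# merging a 'silence' block with a 'suspend' block for the same domain
# yields 'suspend' under the default 'max' plan and 'silence' under
# 'min'; likewise reject_media/reject_reports/obfuscate are OR-ed
# together under 'max' and AND-ed together under 'min'.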
388 |
389 | def merge_comments(oldcomment: str, newcomment: str) -> str:
390 | """Merge two comments
391 |
392 | @param oldcomment: The original comment we're merging into
393 |     @param newcomment: The new comment we want to merge in
394 | @returns: a new str of the merged comment
395 | """
396 | # Don't merge if both comments are None or ''
397 | if oldcomment in ["", None] and newcomment in ["", None]:
398 | return ""
399 |
400 | # If both comments are the same, or new comment is empty, don't merge
401 | if oldcomment == newcomment or newcomment in ["", None]:
402 | return oldcomment
403 |
404 | # If old comment is empty, just return the new one
405 | if oldcomment in ["", None]:
406 | return newcomment
407 |
408 | # We want to skip duplicate fragments so we don't end up
409 | # re-concatenating the same strings every time there's an
410 | # update, causing the comment to grow without bound.
411 | # We tokenize the comments, splitting them on ', ', and comparing
412 | # the tokens, skipping duplicates.
413 | # This means "boring, lack of moderation, nazis, scrapers" merging
414 | # with "lack of moderation, scrapers" should result in
415 | # "boring, lack of moderation, nazis, scrapers"
416 | old_tokens = oldcomment.split(", ")
417 | new_tokens = newcomment.split(", ")
418 |
419 | # Remove any empty string tokens that we get
420 | while "" in old_tokens:
421 | old_tokens.remove("")
422 | while "" in new_tokens:
423 | new_tokens.remove("")
424 |
425 | # Remove duplicate tokens
426 | for token in old_tokens:
427 | if token in new_tokens:
428 | new_tokens.remove(token)
429 |
430 | # Combine whatever tokens are left into one set
431 | tokenset = old_tokens
432 | tokenset.extend(new_tokens)
433 |
434 | # Return the merged string
435 | return ", ".join(tokenset)
436 |
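# Doctest-style illustration of the dedupe behaviour described above
# (a sketch):
#
#     >>> merge_comments("boring, lack of moderation, nazis, scrapers",
#     ...                "lack of moderation, scrapers")
#     'boring, lack of moderation, nazis, scrapers'
#
# Duplicate fragments in the new comment are dropped, so repeated syncs
# don't grow the comment without bound.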
437 |
438 | def requests_headers(token: str = None):
439 | """Set common headers for requests"""
440 | headers = {"User-Agent": f"FediBlockHole/{__version__}"}
441 | if token:
442 | headers["Authorization"] = f"Bearer {token}"
443 |
444 | return headers
445 |
446 |
447 | def fetch_instance_blocklist(
448 | host: str,
449 | token: str = None,
450 | admin: bool = False,
451 | import_fields: list = ["domain", "severity"],
452 | scheme: str = "https",
453 | ) -> Blocklist:
454 | """Fetch existing block list from server
455 |
456 | @param host: The remote host to connect to.
457 | @param token: The (optional) OAuth Bearer token to authenticate with.
458 | @param admin: Boolean flag to use the admin API if True.
459 | @param import_fields: A list of fields to import from the remote instance.
460 | @returns: A list of the domain blocks from the instance.
461 | """
462 | log.info(f"Fetching instance blocklist from {host} ...")
463 |
464 | if admin:
465 | api_path = "/api/v1/admin/domain_blocks"
466 | parse_format = "json"
467 | else:
468 | api_path = "/api/v1/instance/domain_blocks"
469 | parse_format = "mastodon_api_public"
470 |
471 | headers = requests_headers(token)
472 |
473 | url = f"{scheme}://{host}{api_path}"
474 |
475 | blockdata = []
476 | link = True
477 | while link:
478 | response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
479 | if response.status_code != 200:
480 | log.error(f"Cannot fetch remote blocklist: {response.content}")
481 |             raise ValueError(f"Unable to fetch domain block list: {response}")
482 |
483 |         # Each page of returned data is a JSON list of dicts,
484 |         # so we decode it and append it to the accumulated list
485 |         # of block data to parse once pagination is done.
486 |
487 | blockdata.extend(json.loads(response.content.decode("utf-8")))
488 | # Parse the link header to find the next url to fetch
489 |         # This is a weird and janky way of doing pagination, but
490 |         # it's what the API gives us, so we just have to deal with it.
491 | link = response.headers.get("Link", None)
492 | if link is None:
493 | break
494 | pagination = link.split(", ")
495 | if len(pagination) != 2:
496 | link = None
497 | break
498 |         else:
499 |             nexturl = pagination[0]
500 |             # prevurl = pagination[1]
501 | 
502 |             urlstring, rel = nexturl.split("; ")
503 |             url = urlstring.strip("<").rstrip(">")
504 |
505 | blocklist = parse_blocklist(blockdata, url, parse_format, import_fields)
506 |
507 | return blocklist
508 |
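# A sketch of the Link-header pagination handled above, assuming a
# typical Mastodon response (hypothetical host and ids):
#
#     Link: <https://example.org/api/v1/admin/domain_blocks?max_id=100>; rel="next",
#       <https://example.org/api/v1/admin/domain_blocks?min_id=200>; rel="prev"
#
# link.split(", ") yields the "next" and "prev" entries; splitting the
# first entry on "; " and stripping the angle brackets recovers the next
# URL to fetch.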
509 |
510 | def delete_block(token: str, host: str, id: int, scheme: str = "https"):
511 | """Remove a domain block"""
512 | log.debug(f"Removing domain block {id} at {host}...")
513 | api_path = "/api/v1/admin/domain_blocks/"
514 |
515 | url = f"{scheme}://{host}{api_path}{id}"
516 |
517 | response = requests.delete(
518 | url, headers=requests_headers(token), timeout=REQUEST_TIMEOUT
519 | )
520 | if response.status_code != 200:
521 | if response.status_code == 404:
522 | log.warning(f"No such domain block: {id}")
523 | return
524 |
525 | raise ValueError(
526 | f"Something went wrong: {response.status_code}: {response.content}"
527 | )
528 |
529 |
530 | def fetch_instance_follows(
531 | token: str, host: str, domain: str, scheme: str = "https"
532 | ) -> int:
533 | """Fetch the followers of the target domain at the instance
534 |
535 | @param token: the Bearer authentication token for OAuth access
536 | @param host: the instance API hostname/IP address
537 | @param domain: the domain to search for followers of
538 | @returns: int, number of local followers of remote instance accounts
539 | """
540 | api_path = "/api/v1/admin/measures"
541 | url = f"{scheme}://{host}{api_path}"
542 |
543 | key = "instance_follows"
544 |
545 | # This data structure only allows us to request a single domain
546 | # at a time, which limits the load on the remote instance of each call
547 | data = {
548 | "keys": [key],
549 | key: {"domain": domain},
550 | }
551 |
552 | # The Mastodon API only accepts JSON formatted POST data for measures
553 | response = requests.post(
554 | url, headers=requests_headers(token), json=data, timeout=REQUEST_TIMEOUT
555 | )
556 | if response.status_code != 200:
557 | if response.status_code == 403:
558 | log.error(
559 | f"Cannot fetch follow information for {domain} from {host}: {response.content}" # noqa
560 | )
561 |
562 | raise ValueError(
563 | f"Something went wrong: {response.status_code}: {response.content}"
564 | )
565 |
566 | # Get the total returned
567 | follows = int(response.json()[0]["total"])
568 | return follows
569 |
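# The POST body sent above for a single domain looks like this (a
# sketch, with a hypothetical domain):
#
#     {
#         "keys": ["instance_follows"],
#         "instance_follows": {"domain": "example.org"},
#     }
#
# The response is a JSON list whose first entry carries the 'total'
# count of local followers.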
570 |
571 | def check_followed_severity(
572 | host: str,
573 | token: str,
574 | domain: str,
575 | severity: BlockSeverity,
576 | max_followed_severity: BlockSeverity = BlockSeverity("silence"),
577 | scheme: str = "https",
578 | ):
579 | """Check an instance to see if it has followers of a to-be-blocked instance"""
580 |
581 | log.debug("Checking followed severity...")
582 | # Return straight away if we're not increasing the severity
583 | if severity <= max_followed_severity:
584 | return severity
585 |
586 | # If the instance has accounts that follow people on the to-be-blocked domain,
587 | # limit the maximum severity to the configured `max_followed_severity`.
588 | log.debug("checking for instance follows...")
589 | follows = fetch_instance_follows(token, host, domain, scheme)
590 | time.sleep(API_CALL_DELAY)
591 | if follows > 0:
592 | log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
593 | if severity > max_followed_severity:
594 | log.warning(
595 | f"Instance {host} has {follows} followers of accounts at {domain}. "
596 | f"Limiting block severity to {max_followed_severity}."
597 | )
598 | return max_followed_severity
599 | return severity
600 |
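# Worked example of the capping behaviour above (a sketch): pushing a
# 'suspend' block with the default max_followed_severity of 'silence'
# is downgraded to 'silence' if any local accounts still follow
# accounts on the target domain; with no local followers, the full
# 'suspend' severity goes through unchanged.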
601 |
602 | def is_change_needed(oldblock: DomainBlock, newblock: DomainBlock, import_fields: list):
603 | change_needed = oldblock.compare_fields(newblock, import_fields)
604 | return change_needed
605 |
606 |
607 | def update_known_block(
608 | token: str, host: str, block: DomainBlock, scheme: str = "https"
609 | ):
610 | """Update an existing domain block with information in blockdict"""
611 | api_path = "/api/v1/admin/domain_blocks/"
612 |
613 | id = block.id
614 | blockdata = block._asdict()
615 | del blockdata["id"]
616 |
617 | url = f"{scheme}://{host}{api_path}{id}"
618 |
619 | response = requests.put(
620 | url, headers=requests_headers(token), json=blockdata, timeout=REQUEST_TIMEOUT
621 | )
622 | if response.status_code != 200:
623 | raise ValueError(
624 | f"Something went wrong: {response.status_code}: {response.content}"
625 | )
626 |
627 |
628 | def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str = "https"):
629 | """Block a domain on Mastodon host"""
630 | log.debug(f"Adding block entry for {blockdata.domain} at {host}...")
631 | api_path = "/api/v1/admin/domain_blocks"
632 |
633 | url = f"{scheme}://{host}{api_path}"
634 |
635 | response = requests.post(
636 | url,
637 | headers=requests_headers(token),
638 | json=blockdata._asdict(),
639 | timeout=REQUEST_TIMEOUT,
640 | )
641 | if response.status_code == 422:
642 | # A stricter block already exists. Probably for the base domain.
643 | err = json.loads(response.content)
644 | log.warning(err["error"])
645 |
646 | elif response.status_code != 200:
647 |
648 | raise ValueError(
649 | f"Something went wrong: {response.status_code}: {response.content}"
650 | )
651 |
652 |
653 | def push_blocklist(
654 | token: str,
655 | host: str,
656 |     blocklist: Blocklist,
657 | dryrun: bool = False,
658 | import_fields: list = ["domain", "severity"],
659 | max_followed_severity: BlockSeverity = BlockSeverity("silence"),
660 | scheme: str = "https",
661 | override_private_comment: str = None,
662 | ):
663 | """Push a blocklist to a remote instance.
664 |
665 | Updates existing entries if they exist, creates new blocks if they don't.
666 |
667 |     @param token: The Bearer token for OAuth API authentication
668 |     @param host: The instance host, FQDN or IP
669 |     @param blocklist: A Blocklist of block definitions, keyed by domain
670 | @param import_fields: A list of fields to import to the instances.
671 | """
672 | log.info(f"Pushing blocklist to host {host} ...")
673 | # Fetch the existing blocklist from the instance
674 | # Force use of the admin API, and add 'id' to the list of fields
675 | if "id" not in import_fields:
676 | import_fields.append("id")
677 | serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme)
678 |
679 | # # Convert serverblocks to a dictionary keyed by domain name
680 | # knownblocks = {row.domain: row for row in serverblocks}
681 |
682 | for newblock in blocklist.values():
683 |
684 | log.debug(f"Processing block: {newblock}")
685 | if newblock.domain in serverblocks:
686 | log.debug(
687 | f"Block already exists for {newblock.domain}, "
688 | f"checking for differences..."
689 | )
690 |
691 | oldblock = serverblocks[newblock.domain]
692 |
693 | change_needed = is_change_needed(oldblock, newblock, import_fields)
694 |
695 | # Is the severity changing?
696 | if "severity" in change_needed:
697 | log.debug("Severity change requested, checking...")
698 | if newblock.severity > oldblock.severity:
699 | # Confirm if we really want to change the severity
700 | # If we still have followers of the remote domain,
701 | # we may not want to go all the way to full suspend,
702 | # depending on the configuration
703 | newseverity = check_followed_severity(
704 | host,
705 | token,
706 | oldblock.domain,
707 | newblock.severity,
708 | max_followed_severity,
709 | scheme,
710 | )
711 | if newseverity != oldblock.severity:
712 | newblock.severity = newseverity
713 | else:
714 | log.info(
715 | "Keeping severity of block the same to avoid disrupting followers." # noqa
716 | )
717 | change_needed.remove("severity")
718 |
719 | if change_needed:
720 | log.info(
721 | f"Change detected. Need to update {change_needed} "
722 | f"for domain block for {oldblock.domain}"
723 | )
724 | log.info(f"Old block definition: {oldblock}")
725 | log.info(f"Pushing new block definition: {newblock}")
726 | blockdata = oldblock.copy()
727 | blockdata.update(newblock)
728 | log.debug(f"Block as dict: {blockdata._asdict()}")
729 |
730 | if not dryrun:
731 | update_known_block(token, host, blockdata, scheme)
732 | # add a pause here so we don't melt the instance
733 | time.sleep(API_CALL_DELAY)
734 | else:
735 | log.info("Dry run selected. Not applying changes.")
736 |
737 | else:
738 |             log.debug("No differences detected. Not updating.")
740 |
741 | else:
742 | # stamp this record with a private comment, since we're the ones adding it
743 | if override_private_comment:
744 | newblock.private_comment = override_private_comment
745 |
746 | # This is a new block for the target instance, so we
747 | # need to add a block rather than update an existing one
748 | log.info(f"Adding new block: {newblock}...")
749 | log.debug(f"Block as dict: {newblock._asdict()}")
750 |
751 | # Make sure the new block doesn't clobber a domain with followers
752 | newblock.severity = check_followed_severity(
753 | host,
754 | token,
755 | newblock.domain,
756 | newblock.severity,
757 | max_followed_severity,
758 | scheme,
759 | )
760 | if not dryrun:
761 | add_block(token, host, newblock, scheme)
762 | # add a pause here so we don't melt the instance
763 | time.sleep(API_CALL_DELAY)
764 | else:
765 | log.info("Dry run selected. Not adding block.")
766 |
767 |
768 | def load_config(configfile: str):
769 | """Augment commandline arguments with config file parameters
770 |
771 | Config file is expected to be in TOML format
772 | """
773 | conf = toml.load(configfile)
774 | return conf
775 |
776 |
777 | def save_intermediate_blocklist(
778 | blocklist: Blocklist, filedir: str, export_fields: list = ["domain", "severity"]
779 | ):
780 | """Save a local copy of a blocklist we've downloaded"""
781 | # Invent a filename based on the remote source
782 | # If the source was a URL, convert it to something less messy
783 | # If the source was a remote domain, just use the name of the domain
784 | source = blocklist.origin
785 | log.debug(f"Saving intermediate blocklist from {source}")
786 | source = source.replace("/", "-")
787 | filename = f"{source}.csv"
788 | filepath = os.path.join(filedir, filename)
789 | save_blocklist_to_file(blocklist, filepath, export_fields)
790 |
791 |
792 | def save_blocklist_to_file(
793 | blocklist: Blocklist, filepath: str, export_fields: list = ["domain", "severity"]
794 | ):
795 | """Save a blocklist we've downloaded from a remote source
796 |
797 |     @param blocklist: A Blocklist of block definitions, keyed by domain
798 | @param filepath: The path to the file the list should be saved in.
799 | @param export_fields: Which fields to include in the export.
800 | """
801 |     try:
802 |         sorted_list = sorted(blocklist.blocks.items())
803 |     except KeyError:
804 |         log.error("Field 'domain' not found in blocklist.")
805 |         log.debug(f"blocklist is: {blocklist}")
806 |         raise
807 |     except AttributeError:
808 |         log.error(f"Expected a Blocklist object, got: {type(blocklist)}")
809 |         raise
811 |
812 | log.debug(f"export fields: {export_fields}")
813 |
814 | with open(filepath, "w") as fp:
815 | writer = csv.DictWriter(fp, export_fields, extrasaction="ignore")
816 | writer.writeheader()
817 | for key, value in sorted_list:
818 | writer.writerow(value)
819 |
820 |
821 | def resolve_replacements(endpoints: list[dict]) -> list[dict]:
822 | """Resolve any replacement tokens in the list of endpoints"""
823 |
824 | resolved = []
825 | for item in endpoints:
826 | item = dict(**item)
827 | if "token" in item and "token_env_var" in item:
828 | log.warning(
829 | f"Both `token` and `token_env_var` have been provided; using"
830 | f" the explicit token for {item.get('domain', 'the entry')}"
831 | )
832 |
833 | # We take the token that's explicitly stated,
834 | # even if there's also an environment variable set.
835 | # Delete the token_env_var key
836 | del item["token_env_var"]
837 |
838 | elif "token" in item:
839 | pass
840 |
841 | elif "token_env_var" in item:
842 | value = os.getenv(item["token_env_var"])
843 | if value is None:
844 | raise ValueError(
845 |                     f"Environment variable '{item['token_env_var']}' not set."
846 | )
847 |
848 | item["token"] = value
849 |
850 | else:
851 |             # Lastly, try looking for a default token.
852 | domain = item.get("domain")
853 | if domain is not None:
854 | domain_env_var_prefix = domain.upper().replace(".", "_")
855 | domain_env_var = f"{domain_env_var_prefix}_TOKEN"
856 | value = os.getenv(domain_env_var)
857 | if value is not None:
858 | item["token"] = value
859 |
860 | resolved.append(item)
861 | return resolved
862 |
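# Token resolution precedence, illustrated (a sketch with hypothetical
# values): an explicit 'token' always wins; otherwise 'token_env_var'
# names an environment variable that must be set (a missing variable
# raises ValueError); otherwise a default variable derived from the
# domain is tried, e.g.
#
#     [{"domain": "example.org"}]  # with EXAMPLE_ORG_TOKEN=abc123 set
#
# resolves to
#
#     [{"domain": "example.org", "token": "abc123"}]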
863 |
864 | def save_domain_block_audit_to_file(blocklist: BlockAuditList, filepath: str):
865 | """Save an audit log of domains blocked
866 |
867 |     @param blocklist: A BlockAuditList of audit records, keyed by domain
868 | @param filepath: The path to the file the list should be saved in.
869 | """
870 | export_fields = ["domain", "count", "percent"]
871 |
872 |     try:
873 |         sorted_list = sorted(blocklist.blocks.items())
874 |     except KeyError:
875 |         log.error("Field 'domain' not found in blocklist.")
876 |         log.debug(f"blocklist is: {blocklist}")
877 |         raise
878 |     except AttributeError:
879 |         log.error(f"Expected a BlockAuditList object, got: {type(blocklist)}")
880 |         raise
882 |
883 | log.debug("exporting audit file")
884 |
885 | with open(filepath, "w") as fp:
886 | writer = csv.DictWriter(fp, export_fields, extrasaction="ignore")
887 | writer.writeheader()
888 | for key, value in sorted_list:
889 | writer.writerow(value)
890 |
891 |
892 | def augment_args(args, tomldata: str = None):
893 | """Augment commandline arguments with config file parameters
894 |
895 | If tomldata is provided, uses that data instead of loading
896 | from a config file.
897 | """
898 | if tomldata:
899 | conf = toml.loads(tomldata)
900 | else:
901 | conf = toml.load(args.config)
902 |
903 | if not args.no_fetch_url:
904 | args.no_fetch_url = conf.get("no_fetch_url", False)
905 |
906 | if not args.no_fetch_instance:
907 | args.no_fetch_instance = conf.get("no_fetch_instance", False)
908 |
909 | if not args.no_push_instance:
910 | args.no_push_instance = conf.get("no_push_instance", False)
911 |
912 | if not args.blocklist_savefile:
913 | args.blocklist_savefile = conf.get("blocklist_savefile", None)
914 |
915 | if not args.save_intermediate:
916 | args.save_intermediate = conf.get("save_intermediate", False)
917 |
918 | if not args.override_private_comment:
919 | args.override_private_comment = conf.get("override_private_comment", None)
920 |
921 | if not args.savedir:
922 | args.savedir = conf.get("savedir", "/tmp")
923 |
924 | if not args.blocklist_auditfile:
925 | args.blocklist_auditfile = conf.get("blocklist_auditfile", None)
926 |
927 | if not args.export_fields:
928 | args.export_fields = conf.get("export_fields", [])
929 |
930 | if not args.import_fields:
931 | args.import_fields = conf.get("import_fields", [])
932 |
933 | if not args.mergeplan:
934 | args.mergeplan = conf.get("mergeplan", "max")
935 |
936 | if not args.merge_threshold:
937 | args.merge_threshold = conf.get("merge_threshold", 0)
938 |
939 | if not args.merge_threshold_type:
940 | args.merge_threshold_type = conf.get("merge_threshold_type", "count")
941 |
942 | args.blocklist_url_sources = conf.get("blocklist_url_sources", [])
943 | args.blocklist_instance_sources = resolve_replacements(
944 | conf.get("blocklist_instance_sources", [])
945 | )
946 | args.allowlist_url_sources = conf.get("allowlist_url_sources", [])
947 | args.blocklist_instance_destinations = resolve_replacements(
948 | conf.get("blocklist_instance_destinations", [])
949 | )
950 |
951 | return args
952 |
953 |
954 | def setup_argparse():
955 | """Setup the commandline arguments"""
956 | ap = argparse.ArgumentParser(
957 | description="Bulk blocklist tool",
958 | epilog=f"Part of FediBlockHole v{__version__}",
959 | formatter_class=argparse.ArgumentDefaultsHelpFormatter,
960 | )
961 | ap.add_argument(
962 | "-c",
963 | "--config",
964 | default="/etc/default/fediblockhole.conf.toml",
965 | help="Config file",
966 | )
967 | ap.add_argument(
968 | "-V", "--version", action="store_true", help="Show version and exit."
969 | )
970 |
971 | ap.add_argument(
972 | "-o",
973 | "--outfile",
974 | dest="blocklist_savefile",
975 | help="Save merged blocklist to a local file.",
976 | )
977 | ap.add_argument(
978 | "-S",
979 | "--save-intermediate",
980 | dest="save_intermediate",
981 | action="store_true",
982 | help="Save intermediate blocklists we fetch to local files.",
983 | )
984 | ap.add_argument(
985 | "-D",
986 | "--savedir",
987 | dest="savedir",
988 | help="Directory path to save intermediate lists.",
989 | )
990 | ap.add_argument("-m", "--mergeplan", choices=["min", "max"], help="Set mergeplan.")
991 | ap.add_argument(
992 | "-b",
993 | "--block-audit-file",
994 | dest="blocklist_auditfile",
995 | help="Save blocklist auditfile to this location.",
996 | )
997 | ap.add_argument("--merge-threshold", type=int, help="Merge threshold value")
998 | ap.add_argument(
999 | "--merge-threshold-type",
1000 | choices=["count", "pct"],
1001 | help="Type of merge threshold to use.",
1002 | )
1003 | ap.add_argument(
1004 | "--override-private-comment",
1005 | dest="override_private_comment",
1006 | help="Override private_comment with this string for new blocks when pushing blocklists.", # noqa
1007 | )
1008 |
1009 | ap.add_argument(
1010 | "-I",
1011 | "--import-field",
1012 | dest="import_fields",
1013 | action="append",
1014 | help="Extra blocklist fields to import.",
1015 | )
1016 | ap.add_argument(
1017 | "-E",
1018 | "--export-field",
1019 | dest="export_fields",
1020 | action="append",
1021 | help="Extra blocklist fields to export.",
1022 | )
1023 | ap.add_argument(
1024 | "-A",
1025 | "--allow",
1026 | dest="allow_domains",
1027 | action="append",
1028 | default=[],
1029 | help="Override any blocks to allow this domain.",
1030 | )
1031 |
1032 | ap.add_argument(
1033 | "--no-fetch-url",
1034 | dest="no_fetch_url",
1035 | action="store_true",
1036 | help="Don't fetch from URLs, even if configured.",
1037 | )
1038 | ap.add_argument(
1039 | "--no-fetch-instance",
1040 | dest="no_fetch_instance",
1041 | action="store_true",
1042 | help="Don't fetch from instances, even if configured.",
1043 | )
1044 | ap.add_argument(
1045 | "--no-push-instance",
1046 | dest="no_push_instance",
1047 | action="store_true",
1048 | help="Don't push to instances, even if configured.",
1049 | )
1050 |
1051 | ap.add_argument(
1052 | "--loglevel",
1053 | choices=["debug", "info", "warning", "error", "critical"],
1054 | help="Set log output level.",
1055 | )
1056 | ap.add_argument(
1057 | "--dryrun",
1058 | action="store_true",
1059 | help="Don't actually push updates, just show what would happen.",
1060 | )
1061 |
1062 | return ap
1063 |
1064 |
1065 | def main():
1066 |
1067 | ap = setup_argparse()
1068 | args = ap.parse_args()
1069 |
1070 | if args.loglevel is not None:
1071 | levelname = args.loglevel.upper()
1072 | log.setLevel(getattr(logging, levelname))
1073 |
1074 | if args.version:
1075 | print(f"v{__version__}")
1076 | sys.exit(0)
1077 |
1078 | # Load the configuration file
1079 | args = augment_args(args)
1080 |
1081 | # Do the work of syncing
1082 | sync_blocklists(args)
1083 |
--------------------------------------------------------------------------------
/src/fediblockhole/blocklists.py:
--------------------------------------------------------------------------------
1 | """Parse various blocklist data formats
2 | """
3 |
4 | from __future__ import annotations
5 |
6 | import csv
7 | import json
8 | import logging
9 | from dataclasses import dataclass, field
10 | from typing import Iterable
11 |
12 | from .const import BlockAudit, BlockSeverity, DomainBlock
13 |
14 | log = logging.getLogger("fediblockhole")
15 |
16 |
17 | @dataclass
18 | class Blocklist:
19 | """A Blocklist object
20 |
21 | A Blocklist is a list of DomainBlocks from an origin
22 | """
23 |
24 | origin: str = None
25 | blocks: dict[str, DomainBlock] = field(default_factory=dict)
26 |
27 | def __len__(self):
28 | return len(self.blocks)
29 |
30 | def __class_getitem__(cls, item):
31 | return dict[str, DomainBlock]
32 |
33 | def __getitem__(self, item):
34 | return self.blocks[item]
35 |
36 | def __iter__(self):
37 | return self.blocks.__iter__()
38 |
39 | def items(self):
40 | return self.blocks.items()
41 |
42 | def values(self):
43 | return self.blocks.values()
44 |
45 |
46 | @dataclass
47 | class BlockAuditList:
48 |     """A BlockAuditList object
49 | 
50 |     A BlockAuditList is a list of BlockAudits from an origin
51 | """
52 |
53 | origin: str = None
54 | blocks: dict[str, BlockAudit] = field(default_factory=dict)
55 |
56 | def __len__(self):
57 | return len(self.blocks)
58 |
59 | def __class_getitem__(cls, item):
60 | return dict[str, BlockAudit]
61 |
62 | def __getitem__(self, item):
63 | return self.blocks[item]
64 |
65 | def __iter__(self):
66 | return self.blocks.__iter__()
67 |
68 | def items(self):
69 | return self.blocks.items()
70 |
71 | def values(self):
72 | return self.blocks.values()
73 |
74 |
75 | class BlocklistParser(object):
76 | """
77 | Base class for parsing blocklists
78 | """
79 |
80 | do_preparse = False
81 |
82 | def __init__(
83 | self,
84 | import_fields: list = ["domain", "severity"],
85 | max_severity: str = "suspend",
86 | ):
87 | """Create a Parser
88 |
89 | @param import_fields: an optional list of fields to limit the parser to.
90 | Ignore any fields in a block item that aren't in import_fields.
91 | """
92 | self.import_fields = import_fields
93 | self.max_severity = BlockSeverity(max_severity)
94 |
95 | def preparse(self, blockdata) -> Iterable:
96 | """Some raw datatypes need to be converted into an iterable"""
97 | raise NotImplementedError
98 |
99 | def parse_blocklist(self, blockdata, origin: str = None) -> Blocklist:
100 | """Parse an iterable of blocklist items
101 |         @param blockdata: An iterable of blocklist items
102 |         @returns: A Blocklist of DomainBlocks, keyed by domain
103 | """
104 | if self.do_preparse:
105 | blockdata = self.preparse(blockdata)
106 |
107 | parsed_list = Blocklist(origin)
108 | for blockitem in blockdata:
109 | block = self.parse_item(blockitem)
110 | parsed_list.blocks[block.domain] = block
111 | return parsed_list
112 |
113 | def parse_item(self, blockitem) -> DomainBlock:
114 | """Parse an individual block item
115 |
116 | @param blockitem: an individual block to be parsed
117 |         Only fields listed in self.import_fields are imported.
118 | """
119 | raise NotImplementedError
120 |
121 |
122 | class BlocklistParserJSON(BlocklistParser):
123 | """Parse a JSON formatted blocklist"""
124 |
125 | do_preparse = True
126 |
127 | def preparse(self, blockdata) -> Iterable:
128 | """Parse the blockdata as JSON if needed"""
129 |         if isinstance(blockdata, str):
130 | return json.loads(blockdata)
131 | return blockdata
132 |
133 | def parse_item(self, blockitem: dict) -> DomainBlock:
134 | # Remove fields we don't want to import
135 | origitem = blockitem.copy()
136 | for key in origitem:
137 | if key not in self.import_fields:
138 | del blockitem[key]
139 |
140 | # Convert dict to NamedTuple with the double-star operator
141 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa
142 | block = DomainBlock(**blockitem)
143 | if block.severity > self.max_severity:
144 | block.severity = self.max_severity
145 | return block
146 |
147 |
148 | class BlocklistParserMastodonAPIPublic(BlocklistParserJSON):
149 | """The public blocklist API is slightly different to the admin one"""
150 |
151 | def parse_item(self, blockitem: dict) -> DomainBlock:
152 | # Remove fields we don't want to import
153 | origitem = blockitem.copy()
154 | for key in origitem:
155 |             # The Mastodon public API uses the 'comment' field
156 | # to mean 'public_comment' because what even is consistency?
157 | if key == "comment":
158 | key = "public_comment"
159 | blockitem["public_comment"] = blockitem["comment"]
160 | del blockitem["comment"]
161 | if key not in self.import_fields:
162 | del blockitem[key]
163 |
164 | # Convert dict to NamedTuple with the double-star operator
165 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa
166 | block = DomainBlock(**blockitem)
167 | if block.severity > self.max_severity:
168 | block.severity = self.max_severity
169 | return block
170 |
171 |
172 | class BlocklistParserCSV(BlocklistParser):
173 | """Parse CSV formatted blocklists
174 |
175 | The parser expects the CSV data to include a header with the field names.
176 | """
177 |
178 | do_preparse = True
179 |
180 | def preparse(self, blockdata) -> Iterable:
181 | """Use a csv.DictReader to create an iterable from the blockdata"""
182 | return csv.DictReader(blockdata.split("\n"))
183 |
184 | def parse_item(self, blockitem: dict) -> DomainBlock:
185 | # Coerce booleans from string to Python bool
186 | # FIXME: Is this still necessary with the DomainBlock object?
187 | for boolkey in ["reject_media", "reject_reports", "obfuscate"]:
188 | if boolkey in blockitem:
189 | blockitem[boolkey] = str2bool(blockitem[boolkey])
190 |
191 | # Remove fields we don't want to import
192 | origitem = blockitem.copy()
193 | for key in origitem:
194 | if key not in self.import_fields:
195 | log.debug(f"ignoring field '{key}'")
196 | del blockitem[key]
197 |
198 | # Convert dict to DomainBlock with the double-star operator
199 | # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments # noqa
200 | block = DomainBlock(**blockitem)
201 | if block.severity > self.max_severity:
202 | block.severity = self.max_severity
203 | return block
204 |
205 |
206 | class BlocklistParserMastodonCSV(BlocklistParserCSV):
207 | """Parse Mastodon CSV formatted blocklists
208 |
209 | The Mastodon v4.1.x domain block CSV export prefixes its
210 | field names with a '#' character because… reasons?
211 | """
212 |
213 | do_preparse = True
214 |
215 | def parse_item(self, blockitem: dict) -> DomainBlock:
216 | """Build a new blockitem dict with new un-#ed keys"""
217 | newdict = {}
218 | for key in blockitem:
219 | newkey = key.lstrip("#")
220 | newdict[newkey] = blockitem[key]
221 |
222 | return super().parse_item(newdict)
223 |
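# For example (a sketch), a Mastodon v4.1.x export header row of
# '#domain,#severity,...' produces DictReader keys like '#domain',
# which parse_item() strips back to 'domain' before delegating to the
# plain CSV parser above.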
224 |
225 | class RapidBlockParserCSV(BlocklistParserCSV):
226 | """Parse RapidBlock CSV blocklists
227 |
228 | RapidBlock CSV blocklists are just a newline separated list of domains.
229 | """
230 |
231 | def preparse(self, blockdata) -> Iterable:
232 | """Prepend a 'domain' field header to the data"""
233 | log.debug(f"blockdata: {blockdata[:100]}")
234 | blockdata = "".join(["domain\r\n", blockdata])
235 |
236 | return csv.DictReader(blockdata.split("\r\n"))
237 |
238 |
239 | class RapidBlockParserJSON(BlocklistParserJSON):
240 | """Parse RapidBlock JSON formatted blocklists"""
241 |
242 | def preparse(self, blockdata) -> Iterable:
243 | rb_dict = json.loads(blockdata)
244 | # We want to iterate over all the dictionary items
245 | return rb_dict["blocks"].items()
246 |
247 | def parse_item(self, blockitem: tuple) -> DomainBlock:
248 | """Parse an individual item in a RapidBlock list"""
249 | # Each item is a tuple of:
250 | # (domain, {dictionary of attributes})
251 | domain = blockitem[0]
252 |
253 | # RapidBlock has a binary block level which we map
254 | # to 'suspend' if True, and 'noop' if False.
255 | isblocked = blockitem[1]["isBlocked"]
256 | if isblocked:
257 | severity = "suspend"
258 | else:
259 | severity = "noop"
260 |
261 | if "public_comment" in self.import_fields:
262 | public_comment = blockitem[1]["reason"]
263 | else:
264 | public_comment = ""
265 |
266 | # There's a 'tags' field as well, but we can't
267 | # do much with that in Mastodon yet
268 |
269 | block = DomainBlock(domain, severity, public_comment)
270 | if block.severity > self.max_severity:
271 | block.severity = self.max_severity
272 |
273 | return block
274 |
275 |
276 | def str2bool(boolstring: str) -> bool:
277 | """Helper function to convert boolean strings to actual Python bools"""
278 | boolstring = boolstring.lower()
279 | if boolstring in ["true", "t", "1", "y", "yes"]:
280 | return True
281 | elif boolstring in ["", "false", "f", "0", "n", "no"]:
282 | return False
283 | else:
284 | raise ValueError(f"Cannot parse value '{boolstring}' as boolean")
285 |
286 |
287 | FORMAT_PARSERS = {
288 | "csv": BlocklistParserCSV,
289 | "mastodon_csv": BlocklistParserMastodonCSV,
290 | "json": BlocklistParserJSON,
291 | "mastodon_api_public": BlocklistParserMastodonAPIPublic,
292 | "rapidblock.csv": RapidBlockParserCSV,
293 | "rapidblock.json": RapidBlockParserJSON,
294 | }
295 |
296 |
297 | # helper function to select the appropriate Parser
298 | def parse_blocklist(
299 | blockdata,
300 | origin,
301 | format="csv",
302 | import_fields: list = ["domain", "severity"],
303 | max_severity: str = "suspend",
304 | ):
305 | """Parse a blocklist in the given format"""
306 | log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...")
307 |
308 | parser = FORMAT_PARSERS[format](import_fields, max_severity)
309 | return parser.parse_blocklist(blockdata, origin)
310 |
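# Example usage (a sketch; the origin string is just a label recording
# where the data came from):
#
#     csvdata = "domain,severity\nexample.org,suspend\n"
#     bl = parse_blocklist(csvdata, "example-source", format="csv")
#     bl["example.org"].severity  # 'suspend'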
--------------------------------------------------------------------------------
/src/fediblockhole/const.py:
--------------------------------------------------------------------------------
1 | """ Constant objects used by FediBlockHole
2 | """
3 |
4 | from __future__ import annotations
5 |
6 | import enum
7 | import logging
8 |
9 | log = logging.getLogger("fediblockhole")
10 |
11 |
12 | class SeverityLevel(enum.IntEnum):
13 | """How severe should a block be? Higher is more severe."""
14 |
15 | NONE = enum.auto()
16 | SILENCE = enum.auto()
17 | SUSPEND = enum.auto()
18 |
19 |
20 | class BlockSeverity(object):
21 | """A representation of a block severity
22 |
23 | We add some helpful functions rather than using a bare IntEnum
24 | """
25 |
26 | def __init__(self, severity: str = None):
27 | self._level = self.str2level(severity)
28 |
29 | @property
30 | def level(self):
31 | return self._level
32 |
33 | @level.setter
34 | def level(self, value):
35 | if isinstance(value, SeverityLevel):
36 | self._level = value
37 |         elif isinstance(value, str):
38 | self._level = self.str2level(value)
39 | else:
40 | raise ValueError(f"Invalid level value '{value}'")
41 |
42 | def str2level(self, severity: str = None):
43 | """Convert a string severity level to an internal enum"""
44 |
45 | if severity in [None, "", "noop"]:
46 | return SeverityLevel.NONE
47 |
48 | elif severity in ["silence"]:
49 | return SeverityLevel.SILENCE
50 |
51 | elif severity in ["suspend"]:
52 | return SeverityLevel.SUSPEND
53 |
54 | else:
55 | raise ValueError(f"Invalid severity value '{severity}'")
56 |
57 | def __repr__(self):
58 | return f"'{str(self)}'"
59 |
60 | def __str__(self):
61 | """A string version of the severity level"""
62 | levelmap = {
63 | SeverityLevel.NONE: "noop",
64 | SeverityLevel.SILENCE: "silence",
65 | SeverityLevel.SUSPEND: "suspend",
66 | }
67 | return levelmap[self.level]
68 |
69 |     def __lt__(self, other):
70 |         return self._level < other._level
71 | 
72 |     def __gt__(self, other):
73 |         return self._level > other._level
74 | 
75 |     def __eq__(self, other):
76 |         return other is not None and self._level == other._level
77 | 
78 |     def __le__(self, other):
79 |         return self._level <= other._level
80 | 
81 |     def __ge__(self, other):
82 |         return self._level >= other._level
88 |
89 |
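# The comparison operators give the ordering that the mergeplans rely
# on (a sketch):
#
#     BlockSeverity("noop") < BlockSeverity("silence") < BlockSeverity("suspend")
#     str(BlockSeverity("suspend"))  # 'suspend'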
90 | class BlockAudit(object):
91 |
92 | fields = [
93 | "domain",
94 | "count",
95 | "percent",
96 | ]
97 |
98 | all_fields = ["domain", "count", "percent", "id"]
99 |
100 |     def __init__(self, domain: str, count: int = 0, percent: float = 0, id: int = None):  # noqa
101 | """Initialize the BlockAudit"""
102 | self.domain = domain
103 | self.count = count
104 | self.percent = percent
105 | self.id = id
106 |
107 | def _asdict(self):
108 | """Return a dict version of this object"""
109 | dictval = {
110 | "domain": self.domain,
111 | "count": self.count,
112 | "percent": self.percent,
113 | }
114 | if self.id:
115 | dictval["id"] = self.id
116 |
117 | return dictval
118 |
119 |     def __repr__(self):
120 | 
121 |         return f"<BlockAudit {self.domain} count={self.count} percent={self.percent}>"
122 |
123 | def copy(self):
124 | """Make a copy of this object and return it"""
125 | retval = BlockAudit(**self._asdict())
126 | return retval
127 |
128 | def update(self, dict):
129 | """Update my kwargs"""
130 | for key in dict:
131 | setattr(self, key, dict[key])
132 |
133 | def __iter__(self):
134 | """Be iterable"""
135 | keys = self.fields
136 |
137 | if getattr(self, "id", False):
138 | keys.append("id")
139 |
140 | for k in keys:
141 | yield k
142 |
143 | def __getitem__(self, k, default=None):
144 | "Behave like a dict for getting values"
145 | if k not in self.all_fields:
146 | raise KeyError(f"Invalid key '{k}'")
147 |
148 | return getattr(self, k, default)
149 |
150 | def get(self, k, default=None):
151 | return self.__getitem__(k, default)
152 |
153 |
154 | # class _DomainBlock(NamedTuple):
155 | # domain: str # FIXME: Use an actual Domain object from somewhere?
156 | # severity: BlockSeverity = BlockSeverity.SUSPEND
157 | # public_comment: str = ''
158 | # private_comment: str = ''
159 | # reject_media: bool = False
160 | # reject_reports: bool = False
161 | # obfuscate: bool = False
162 |
163 |
164 | class DomainBlock(object):
165 |
166 | fields = [
167 | "domain",
168 | "severity",
169 | "public_comment",
170 | "private_comment",
171 | "reject_media",
172 | "reject_reports",
173 | "obfuscate",
174 | ]
175 |
176 | all_fields = [
177 | "domain",
178 | "severity",
179 | "public_comment",
180 | "private_comment",
181 | "reject_media",
182 | "reject_reports",
183 | "obfuscate",
184 | "id",
185 | ]
186 |
187 | def __init__(
188 | self,
189 | domain: str,
190 | severity: BlockSeverity = BlockSeverity("suspend"),
191 | public_comment: str = "",
192 | private_comment: str = "",
193 | reject_media: bool = False,
194 | reject_reports: bool = False,
195 | obfuscate: bool = False,
196 | id: int = None,
197 | ):
198 | """Initialize the DomainBlock"""
199 | self.domain = domain
200 | self.severity = severity
201 | self.public_comment = public_comment
202 | self.private_comment = private_comment
203 | self.reject_media = reject_media
204 | self.reject_reports = reject_reports
205 | self.obfuscate = obfuscate
206 | self.id = id
207 |
208 | @property
209 | def severity(self):
210 | return self._severity
211 |
212 | @severity.setter
213 | def severity(self, sev):
214 | if isinstance(sev, BlockSeverity):
215 | self._severity = sev
216 | else:
217 | self._severity = BlockSeverity(sev)
218 |
219 | def _asdict(self):
220 | """Return a dict version of this object"""
221 | dictval = {
222 | "domain": self.domain,
223 | "severity": str(self.severity),
224 | "public_comment": self.public_comment,
225 | "private_comment": self.private_comment,
226 | "reject_media": self.reject_media,
227 | "reject_reports": self.reject_reports,
228 | "obfuscate": self.obfuscate,
229 | }
230 | if self.id:
231 | dictval["id"] = self.id
232 |
233 | return dictval
234 |
235 | def compare_fields(self, other, fields=None) -> list:
236 | """Compare two DomainBlocks on specific fields.
237 | If all the fields are equal, the DomainBlocks are equal.
238 |
239 | @returns: a list of the fields that are different
240 | """
241 | if not isinstance(other, DomainBlock):
242 | raise ValueError(f"Cannot compare DomainBlock to {type(other)}:{other}")
243 |
244 | if fields is None:
245 | fields = self.fields
246 |
247 | diffs = []
248 | # Check if all the fields are equal
249 |         for field in fields:
250 | if getattr(self, field) != getattr(other, field):
251 | diffs.append(field)
252 | return diffs
253 |
254 | def __eq__(self, other):
255 | diffs = self.compare_fields(other)
256 |         return len(diffs) == 0
258 |
259 |     def __repr__(self):
260 | 
261 |         return f"<DomainBlock {self.domain} severity={self.severity}>"
262 |
263 | def copy(self):
264 | """Make a copy of this object and return it"""
265 | retval = DomainBlock(**self._asdict())
266 | return retval
267 |
268 | def update(self, dict):
269 | """Update my kwargs"""
270 | for key in dict:
271 | setattr(self, key, dict[key])
272 |
273 | def __iter__(self):
274 | """Be iterable"""
275 | keys = self.fields
276 |
277 | if getattr(self, "id", False):
278 | keys.append("id")
279 |
280 | for k in keys:
281 | yield k
282 |
283 | def __getitem__(self, k, default=None):
284 | "Behave like a dict for getting values"
285 | if k not in self.all_fields:
286 | raise KeyError(f"Invalid key '{k}'")
287 |
288 | return getattr(self, k, default)
289 |
290 | def get(self, k, default=None):
291 | return self.__getitem__(k, default)
292 |
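# Example round-trip (a sketch): severity strings are coerced to
# BlockSeverity on assignment, and _asdict() renders them back as
# strings.
#
#     block = DomainBlock("example.org", "silence", public_comment="spam")
#     block.severity > BlockSeverity("noop")   # True
#     block._asdict()["severity"]              # 'silence'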
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import pytest
5 |
6 | sys.path.append(os.path.join(os.path.dirname(__file__), "helpers"))
7 |
8 |
9 | def load_data(datafile):
10 | """Load test data from a fixture datafile"""
11 | with open(os.path.join(os.path.dirname(__file__), "fixtures", datafile)) as fp:
12 | return fp.read()
13 |
14 |
15 | @pytest.fixture
16 | def data_mastodon_json():
17 | return load_data("data-mastodon.json")
18 |
19 |
20 | @pytest.fixture
21 | def data_rapidblock_json():
22 | return load_data("data-rapidblock.json")
23 |
24 |
25 | @pytest.fixture
26 | def data_suspends_01():
27 | return load_data("data-suspends-01.csv")
28 |
29 |
30 | @pytest.fixture
31 | def data_silences_01():
32 | return load_data("data-silences-01.csv")
33 |
34 |
35 | @pytest.fixture
36 | def data_noop_01():
37 | return load_data("data-noop-01.csv")
38 |
--------------------------------------------------------------------------------
/tests/fixtures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eigenmagic/fediblockhole/ba40084772a565f36af1290070e6c9bba14fb9e7/tests/fixtures/__init__.py
--------------------------------------------------------------------------------
/tests/fixtures/data-mastodon.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": "234",
4 | "domain": "example.org",
5 | "created_at": "2023-01-09T05:17:50.614Z",
6 | "severity": "suspend",
7 | "reject_media": true,
8 | "reject_reports": true,
9 | "private_comment": "A private comment",
10 | "public_comment": "A public comment",
11 | "obfuscate": true
12 | },
13 | {
14 | "id": "233",
15 | "domain": "example2.org",
16 | "created_at": "2023-01-09T05:09:01.859Z",
17 | "severity": "silence",
18 | "reject_media": true,
19 | "reject_reports": true,
20 | "private_comment": "Another private comment",
21 | "public_comment": "Another public comment",
22 | "obfuscate": true
23 | },
24 | {
25 | "id": "232",
26 | "domain": "example3.org",
27 | "created_at": "2023-01-09T05:08:58.833Z",
28 | "severity": "suspend",
29 | "reject_media": true,
30 | "reject_reports": true,
31 | "private_comment": "More comments? What is this?",
32 | "public_comment": "Yes we love to comment",
33 | "obfuscate": true
34 | },
35 | {
36 | "id": "231",
37 | "domain": "example4.org",
38 | "created_at": "2023-01-09T05:04:01.856Z",
39 | "severity": "noop",
40 | "reject_media": true,
41 | "reject_reports": true,
42 | "private_comment": "I cannot believe all the comments",
43 | "public_comment": "Look how many comments we can fit in here",
44 | "obfuscate": true
45 | },
46 | {
47 | "id": "230",
48 | "domain": "example5.org",
49 | "created_at": "2023-01-08T21:37:22.665Z",
50 | "severity": "suspend",
51 | "reject_media": false,
52 | "reject_reports": false,
53 | "private_comment": "",
54 | "public_comment": "lack of moderation",
55 | "obfuscate": false
56 | },
57 | {
58 | "id": "2308",
59 | "domain": "example6.org",
60 | "created_at": "2023-01-06T08:36:53.989Z",
61 | "severity": "suspend",
62 | "reject_media": false,
63 | "reject_reports": false,
64 | "private_comment": "",
65 | "public_comment": "anti-trans bigotry",
66 | "obfuscate": false
67 | },
68 | {
69 | "id": "2306",
70 | "domain": "example7.org",
71 | "created_at": "2023-01-04T08:14:05.381Z",
72 | "severity": "suspend",
73 | "reject_media": false,
74 | "reject_reports": false,
75 | "private_comment": "",
76 | "public_comment": "lack of moderation",
77 | "obfuscate": false
78 | },
79 | {
80 | "id": "2305",
81 | "domain": "example8.org",
82 | "created_at": "2023-01-04T08:13:48.891Z",
83 | "severity": "suspend",
84 | "reject_media": false,
85 | "reject_reports": false,
86 | "private_comment": "freeze peach",
87 | "public_comment": "lack of moderation, conspiracy weirdness",
88 | "obfuscate": false
89 | },
90 | {
91 | "id": "2301",
92 | "domain": "example9.org",
93 | "created_at": "2023-01-04T08:11:32.904Z",
94 | "severity": "silence",
95 | "reject_media": false,
96 | "reject_reports": false,
97 | "private_comment": "",
98 | "public_comment": "alt-right conspiracies",
99 | "obfuscate": false
100 | },
101 | {
102 | "id": "453",
103 | "domain": "example15.org",
104 | "created_at": "2022-12-05T08:26:59.920Z",
105 | "severity": "suspend",
106 | "reject_media": true,
107 | "reject_reports": true,
108 | "private_comment": "cryptocurrency",
109 | "public_comment": "cryptocurrency",
110 | "obfuscate": true
111 | }
112 | ]
113 |
--------------------------------------------------------------------------------
/tests/fixtures/data-noop-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate"
2 | "public-comment.example.org","noop","This is a public comment","This is a private comment",FALSE,FALSE,FALSE
3 | "private-comment.example.org","noop",,"This is a private comment",FALSE,FALSE,FALSE
4 | "diff-comment.example.org","noop","Noop public comment","Noop private comment",FALSE,FALSE,FALSE
5 | "2diff-comment.example.org","noop","Public duplicate","Private duplicate",FALSE,FALSE,FALSE
6 | "qoto.org","noop",,,FALSE,FALSE,FALSE
7 | "sealion.club","noop",,,FALSE,FALSE,FALSE
8 | "develop.gab.com","noop",,,FALSE,FALSE,FALSE
9 | "gab.ai","noop",,,FALSE,FALSE,FALSE
10 | "gab.sleeck.eu","noop",,,FALSE,FALSE,FALSE
11 | "gab.com","noop",,,FALSE,FALSE,FALSE
12 | "kiwifarms.is","noop",,,FALSE,FALSE,FALSE
13 | "kiwifarms.net","noop",,,FALSE,FALSE,FALSE
14 | "gabfed.com","noop",,,FALSE,FALSE,FALSE
15 |
--------------------------------------------------------------------------------
/tests/fixtures/data-silences-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate"
2 | "public-comment.example.org","silence","This is a public comment","This is a private comment",FALSE,FALSE,FALSE
3 | "private-comment.example.org","silence",,"This is a private comment",FALSE,FALSE,FALSE
4 | "diff-comment.example.org","silence","Silence public comment","Silence private comment",FALSE,FALSE,FALSE
5 | "2diff-comment.example.org","silence","Public duplicate","Private duplicate",FALSE,FALSE,FALSE
6 | "qoto.org","silence",,,FALSE,FALSE,FALSE
7 | "sealion.club","silence",,,FALSE,FALSE,FALSE
8 | "develop.gab.com","silence",,,FALSE,FALSE,FALSE
9 | "gab.ai","silence",,,FALSE,FALSE,FALSE
10 | "gab.sleeck.eu","silence",,,FALSE,FALSE,FALSE
11 | "gab.com","silence",,,FALSE,FALSE,FALSE
12 | "kiwifarms.is","silence",,,FALSE,FALSE,FALSE
13 | "kiwifarms.net","silence",,,FALSE,FALSE,FALSE
14 | "gabfed.com","silence",,,FALSE,FALSE,FALSE
15 |
--------------------------------------------------------------------------------
/tests/fixtures/data-suspends-01.csv:
--------------------------------------------------------------------------------
1 | "domain","severity","public_comment","private_comment","reject_media","reject_reports","obfuscate"
2 | "public-comment.example.org","suspend","This is a public comment","This is a private comment",TRUE,TRUE,TRUE
3 | "private-comment.example.org","suspend",,"This is a private comment",TRUE,TRUE,TRUE
4 | "diff-comment.example.org","suspend","Suspend public comment","Suspend private comment",TRUE,TRUE,TRUE
5 | "2diff-comment.example.org","suspend","Suspend comment 1","Suspend private 1",TRUE,TRUE,TRUE
6 | "qoto.org","suspend",,,TRUE,TRUE,TRUE
7 | "sealion.club","suspend",,,TRUE,TRUE,TRUE
8 | "develop.gab.com","suspend",,,TRUE,TRUE,TRUE
9 | "gab.ai","suspend",,,TRUE,TRUE,TRUE
10 | "gab.sleeck.eu","suspend",,,TRUE,TRUE,TRUE
11 | "gab.com","suspend",,,TRUE,TRUE,TRUE
12 | "kiwifarms.is","suspend",,,TRUE,TRUE,TRUE
13 | "kiwifarms.net","suspend",,,TRUE,TRUE,TRUE
14 | "gabfed.com","suspend",,,TRUE,TRUE,TRUE
15 |
--------------------------------------------------------------------------------
/tests/helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eigenmagic/fediblockhole/ba40084772a565f36af1290070e6c9bba14fb9e7/tests/helpers/__init__.py
--------------------------------------------------------------------------------
/tests/helpers/util.py:
--------------------------------------------------------------------------------
1 | """ Utility functions for tests
2 | """
3 | from fediblockhole import setup_argparse, augment_args
4 |
5 |
6 | def shim_argparse(testargv: list = [], tomldata: str = None):
7 | """Helper function to parse test args
8 | """
9 | ap = setup_argparse()
10 | args = ap.parse_args(testargv)
11 | if tomldata is not None:
12 | args = augment_args(args, tomldata)
13 | return args
14 |
--------------------------------------------------------------------------------
/tests/test_allowlist.py:
--------------------------------------------------------------------------------
1 | """ Test allowlists
2 | """
3 |
4 | import pytest
5 | from util import shim_argparse
6 |
7 | from fediblockhole import apply_allowlists
8 | from fediblockhole.blocklists import Blocklist
9 | from fediblockhole.const import DomainBlock
10 |
11 |
12 | def test_cmdline_allow_removes_domain():
13 | """Test that -A removes entries from merged"""
14 | conf = shim_argparse(["-A", "removeme.org"])
15 |
16 | merged = Blocklist(
17 | "test_allowlist.merged",
18 | {
19 | "example.org": DomainBlock("example.org"),
20 | "example2.org": DomainBlock("example2.org"),
21 | "removeme.org": DomainBlock("removeme.org"),
22 | "keepblockingme.org": DomainBlock("keepblockingme.org"),
23 | },
24 | )
25 |
26 | merged = apply_allowlists(merged, conf, {})
27 |
28 | with pytest.raises(KeyError):
29 | merged["removeme.org"]
30 |
31 |
32 | def test_allowlist_removes_domain():
33 | """Test that an item in an allowlist removes entries from merged"""
34 | conf = shim_argparse()
35 |
36 | merged = Blocklist(
37 | "test_allowlist.merged",
38 | {
39 | "example.org": DomainBlock("example.org"),
40 | "example2.org": DomainBlock("example2.org"),
41 | "removeme.org": DomainBlock("removeme.org"),
42 | "keepblockingme.org": DomainBlock("keepblockingme.org"),
43 | },
44 | )
45 |
46 | allowlists = [
47 | Blocklist(
48 | "test_allowlist",
49 | {
50 | "removeme.org": DomainBlock("removeme.org", "noop"),
51 | },
52 | )
53 | ]
54 |
55 | merged = apply_allowlists(merged, conf, allowlists)
56 |
57 | with pytest.raises(KeyError):
58 | merged["removeme.org"]
59 |
60 |
61 | def test_allowlist_removes_tld():
62 | """Test that an item in an allowlist removes entries from merged"""
63 | conf = shim_argparse()
64 |
65 | merged = Blocklist(
66 | "test_allowlist.merged",
67 | {
68 | ".cf": DomainBlock(".cf"),
69 | "example.org": DomainBlock("example.org"),
70 | ".tk": DomainBlock(".tk"),
71 | "keepblockingme.org": DomainBlock("keepblockingme.org"),
72 | },
73 | )
74 |
75 | allowlists = [
76 | Blocklist(
77 | "test_allowlist.list1",
78 | {
79 | ".cf": DomainBlock(".cf", "noop"),
80 | ".tk": DomainBlock(".tk", "noop"),
81 | },
82 | )
83 | ]
84 |
85 | merged = apply_allowlists(merged, conf, allowlists)
86 |
87 | with pytest.raises(KeyError):
88 | merged[".cf"]
89 |
90 | with pytest.raises(KeyError):
91 | merged[".tk"]
92 |
--------------------------------------------------------------------------------
/tests/test_blockseverity.py:
--------------------------------------------------------------------------------
1 | from fediblockhole.const import BlockSeverity
2 |
3 |
4 | def test_severity_eq():
5 |
6 | s1 = BlockSeverity("suspend")
7 | s2 = BlockSeverity("suspend")
8 |
9 | assert s1 == s2
10 |
11 | s3 = BlockSeverity("silence")
12 | s4 = BlockSeverity("silence")
13 |
14 | assert s3 == s4
15 |
16 | s5 = BlockSeverity("noop")
17 | s6 = BlockSeverity("noop")
18 |
19 | assert s5 == s6
20 |
21 |
22 | def test_severity_ne():
23 | s1 = BlockSeverity("noop")
24 | s2 = BlockSeverity("silence")
25 | s3 = BlockSeverity("suspend")
26 |
27 | assert s1 != s2
28 | assert s2 != s3
29 | assert s1 != s3
30 |
31 |
32 | def test_severity_lt():
33 | s1 = BlockSeverity("noop")
34 | s2 = BlockSeverity("silence")
35 | s3 = BlockSeverity("suspend")
36 |
37 | assert s1 < s2
38 | assert s2 < s3
39 | assert s1 < s3
40 |
41 |
42 | def test_severity_gt():
43 | s1 = BlockSeverity("noop")
44 | s2 = BlockSeverity("silence")
45 | s3 = BlockSeverity("suspend")
46 |
47 | assert s2 > s1
48 | assert s3 > s2
49 | assert s3 > s1
50 |
51 |
52 | def test_severity_le():
53 | s1 = BlockSeverity("noop")
54 | s2 = BlockSeverity("silence")
55 | s2a = BlockSeverity("silence")
56 | s3 = BlockSeverity("suspend")
57 |
58 | assert s1 <= s2
59 | assert s2a <= s2
60 | assert s2 <= s3
61 | assert s1 <= s3
62 |
63 |
64 | def test_severity_ge():
65 | s1 = BlockSeverity("noop")
66 | s2 = BlockSeverity("silence")
67 | s2a = BlockSeverity("silence")
68 | s3 = BlockSeverity("suspend")
69 |
70 | assert s2 >= s1
71 | assert s2a >= s1
72 | assert s3 >= s2
73 | assert s3 >= s1
74 |
--------------------------------------------------------------------------------
/tests/test_cmdline.py:
--------------------------------------------------------------------------------
1 | """Test the commandline defined parameters correctly
2 | """
3 |
4 | from fediblockhole import setup_argparse
5 |
6 |
7 | def test_cmdline_no_configfile():
8 | """Test bare command with no configfile"""
9 | ap = setup_argparse()
10 | args = ap.parse_args([])
11 |
12 | assert args.config == "/etc/default/fediblockhole.conf.toml"
13 | assert args.mergeplan is None
14 | assert args.blocklist_savefile is None
15 | assert args.save_intermediate is False
16 | assert args.savedir is None
17 | assert args.import_fields is None
18 | assert args.export_fields is None
19 |
20 | assert args.no_fetch_url is False
21 | assert args.no_fetch_instance is False
22 | assert args.no_push_instance is False
23 | assert args.dryrun is False
24 |
25 | assert args.loglevel is None
26 |
27 |
28 | def test_cmdline_mergeplan_min():
29 | """Test setting mergeplan min"""
30 | ap = setup_argparse()
31 | args = ap.parse_args(["-m", "min"])
32 |
33 | assert args.mergeplan == "min"
34 |
35 |
36 | def test_set_allow_domain():
37 | """Set a single allow domain on commandline"""
38 | ap = setup_argparse()
39 | args = ap.parse_args(["-A", "example.org"])
40 |
41 | assert args.allow_domains == ["example.org"]
42 |
43 |
44 | def test_set_multiple_allow_domains():
45 | """Set multiple allow domains on commandline"""
46 | ap = setup_argparse()
47 | args = ap.parse_args(
48 | ["-A", "example.org", "-A", "example2.org", "-A", "example3.org"]
49 | )
50 |
51 | assert args.allow_domains == ["example.org", "example2.org", "example3.org"]
52 |
--------------------------------------------------------------------------------
/tests/test_configfile.py:
--------------------------------------------------------------------------------
1 | """Test the config file is loading parameters correctly
2 | """
3 |
4 | from textwrap import dedent
5 |
6 | from util import shim_argparse
7 |
8 | from fediblockhole import augment_args, setup_argparse
9 |
10 |
11 | def test_parse_tomldata():
12 | tomldata = """
13 | # Test TOML config for FediBlockHole
14 |
15 | blocklist_instance_sources = []
16 |
17 | blocklist_url_sources = []
18 |
19 | save_intermediate = true
20 |
21 | import_fields = ['public_comment']
22 | """
23 | ap = setup_argparse()
24 | args = ap.parse_args([])
25 | args = augment_args(args, tomldata)
26 |
27 | assert args.blocklist_instance_sources == []
28 | assert args.blocklist_url_sources == []
29 | assert args.save_intermediate is True
30 | assert args.import_fields == ["public_comment"]
31 |
32 |
33 | def test_set_mergeplan_max():
34 | tomldata = """mergeplan = 'max'
35 | """
36 | args = shim_argparse([], tomldata)
37 |
38 | assert args.mergeplan == "max"
39 |
40 |
41 | def test_set_mergeplan_min():
42 | tomldata = """mergeplan = 'min'
43 | """
44 | args = shim_argparse([], tomldata)
45 |
46 | assert args.mergeplan == "min"
47 |
48 |
49 | def test_set_allowlists():
50 | tomldata = """# Comment on config
51 | allowlist_url_sources = [ { url='file:///path/to/allowlist', format='csv'} ]
52 | """
53 | args = shim_argparse([], tomldata)
54 |
55 | assert args.mergeplan == "max"
56 | assert args.allowlist_url_sources == [
57 | {
58 | "url": "file:///path/to/allowlist",
59 | "format": "csv",
60 | }
61 | ]
62 |
63 |
64 | def test_set_merge_threshold_default():
65 | tomldata = """
66 | """
67 | args = shim_argparse([], tomldata)
68 |
69 | assert args.mergeplan == "max"
70 | assert args.merge_threshold_type == "count"
71 |
72 |
73 | def test_set_merge_threshold_count():
74 | tomldata = """# Add a merge threshold
75 | merge_threshold_type = 'count'
76 | merge_threshold = 2
77 | """
78 | args = shim_argparse([], tomldata)
79 |
80 | assert args.mergeplan == "max"
81 | assert args.merge_threshold_type == "count"
82 | assert args.merge_threshold == 2
83 |
84 |
85 | def test_set_merge_threshold_pct():
86 | tomldata = """# Add a merge threshold
87 | merge_threshold_type = 'pct'
88 | merge_threshold = 35
89 | """
90 | args = shim_argparse([], tomldata)
91 |
92 | assert args.mergeplan == "max"
93 | assert args.merge_threshold_type == "pct"
94 | assert args.merge_threshold == 35
95 |
96 |
97 | def test_destination_token_from_environment(monkeypatch):
98 | tomldata = dedent(
99 | """\
100 | blocklist_instance_destinations = [
101 | { domain='example.com', token='raw-token'},
102 | { domain='example2.com', token_env_var='TOKEN_ENV_VAR' },
103 | { domain='env-token.com' },
104 | { domain='www.env-token.com' },
105 | ]
106 | """
107 | )
108 |
109 | monkeypatch.setenv("TOKEN_ENV_VAR", "env-token")
110 | monkeypatch.setenv("ENV-TOKEN_COM_TOKEN", "env-token")
111 | monkeypatch.setenv("WWW_ENV-TOKEN_COM_TOKEN", "www-env-token")
112 |
113 | args = shim_argparse([], tomldata)
114 |
115 | assert args.blocklist_instance_destinations[0]["token"] == "raw-token"
116 | assert args.blocklist_instance_destinations[1]["token"] == "env-token"
117 | assert args.blocklist_instance_destinations[2]["token"] == "env-token"
118 | assert args.blocklist_instance_destinations[3]["token"] == "www-env-token"
119 |
120 |
121 | def test_instance_sources_token_from_environment(monkeypatch):
122 | tomldata = dedent(
123 | """\
124 | blocklist_instance_sources = [
125 | { domain='example.com', token='raw-token'},
126 | { domain='example2.com', token_env_var='TOKEN_ENV_VAR' },
127 | { domain='env-token.com' },
128 | { domain='www.env-token.com' },
129 | ]
130 | """
131 | )
132 |
133 | monkeypatch.setenv("TOKEN_ENV_VAR", "env-token")
134 | monkeypatch.setenv("ENV-TOKEN_COM_TOKEN", "env-token")
135 | monkeypatch.setenv("WWW_ENV-TOKEN_COM_TOKEN", "www-env-token")
136 |
137 | args = shim_argparse([], tomldata)
138 |
139 | assert args.blocklist_instance_sources[0]["token"] == "raw-token"
140 | assert args.blocklist_instance_sources[1]["token"] == "env-token"
141 | assert args.blocklist_instance_sources[2]["token"] == "env-token"
142 | assert args.blocklist_instance_sources[3]["token"] == "www-env-token"
143 |
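144 | 
145 | # A combined sketch: mergeplan and threshold settings are exercised
146 | # separately above; assuming the options don't interact, parsing them from
147 | # one config should set both independently.
148 | def test_set_mergeplan_and_threshold():
149 |     tomldata = """mergeplan = 'min'
150 | merge_threshold_type = 'pct'
151 | merge_threshold = 40
152 | """
153 |     args = shim_argparse([], tomldata)
154 | 
155 |     assert args.mergeplan == "min"
156 |     assert args.merge_threshold_type == "pct"
157 |     assert args.merge_threshold == 40
158 | 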
--------------------------------------------------------------------------------
/tests/test_domainblock.py:
--------------------------------------------------------------------------------
1 | """Test the DomainBlock structure
2 | """
3 |
4 | import pytest
5 |
6 | from fediblockhole.const import BlockSeverity, DomainBlock, SeverityLevel
7 |
8 |
9 | def test_blocksev_blankstring():
10 | a = BlockSeverity("")
11 | assert a.level == SeverityLevel.NONE
12 |
13 |
14 | def test_blocksev_string_noop():
15 | a = BlockSeverity("noop")
16 | assert a.level == SeverityLevel.NONE
17 |
18 |
19 | def test_blocksev_none():
20 | a = BlockSeverity(None)
21 | assert a.level == SeverityLevel.NONE
22 |
23 |
24 | def test_empty_domainblock_fails():
25 | with pytest.raises(TypeError):
26 | a = DomainBlock() # noqa
27 |
28 |
29 | def test_default_suspend():
30 | a = DomainBlock("example.org")
31 | assert a.domain == "example.org"
32 | assert a.severity.level == SeverityLevel.SUSPEND
33 |
34 |
35 | def test_severity_suspend():
36 | a = DomainBlock("example.org", "suspend")
37 | assert a.domain == "example.org"
38 | assert a.severity.level == SeverityLevel.SUSPEND
39 |
40 |
41 | def test_severity_silence():
42 | a = DomainBlock("example.org", "silence")
43 | assert a.domain == "example.org"
44 | assert a.severity.level == SeverityLevel.SILENCE
45 |
46 |
47 | def test_severity_noop_string():
48 | a = DomainBlock("example.org", "noop")
49 | assert a.domain == "example.org"
50 | assert a.severity.level == SeverityLevel.NONE
51 |
52 |
53 | def test_severity_none():
54 | a = DomainBlock("example.org", None)
55 | assert a.domain == "example.org"
56 | assert a.severity.level == SeverityLevel.NONE
57 |
58 |
59 | def test_compare_equal_blocks():
60 |
61 | a = DomainBlock("example1.org", "suspend")
62 | b = DomainBlock("example1.org", "suspend")
63 |
64 | assert a == b
65 |
66 |
67 | def test_compare_diff_domains():
68 |
69 | a = DomainBlock("example1.org", "suspend")
70 | b = DomainBlock("example2.org", "suspend")
71 |
72 | assert a != b
73 |
74 |
75 | def test_compare_diff_sevs():
76 |
77 | a = DomainBlock("example1.org", "suspend")
78 | b = DomainBlock("example1.org", "silence")
79 |
80 | assert a != b
81 |
82 |
83 | def test_compare_diff_sevs_2():
84 |
85 | a = DomainBlock("example1.org", "suspend")
86 | b = DomainBlock("example1.org", "noop")
87 |
88 | assert a != b
89 |
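90 | 
91 | # A minimal sketch, assuming a DomainBlock's severity attribute compares
92 | # directly against a standalone BlockSeverity (both wrap the same
93 | # SeverityLevel values used throughout this file).
94 | def test_severity_attribute_comparison():
95 | 
96 |     a = DomainBlock("example.org", "silence")
97 | 
98 |     assert a.severity == BlockSeverity("silence")
99 |     assert a.severity < BlockSeverity("suspend")
100 | 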
--------------------------------------------------------------------------------
/tests/test_merge_comments.py:
--------------------------------------------------------------------------------
1 | """ Test merging of comments
2 | """
3 |
4 | from fediblockhole import merge_comments
5 |
6 |
7 | def test_merge_blank_comments():
8 |
9 | oldcomment = ""
10 | newcomment = ""
11 |
12 | merged_comment = merge_comments(oldcomment, newcomment)
13 |
14 | assert merged_comment == ""
15 |
16 |
17 | def test_merge_None_comments():
18 |
19 | oldcomment = None
20 | newcomment = None
21 |
22 | merged_comment = merge_comments(oldcomment, newcomment)
23 |
24 | assert merged_comment == ""
25 |
26 |
27 | def test_merge_oldstr_newNone():
28 |
29 | oldcomment = "fred, bibble"
30 | newcomment = None
31 |
32 | merged_comment = merge_comments(oldcomment, newcomment)
33 |
34 | assert merged_comment == "fred, bibble"
35 |
36 |
37 | def test_merge_oldempty_newcomment():
38 |
39 | oldcomment = ""
40 | newcomment = "fred, bibble"
41 |
42 | merged_comment = merge_comments(oldcomment, newcomment)
43 |
44 | assert merged_comment == "fred, bibble"
45 |
46 |
47 | def test_merge_oldNone_newcomment():
48 |
49 | oldcomment = None
50 | newcomment = "fred, bibble"
51 |
52 | merged_comment = merge_comments(oldcomment, newcomment)
53 |
54 | assert merged_comment == "fred, bibble"
55 |
56 |
57 | def test_merge_two_different():
58 |
59 | oldcomment = "happy, medium, spinning"
60 | newcomment = "fred, bibble"
61 |
62 | merged_comment = merge_comments(oldcomment, newcomment)
63 |
64 | assert merged_comment == "happy, medium, spinning, fred, bibble"
65 |
66 |
67 | def test_merge_overlaps():
68 |
69 | oldcomment = "happy, medium, spinning"
70 | newcomment = "fred, medium, bibble, spinning"
71 |
72 | merged_comment = merge_comments(oldcomment, newcomment)
73 |
74 | assert merged_comment == "happy, medium, spinning, fred, bibble"
75 |
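76 | 
77 | # A sketch mixing the None and empty-string cases handled separately above;
78 | # assuming both are treated as "no comment", the merge should still be "".
79 | def test_merge_oldNone_newempty():
80 | 
81 |     oldcomment = None
82 |     newcomment = ""
83 | 
84 |     merged_comment = merge_comments(oldcomment, newcomment)
85 | 
86 |     assert merged_comment == ""
87 | 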
--------------------------------------------------------------------------------
/tests/test_merge_thresholds.py:
--------------------------------------------------------------------------------
1 | """Test merge with thresholds
2 | """
3 |
4 | from fediblockhole import merge_blocklists
5 | from fediblockhole.blocklists import Blocklist, parse_blocklist
6 | from fediblockhole.const import DomainBlock
7 |
8 | import_fields = [
9 | "domain",
10 | "severity",
11 | "public_comment",
12 | "private_comment",
13 | "reject_media",
14 | "reject_reports",
15 | "obfuscate",
16 | ]
17 |
18 |
19 | # FIXME: Deprecated data loader. Now using fixtures.
20 | def __load_test_blocklist_data(datafiles):
21 |
22 | blocklists = []
23 |
24 | for df in datafiles:
25 | with open(df) as fp:
26 | data = fp.read()
27 | bl = parse_blocklist(data, df, "csv", import_fields)
28 | blocklists.append(bl)
29 |
30 | return blocklists
31 |
32 |
33 | def test_mergeplan_count_2():
34 | """Only merge a block if present in 2 or more lists"""
35 |
36 | bl_1 = Blocklist(
37 | "test01",
38 | {
39 | "onemention.example.org": DomainBlock(
40 | "onemention.example.org", "suspend", "", "", True, True, True
41 | ),
42 | "twomention.example.org": DomainBlock(
43 | "twomention.example.org", "suspend", "", "", True, True, True
44 | ),
45 | "threemention.example.org": DomainBlock(
46 | "threemention.example.org", "suspend", "", "", True, True, True
47 | ),
48 | },
49 | )
50 |
51 | bl_2 = Blocklist(
52 | "test2",
53 | {
54 | "twomention.example.org": DomainBlock(
55 | "twomention.example.org", "suspend", "", "", True, True, True
56 | ),
57 | "threemention.example.org": DomainBlock(
58 | "threemention.example.org", "suspend", "", "", True, True, True
59 | ),
60 | },
61 | )
62 |
63 |     bl_3 = Blocklist(
64 |         "test3",
65 |         {
66 |             # "threemention" is the only domain carried by this third list,
67 |             # making it the only domain present in all three sources. (A dict
68 |             # literal keeps one value per key, so repeating it is dead code.)
69 |             "threemention.example.org": DomainBlock(
70 |                 "threemention.example.org", "suspend", "", "", True, True, True
71 |             ),
72 |         },
73 |     )
74 |
75 | ml = merge_blocklists([bl_1, bl_2, bl_3], "max", threshold=2)
76 |
77 | assert "onemention.example.org" not in ml
78 | assert "twomention.example.org" in ml
79 | assert "threemention.example.org" in ml
80 |
81 |
82 | def test_mergeplan_count_3():
83 | """Only merge a block if present in 3 or more lists"""
84 |
85 | bl_1 = Blocklist(
86 | "test01",
87 | {
88 | "onemention.example.org": DomainBlock(
89 | "onemention.example.org", "suspend", "", "", True, True, True
90 | ),
91 | "twomention.example.org": DomainBlock(
92 | "twomention.example.org", "suspend", "", "", True, True, True
93 | ),
94 | "threemention.example.org": DomainBlock(
95 | "threemention.example.org", "suspend", "", "", True, True, True
96 | ),
97 | },
98 | )
99 |
100 | bl_2 = Blocklist(
101 | "test2",
102 | {
103 | "twomention.example.org": DomainBlock(
104 | "twomention.example.org", "suspend", "", "", True, True, True
105 | ),
106 | "threemention.example.org": DomainBlock(
107 | "threemention.example.org", "suspend", "", "", True, True, True
108 | ),
109 | },
110 | )
111 |
112 |     bl_3 = Blocklist(
113 |         "test3",
114 |         {
115 |             # As above, "threemention" is the only domain in this third list,
116 |             # so it is the only domain present in all three sources and the
117 |             # only one that can clear a threshold of 3.
118 |             "threemention.example.org": DomainBlock(
119 |                 "threemention.example.org", "suspend", "", "", True, True, True
120 |             ),
121 |         },
122 |     )
123 |
124 | ml = merge_blocklists([bl_1, bl_2, bl_3], "max", threshold=3)
125 |
126 | assert "onemention.example.org" not in ml
127 | assert "twomention.example.org" not in ml
128 | assert "threemention.example.org" in ml
129 |
130 |
131 | def test_mergeplan_pct_30():
132 |     """Only merge a block if present in at least 30% of lists"""
133 |
134 | bl_1 = Blocklist(
135 | "test01",
136 | {
137 | "onemention.example.org": DomainBlock(
138 | "onemention.example.org", "suspend", "", "", True, True, True
139 | ),
140 | "twomention.example.org": DomainBlock(
141 | "twomention.example.org", "suspend", "", "", True, True, True
142 | ),
143 | "fourmention.example.org": DomainBlock(
144 | "fourmention.example.org", "suspend", "", "", True, True, True
145 | ),
146 | },
147 | )
148 |
149 | bl_2 = Blocklist(
150 | "test2",
151 | {
152 | "twomention.example.org": DomainBlock(
153 | "twomention.example.org", "suspend", "", "", True, True, True
154 | ),
155 | "threemention.example.org": DomainBlock(
156 | "threemention.example.org", "suspend", "", "", True, True, True
157 | ),
158 | "fourmention.example.org": DomainBlock(
159 | "fourmention.example.org", "suspend", "", "", True, True, True
160 | ),
161 | },
162 | )
163 |
164 | bl_3 = Blocklist(
165 | "test3",
166 | {
167 | "threemention.example.org": DomainBlock(
168 | "threemention.example.org", "suspend", "", "", True, True, True
169 | ),
170 | "fourmention.example.org": DomainBlock(
171 | "fourmention.example.org", "suspend", "", "", True, True, True
172 | ),
173 | },
174 | )
175 |
176 | bl_4 = Blocklist(
177 | "test4",
178 | {
179 | "threemention.example.org": DomainBlock(
180 | "threemention.example.org", "suspend", "", "", True, True, True
181 | ),
182 | "fourmention.example.org": DomainBlock(
183 | "fourmention.example.org", "suspend", "", "", True, True, True
184 | ),
185 | },
186 | )
187 |
188 | ml = merge_blocklists(
189 | [bl_1, bl_2, bl_3, bl_4], "max", threshold=30, threshold_type="pct"
190 | )
191 |
192 | assert "onemention.example.org" not in ml
193 | assert "twomention.example.org" in ml
194 | assert "threemention.example.org" in ml
195 | assert "fourmention.example.org" in ml
196 |
197 |
198 | def test_mergeplan_pct_55():
199 |     """Only merge a block if present in at least 55% of lists"""
200 |
201 | bl_1 = Blocklist(
202 | "test01",
203 | {
204 | "onemention.example.org": DomainBlock(
205 | "onemention.example.org", "suspend", "", "", True, True, True
206 | ),
207 | "twomention.example.org": DomainBlock(
208 | "twomention.example.org", "suspend", "", "", True, True, True
209 | ),
210 | "fourmention.example.org": DomainBlock(
211 | "fourmention.example.org", "suspend", "", "", True, True, True
212 | ),
213 | },
214 | )
215 |
216 | bl_2 = Blocklist(
217 | "test2",
218 | {
219 | "twomention.example.org": DomainBlock(
220 | "twomention.example.org", "suspend", "", "", True, True, True
221 | ),
222 | "threemention.example.org": DomainBlock(
223 | "threemention.example.org", "suspend", "", "", True, True, True
224 | ),
225 | "fourmention.example.org": DomainBlock(
226 | "fourmention.example.org", "suspend", "", "", True, True, True
227 | ),
228 | },
229 | )
230 |
231 | bl_3 = Blocklist(
232 | "test3",
233 | {
234 | "threemention.example.org": DomainBlock(
235 | "threemention.example.org", "suspend", "", "", True, True, True
236 | ),
237 | "fourmention.example.org": DomainBlock(
238 | "fourmention.example.org", "suspend", "", "", True, True, True
239 | ),
240 | },
241 | )
242 |
243 | bl_4 = Blocklist(
244 | "test4",
245 | {
246 | "threemention.example.org": DomainBlock(
247 | "threemention.example.org", "suspend", "", "", True, True, True
248 | ),
249 | "fourmention.example.org": DomainBlock(
250 | "fourmention.example.org", "suspend", "", "", True, True, True
251 | ),
252 | },
253 | )
254 |
255 | ml = merge_blocklists(
256 | [bl_1, bl_2, bl_3, bl_4], "max", threshold=55, threshold_type="pct"
257 | )
258 |
259 | assert "onemention.example.org" not in ml
260 | assert "twomention.example.org" not in ml
261 | assert "threemention.example.org" in ml
262 | assert "fourmention.example.org" in ml
263 |
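264 | 
265 | # A sketch of the degenerate threshold: with a count threshold of 1, every
266 | # block should survive the merge. This assumes the threshold is inclusive
267 | # ("present in N or more lists"), as the count tests above demonstrate.
268 | def test_mergeplan_count_1():
269 | 
270 |     bl_1 = Blocklist(
271 |         "test01",
272 |         {
273 |             "onemention.example.org": DomainBlock(
274 |                 "onemention.example.org", "suspend", "", "", True, True, True
275 |             ),
276 |         },
277 |     )
278 | 
279 |     bl_2 = Blocklist(
280 |         "test2",
281 |         {
282 |             "twomention.example.org": DomainBlock(
283 |                 "twomention.example.org", "suspend", "", "", True, True, True
284 |             ),
285 |         },
286 |     )
287 | 
288 |     ml = merge_blocklists([bl_1, bl_2], "max", threshold=1)
289 | 
290 |     assert "onemention.example.org" in ml
291 |     assert "twomention.example.org" in ml
292 | 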
--------------------------------------------------------------------------------
/tests/test_mergeplan.py:
--------------------------------------------------------------------------------
1 | """Various mergeplan tests
2 | """
3 |
4 | from fediblockhole import apply_mergeplan, merge_blocklists, merge_comments
5 | from fediblockhole.blocklists import parse_blocklist
6 | from fediblockhole.const import DomainBlock, SeverityLevel
7 |
8 | import_fields = [
9 | "domain",
10 | "severity",
11 | "public_comment",
12 | "private_comment",
13 | "reject_media",
14 | "reject_reports",
15 | "obfuscate",
16 | ]
17 |
18 |
19 | def load_test_blocklist_data(datafiles):
20 |
21 | blocklists = []
22 |
23 | for data in datafiles:
24 | bl = parse_blocklist(data, "pytest", "csv", import_fields)
25 | blocklists.append(bl)
26 |
27 | return blocklists
28 |
29 |
30 | def test_mergeplan_max(data_suspends_01, data_silences_01):
31 | """Test 'max' mergeplan"""
32 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01])
33 | bl = merge_blocklists(blocklists, "max")
34 | assert len(bl) == 13
35 |
36 | for key in bl:
37 | assert bl[key].severity.level == SeverityLevel.SUSPEND
38 |
39 |
40 | def test_mergeplan_min(data_suspends_01, data_silences_01):
41 |     """Test 'min' mergeplan"""
42 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01])
43 |
44 | bl = merge_blocklists(blocklists, "min")
45 | assert len(bl) == 13
46 |
47 | for key in bl:
48 | assert bl[key].severity.level == SeverityLevel.SILENCE
49 |
50 |
51 | def test_mergeplan_default(data_suspends_01, data_silences_01):
52 | """Default mergeplan is max, so see if it's chosen"""
53 | blocklists = load_test_blocklist_data([data_suspends_01, data_silences_01])
54 |
55 | bl = merge_blocklists(blocklists)
56 | assert len(bl) == 13
57 |
58 | for key in bl:
59 | assert bl[key].severity.level == SeverityLevel.SUSPEND
60 |
61 |
62 | def test_mergeplan_3_max(data_suspends_01, data_silences_01, data_noop_01):
63 | """3 datafiles and mergeplan of 'max'"""
64 | blocklists = load_test_blocklist_data(
65 | [data_suspends_01, data_silences_01, data_noop_01]
66 | )
67 |
68 | bl = merge_blocklists(blocklists, "max")
69 | assert len(bl) == 13
70 |
71 | for key in bl:
72 | assert bl[key].severity.level == SeverityLevel.SUSPEND
73 | assert bl[key].reject_media is True
74 | assert bl[key].reject_reports is True
75 | assert bl[key].obfuscate is True
76 |
77 |
78 | def test_mergeplan_3_min(data_suspends_01, data_silences_01, data_noop_01):
79 | """3 datafiles and mergeplan of 'min'"""
80 | blocklists = load_test_blocklist_data(
81 | [data_suspends_01, data_silences_01, data_noop_01]
82 | )
83 |
84 | bl = merge_blocklists(blocklists, "min")
85 | assert len(bl) == 13
86 |
87 | for key in bl:
88 | assert bl[key].severity.level == SeverityLevel.NONE
89 | assert bl[key].reject_media is False
90 | assert bl[key].reject_reports is False
91 | assert bl[key].obfuscate is False
92 |
93 |
94 | def test_mergeplan_noop_v_silence_max(data_silences_01, data_noop_01):
95 | """Mergeplan of max should choose silence over noop"""
96 | blocklists = load_test_blocklist_data([data_silences_01, data_noop_01])
97 |
98 | bl = merge_blocklists(blocklists, "max")
99 | assert len(bl) == 13
100 |
101 | for key in bl:
102 | assert bl[key].severity.level == SeverityLevel.SILENCE
103 |
104 |
105 | def test_mergeplan_noop_v_silence_min(data_silences_01, data_noop_01):
106 | """Mergeplan of min should choose noop over silence"""
107 | blocklists = load_test_blocklist_data([data_silences_01, data_noop_01])
108 |
109 | bl = merge_blocklists(blocklists, "min")
110 | assert len(bl) == 13
111 |
112 | for key in bl:
113 | assert bl[key].severity.level == SeverityLevel.NONE
114 |
115 |
116 | def test_merge_public_comment(data_suspends_01, data_silences_01, data_noop_01):
117 | blocklists = load_test_blocklist_data(
118 | [data_suspends_01, data_silences_01, data_noop_01]
119 | )
120 |
121 | bl = merge_blocklists(blocklists, "min")
122 | assert len(bl) == 13
123 |
124 | assert bl["public-comment.example.org"].public_comment == "This is a public comment"
125 |
126 |
127 | def test_merge_private_comment(data_suspends_01, data_silences_01, data_noop_01):
128 | blocklists = load_test_blocklist_data(
129 | [data_suspends_01, data_silences_01, data_noop_01]
130 | )
131 |
132 | bl = merge_blocklists(blocklists, "min")
133 | assert len(bl) == 13
134 |
135 | assert (
136 | bl["private-comment.example.org"].private_comment == "This is a private comment"
137 | )
138 |
139 |
140 | def test_merge_public_comments(data_suspends_01, data_silences_01, data_noop_01):
141 | blocklists = load_test_blocklist_data(
142 | [data_suspends_01, data_silences_01, data_noop_01]
143 | )
144 |
145 | bl = merge_blocklists(blocklists, "min")
146 | assert len(bl) == 13
147 |
148 | assert (
149 | bl["diff-comment.example.org"].public_comment
150 | == "Suspend public comment, Silence public comment, Noop public comment"
151 | )
152 |
153 |
154 | def test_merge_duplicate_comments(data_suspends_01, data_silences_01, data_noop_01):
155 | """The same comment on multiple sources shouldn't get added"""
156 | blocklists = load_test_blocklist_data(
157 | [data_suspends_01, data_silences_01, data_noop_01]
158 | )
159 |
160 | bl = merge_blocklists(blocklists, "min")
161 | assert len(bl) == 13
162 |
163 |
164 | def test_merge_comments_none():
165 |
166 | a = None
167 | b = None
168 |
169 | r = merge_comments(a, b)
170 |
171 | assert r == ""
172 |
173 |
174 | def test_merge_comments_empty():
175 |
176 | a = ""
177 | b = ""
178 |
179 | r = merge_comments(a, b)
180 |
181 | assert r == ""
182 |
183 |
184 | def test_merge_comments_left():
185 |
186 | a = "comment to merge"
187 | b = ""
188 |
189 | r = merge_comments(a, b)
190 |
191 | assert r == "comment to merge"
192 |
193 |
194 | def test_merge_comments_right():
195 |
196 | a = ""
197 | b = "comment to merge"
198 |
199 | r = merge_comments(a, b)
200 |
201 | assert r == "comment to merge"
202 |
203 |
204 | def test_merge_comments_same():
205 |
206 | a = "comment to merge"
207 | b = "comment to merge"
208 |
209 | r = merge_comments(a, b)
210 |
211 | assert r == "comment to merge"
212 |
213 |
214 | def test_merge_comments_diff():
215 |
216 | a = "comment A"
217 | b = "comment B"
218 |
219 | r = merge_comments(a, b)
220 |
221 | assert r == "comment A, comment B"
222 |
223 |
224 | def test_merge_comments_dups():
225 |
226 | a = "boring, nazis, lack of moderation, flagged, special"
227 | b = "spoon, nazis, flagged, lack of moderation, happy, fork"
228 |
229 | r = merge_comments(a, b)
230 |
231 | assert (
232 | r == "boring, nazis, lack of moderation, flagged, special, spoon, happy, fork"
233 | )
234 |
235 |
236 | def test_mergeplan_same_max_bools_false():
237 | """Test merging with mergeplan 'max' and False values doesn't change them"""
238 | a = DomainBlock("example.org", "noop", "", "", False, False, False)
239 | b = DomainBlock("example.org", "noop", "", "", False, False, False)
240 |
241 | r = apply_mergeplan(a, b, "max")
242 |
243 | assert r.reject_media is False
244 | assert r.reject_reports is False
245 | assert r.obfuscate is False
246 |
247 |
248 | def test_mergeplan_same_max_bools_true():
249 | """Test merging with mergeplan 'max' and True values doesn't change them"""
250 | a = DomainBlock("example.org", "noop", "", "", True, False, True)
251 | b = DomainBlock("example.org", "noop", "", "", True, False, True)
252 |
253 | r = apply_mergeplan(a, b, "max")
254 |
255 | assert r.reject_media is True
256 | assert r.reject_reports is False
257 | assert r.obfuscate is True
258 |
259 |
260 | def test_mergeplan_max_bools():
261 | a = DomainBlock("example.org", "suspend", "", "", True, True, True)
262 | b = DomainBlock("example.org", "noop", "", "", False, False, False)
263 |
264 | r = apply_mergeplan(a, b, "max")
265 |
266 | assert r.reject_media is True
267 | assert r.reject_reports is True
268 | assert r.obfuscate is True
269 |
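270 | 
271 | # A hedged counterpart to test_mergeplan_max_bools: with mergeplan 'min' the
272 | # boolean flags should resolve the other way, matching the all-False flags
273 | # seen in test_mergeplan_3_min above.
274 | def test_mergeplan_min_bools():
275 |     a = DomainBlock("example.org", "suspend", "", "", True, True, True)
276 |     b = DomainBlock("example.org", "noop", "", "", False, False, False)
277 | 
278 |     r = apply_mergeplan(a, b, "min")
279 | 
280 |     assert r.reject_media is False
281 |     assert r.reject_reports is False
282 |     assert r.obfuscate is False
283 | 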
--------------------------------------------------------------------------------
/tests/test_parser_csv.py:
--------------------------------------------------------------------------------
1 | """Tests of the CSV parsing
2 | """
3 |
4 | from fediblockhole.blocklists import BlocklistParserCSV
5 | from fediblockhole.const import SeverityLevel
6 |
7 |
8 | def test_single_line():
9 | csvdata = "example.org"
10 | origin = "csvfile"
11 |
12 | parser = BlocklistParserCSV()
13 | bl = parser.parse_blocklist(csvdata, origin)
14 | assert len(bl) == 0
15 |
16 |
17 | def test_header_only():
18 | csvdata = "domain,severity,public_comment"
19 | origin = "csvfile"
20 |
21 | parser = BlocklistParserCSV()
22 | bl = parser.parse_blocklist(csvdata, origin)
23 | assert len(bl) == 0
24 |
25 |
26 | def test_2_blocks():
27 | csvdata = """domain,severity
28 | example.org,silence
29 | example2.org,suspend
30 | """
31 | origin = "csvfile"
32 |
33 | parser = BlocklistParserCSV()
34 | bl = parser.parse_blocklist(csvdata, origin)
35 |
36 | assert len(bl) == 2
37 | assert "example.org" in bl
38 |
39 |
40 | def test_4_blocks():
41 | csvdata = """domain,severity,public_comment
42 | example.org,silence,"test 1"
43 | example2.org,suspend,"test 2"
44 | example3.org,noop,"test 3"
45 | example4.org,suspend,"test 4"
46 | """
47 | origin = "csvfile"
48 |
49 | parser = BlocklistParserCSV()
50 | bl = parser.parse_blocklist(csvdata, origin)
51 |
52 | assert len(bl) == 4
53 | assert "example.org" in bl
54 | assert "example2.org" in bl
55 | assert "example3.org" in bl
56 | assert "example4.org" in bl
57 |
58 | assert bl["example.org"].severity.level == SeverityLevel.SILENCE
59 | assert bl["example2.org"].severity.level == SeverityLevel.SUSPEND
60 | assert bl["example3.org"].severity.level == SeverityLevel.NONE
61 | assert bl["example4.org"].severity.level == SeverityLevel.SUSPEND
62 |
63 |
64 | def test_ignore_comments():
65 | csvdata = """domain,severity,public_comment,private_comment
66 | example.org,silence,"test 1","ignore me"
67 | example2.org,suspend,"test 2","ignore me also"
68 | example3.org,noop,"test 3","and me"
69 | example4.org,suspend,"test 4","also me"
70 | """
71 | origin = "csvfile"
72 |
73 | parser = BlocklistParserCSV()
74 | bl = parser.parse_blocklist(csvdata, origin)
75 |
76 | assert len(bl) == 4
77 | assert "example.org" in bl
78 | assert "example2.org" in bl
79 | assert "example3.org" in bl
80 | assert "example4.org" in bl
81 |
82 | assert bl["example.org"].public_comment == ""
83 | assert bl["example.org"].private_comment == ""
84 | assert bl["example3.org"].public_comment == ""
85 | assert bl["example4.org"].private_comment == ""
86 |
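87 | 
88 | # A sketch using the module-level parse_blocklist() helper (the same call
89 | # pattern as the merge-threshold tests) with an explicit import_fields list.
90 | # Assumption: the 'csv' format honours import_fields, so the comment is kept
91 | # rather than dropped as in test_ignore_comments above.
92 | def test_keep_public_comment_with_import_fields():
93 |     from fediblockhole.blocklists import parse_blocklist
94 | 
95 |     csvdata = """domain,severity,public_comment
96 | example.org,silence,"test 1"
97 | """
98 |     bl = parse_blocklist(
99 |         csvdata, "csvfile", "csv", ["domain", "severity", "public_comment"]
100 |     )
101 | 
102 |     assert len(bl) == 1
103 |     assert bl["example.org"].public_comment == "test 1"
104 | 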
--------------------------------------------------------------------------------
/tests/test_parser_csv_mastodon.py:
--------------------------------------------------------------------------------
1 | """Tests of the CSV parsing
2 | """
3 |
4 | from fediblockhole.blocklists import BlocklistParserMastodonCSV
5 | from fediblockhole.const import SeverityLevel
6 |
7 |
8 | def test_single_line():
9 | csvdata = "example.org"
10 | origin = "csvfile"
11 |
12 | parser = BlocklistParserMastodonCSV()
13 | bl = parser.parse_blocklist(csvdata, origin)
14 | assert len(bl) == 0
15 |
16 |
17 | def test_header_only():
18 | csvdata = "#domain,#severity,#public_comment"
19 | origin = "csvfile"
20 |
21 | parser = BlocklistParserMastodonCSV()
22 | bl = parser.parse_blocklist(csvdata, origin)
23 | assert len(bl) == 0
24 |
25 |
26 | def test_2_blocks():
27 | csvdata = """domain,severity
28 | example.org,silence
29 | example2.org,suspend
30 | """
31 | origin = "csvfile"
32 |
33 | parser = BlocklistParserMastodonCSV()
34 | bl = parser.parse_blocklist(csvdata, origin)
35 |
36 | assert len(bl) == 2
37 | assert "example.org" in bl
38 |
39 |
40 | def test_4_blocks():
41 | csvdata = """domain,severity,public_comment
42 | example.org,silence,"test 1"
43 | example2.org,suspend,"test 2"
44 | example3.org,noop,"test 3"
45 | example4.org,suspend,"test 4"
46 | """
47 | origin = "csvfile"
48 |
49 | parser = BlocklistParserMastodonCSV()
50 | bl = parser.parse_blocklist(csvdata, origin)
51 |
52 | assert len(bl) == 4
53 | assert "example.org" in bl
54 | assert "example2.org" in bl
55 | assert "example3.org" in bl
56 | assert "example4.org" in bl
57 |
58 | assert bl["example.org"].severity.level == SeverityLevel.SILENCE
59 | assert bl["example2.org"].severity.level == SeverityLevel.SUSPEND
60 | assert bl["example3.org"].severity.level == SeverityLevel.NONE
61 | assert bl["example4.org"].severity.level == SeverityLevel.SUSPEND
62 |
63 |
64 | def test_ignore_comments():
65 | csvdata = """domain,severity,public_comment,private_comment
66 | example.org,silence,"test 1","ignore me"
67 | example2.org,suspend,"test 2","ignore me also"
68 | example3.org,noop,"test 3","and me"
69 | example4.org,suspend,"test 4","also me"
70 | """
71 | origin = "csvfile"
72 |
73 | parser = BlocklistParserMastodonCSV()
74 | bl = parser.parse_blocklist(csvdata, origin)
75 |
76 | assert len(bl) == 4
77 | assert "example.org" in bl
78 | assert "example2.org" in bl
79 | assert "example3.org" in bl
80 | assert "example4.org" in bl
81 |
82 | assert bl["example.org"].public_comment == ""
83 | assert bl["example.org"].private_comment == ""
84 | assert bl["example3.org"].public_comment == ""
85 | assert bl["example4.org"].private_comment == ""
86 |
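87 | 
88 | # A sketch assuming the Mastodon parser strips the leading '#' that
89 | # Mastodon's CSV export puts on header names (the header-only test above
90 | # already feeds it '#domain,#severity,#public_comment').
91 | def test_hash_prefixed_headers():
92 |     csvdata = """#domain,#severity
93 | example.org,silence
94 | example2.org,suspend
95 | """
96 |     origin = "csvfile"
97 | 
98 |     parser = BlocklistParserMastodonCSV()
99 |     bl = parser.parse_blocklist(csvdata, origin)
100 | 
101 |     assert len(bl) == 2
102 |     assert "example.org" in bl
103 |     assert bl["example.org"].severity.level == SeverityLevel.SILENCE
104 | 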
--------------------------------------------------------------------------------
/tests/test_parser_json.py:
--------------------------------------------------------------------------------
1 | """Tests of the CSV parsing
2 | """
3 |
4 | from fediblockhole.blocklists import BlocklistParserJSON
5 | from fediblockhole.const import SeverityLevel
6 |
7 |
8 | def test_json_parser(data_mastodon_json):
9 |
10 | parser = BlocklistParserJSON()
11 | bl = parser.parse_blocklist(data_mastodon_json, "test_json")
12 |
13 | assert len(bl) == 10
14 | assert "example.org" in bl
15 | assert "example2.org" in bl
16 | assert "example3.org" in bl
17 | assert "example4.org" in bl
18 |
19 | assert bl["example.org"].severity.level == SeverityLevel.SUSPEND
20 | assert bl["example2.org"].severity.level == SeverityLevel.SILENCE
21 | assert bl["example3.org"].severity.level == SeverityLevel.SUSPEND
22 | assert bl["example4.org"].severity.level == SeverityLevel.NONE
23 |
24 |
25 | def test_ignore_comments(data_mastodon_json):
26 |
27 | parser = BlocklistParserJSON()
28 | bl = parser.parse_blocklist(data_mastodon_json, "test_json")
29 |
30 | assert len(bl) == 10
31 | assert "example.org" in bl
32 | assert "example2.org" in bl
33 | assert "example3.org" in bl
34 | assert "example4.org" in bl
35 |
36 | assert bl["example.org"].public_comment == ""
37 | assert bl["example.org"].private_comment == ""
38 | assert bl["example3.org"].public_comment == ""
39 | assert bl["example4.org"].private_comment == ""
40 |
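41 | 
42 | # A small consistency sketch: the parsed blocklist is keyed by domain, so
43 | # each entry's .domain attribute should match its key (only operations
44 | # already used elsewhere in this suite are assumed here).
45 | def test_keys_match_domains(data_mastodon_json):
46 | 
47 |     parser = BlocklistParserJSON()
48 |     bl = parser.parse_blocklist(data_mastodon_json, "test_json")
49 | 
50 |     for key in bl:
51 |         assert bl[key].domain == key
52 | 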
--------------------------------------------------------------------------------
/tests/test_parser_rapidblockcsv.py:
--------------------------------------------------------------------------------
1 | """Tests of the Rapidblock CSV parsing
2 | """
3 |
4 | from fediblockhole.blocklists import RapidBlockParserCSV
5 | from fediblockhole.const import SeverityLevel
6 |
7 | csvdata = (
8 | """example.org\r\nsubdomain.example.org\r\nanotherdomain.org\r\ndomain4.org\r\n"""
9 | )
10 | parser = RapidBlockParserCSV()
11 |
12 |
13 | def test_basic_rapidblock():
14 |
15 | bl = parser.parse_blocklist(csvdata)
16 | assert len(bl) == 4
17 | assert "example.org" in bl
18 | assert "subdomain.example.org" in bl
19 | assert "anotherdomain.org" in bl
20 | assert "domain4.org" in bl
21 |
22 |
23 | def test_severity_is_suspend():
24 | bl = parser.parse_blocklist(csvdata)
25 |
26 | for block in bl.values():
27 | assert block.severity.level == SeverityLevel.SUSPEND
28 |
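29 | 
30 | # A sketch assuming RapidBlock CSV blocks get the default empty comments,
31 | # as the RapidBlock JSON tests assert for their default parse.
32 | def test_comments_default_empty():
33 |     bl = parser.parse_blocklist(csvdata)
34 | 
35 |     for block in bl.values():
36 |         assert block.public_comment == ""
37 |         assert block.private_comment == ""
38 | 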
--------------------------------------------------------------------------------
/tests/test_parser_rapidblockjson.py:
--------------------------------------------------------------------------------
1 | """Test parsing the RapidBlock JSON format
2 | """
3 |
4 | from fediblockhole.blocklists import parse_blocklist
5 | from fediblockhole.const import SeverityLevel
6 |
7 |
8 | def test_parse_rapidblock_json(data_rapidblock_json):
9 |
10 | bl = parse_blocklist(data_rapidblock_json, "pytest", "rapidblock.json")
11 |
12 | assert "101010.pl" in bl
13 | assert bl["101010.pl"].severity.level == SeverityLevel.SUSPEND
14 | assert bl["101010.pl"].public_comment == ""
15 |
16 | assert "berserker.town" in bl
17 | assert bl["berserker.town"].severity.level == SeverityLevel.SUSPEND
18 | assert bl["berserker.town"].public_comment == ""
19 | assert bl["berserker.town"].private_comment == ""
20 |
21 |
22 | def test_parse_with_comments(data_rapidblock_json):
23 |
24 | bl = parse_blocklist(
25 | data_rapidblock_json,
26 | "pytest",
27 | "rapidblock.json",
28 | ["domain", "severity", "public_comment", "private_comment"],
29 | )
30 |
31 | assert "101010.pl" in bl
32 | assert bl["101010.pl"].severity.level == SeverityLevel.SUSPEND
33 | assert bl["101010.pl"].public_comment == "cryptomining javascript, white supremacy"
34 |
35 | assert "berserker.town" in bl
36 | assert bl["berserker.town"].severity.level == SeverityLevel.SUSPEND
37 | assert bl["berserker.town"].public_comment == "freeze peach"
38 |
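39 | 
40 | # A sketch assuming the JSON parser follows the same rule as the RapidBlock
41 | # CSV parser tested elsewhere: every block it emits is a suspend.
42 | def test_all_blocks_are_suspends(data_rapidblock_json):
43 | 
44 |     bl = parse_blocklist(data_rapidblock_json, "pytest", "rapidblock.json")
45 | 
46 |     for key in bl:
47 |         assert bl[key].severity.level == SeverityLevel.SUSPEND
48 | 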
--------------------------------------------------------------------------------