├── src
└── chives
│ ├── py.typed
│ ├── __init__.py
│ ├── dates.py
│ ├── urls.py
│ ├── static_site_tests.py
│ └── media.py
├── tests
├── stubs
│ └── vcr.cassette.pyi
├── fixtures
│ ├── media
│ │ ├── blue.png
│ │ ├── space.jpg
│ │ ├── Landscape_0.jpg
│ │ ├── Landscape_1.jpg
│ │ ├── Landscape_2.jpg
│ │ ├── Landscape_3.jpg
│ │ ├── Landscape_4.jpg
│ │ ├── Landscape_5.jpg
│ │ ├── Landscape_6.jpg
│ │ ├── Landscape_7.jpg
│ │ ├── Landscape_8.jpg
│ │ ├── checkerboard.png
│ │ ├── asteroid_belt.png
│ │ ├── asteroid_belt_P.png
│ │ ├── blue_with_hole.png
│ │ ├── electric_field.gif
│ │ ├── underlined_text.png
│ │ ├── wings_tracking_shot.jpg
│ │ ├── wings_tracking_shot.mp4
│ │ ├── Sintel_360_10s_1MB_AV1.mp4
│ │ ├── Sintel_360_10s_1MB_H264.mp4
│ │ ├── Sintel_360_10s_1MB_H264.png
│ │ ├── Mars 2020 EDL Remastered [HHhyznZ2u4E].jpg
│ │ └── Mars 2020 EDL Remastered [HHhyznZ2u4E].mp4
│ └── cassettes
│ │ ├── TestIsMastodonHost.test_non_mastodon_servers[alexwlchan.net].yml
│ │ ├── TestIsMastodonHost.test_non_mastodon_servers[example.com].yml
│ │ ├── TestIsMastodonHost.test_mastodon_servers[social.jvns.ca].yml
│ │ └── TestIsMastodonHost.test_non_mastodon_servers[peertube.tv].yml
├── conftest.py
├── test_dates.py
├── test_urls.py
├── test_static_site_tests.py
└── test_media.py
├── .gitignore
├── dev_requirements.in
├── .gitattributes
├── .github
├── dependabot.yml
└── workflows
│ └── test.yml
├── LICENSE
├── CONTRIBUTING.md
├── pyproject.toml
├── README.md
├── CHANGELOG.md
└── dev_requirements.txt
/src/chives/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/stubs/vcr.cassette.pyi:
--------------------------------------------------------------------------------
# Placeholder declaration of `Cassette` with no members.
# NOTE(review): presumably here so the type checker can resolve
# `vcr.cassette.Cassette` without full stubs for vcr -- confirm.
class Cassette:
    ...
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.pyc
3 | .coverage
4 | dist
5 |
--------------------------------------------------------------------------------
/tests/fixtures/media/blue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/blue.png
--------------------------------------------------------------------------------
/tests/fixtures/media/space.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/space.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_0.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_1.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_2.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_3.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_4.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_5.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_6.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_7.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Landscape_8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_8.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/checkerboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/checkerboard.png
--------------------------------------------------------------------------------
/tests/fixtures/media/asteroid_belt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/asteroid_belt.png
--------------------------------------------------------------------------------
/tests/fixtures/media/asteroid_belt_P.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/asteroid_belt_P.png
--------------------------------------------------------------------------------
/tests/fixtures/media/blue_with_hole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/blue_with_hole.png
--------------------------------------------------------------------------------
/tests/fixtures/media/electric_field.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/electric_field.gif
--------------------------------------------------------------------------------
/tests/fixtures/media/underlined_text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/underlined_text.png
--------------------------------------------------------------------------------
/tests/fixtures/media/wings_tracking_shot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/wings_tracking_shot.jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/wings_tracking_shot.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/wings_tracking_shot.mp4
--------------------------------------------------------------------------------
/tests/fixtures/media/Sintel_360_10s_1MB_AV1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Sintel_360_10s_1MB_AV1.mp4
--------------------------------------------------------------------------------
/dev_requirements.in:
--------------------------------------------------------------------------------
1 | -e file:.[media,static_site_tests,urls]
2 |
3 | build
4 | mypy
5 | pytest-cov
6 | ruff
7 | silver-nitrate[cassettes]
8 | twine
9 |
--------------------------------------------------------------------------------
/tests/fixtures/media/Sintel_360_10s_1MB_H264.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Sintel_360_10s_1MB_H264.mp4
--------------------------------------------------------------------------------
/tests/fixtures/media/Sintel_360_10s_1MB_H264.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Sintel_360_10s_1MB_H264.png
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | requirements.txt linguist-generated=true
2 | dev_requirements.txt linguist-generated=true
3 |
4 | tests/fixtures/cassettes/*.yml linguist-generated=true
5 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """Shared helpers and test fixtures."""
2 |
3 | from nitrate.cassettes import cassette_name, vcr_cassette
4 |
5 | __all__ = ["cassette_name", "vcr_cassette"]
6 |
--------------------------------------------------------------------------------
/tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].jpg
--------------------------------------------------------------------------------
/tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].mp4
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: weekly
7 | day: monday
8 | time: "09:00"
9 | - package-ecosystem: "pip"
10 | directory: "/"
11 | schedule:
12 | interval: weekly
13 | day: monday
14 | time: "09:00"
15 |
--------------------------------------------------------------------------------
/src/chives/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | chives is a collection of Python functions for working with my local
3 | media archives.
4 |
5 | I store a lot of media archives as static websites [1][2], and I use
6 | Python scripts to manage my media. This package has some functions
7 | I share across multiple sites.
8 |
9 | [1]: https://alexwlchan.net/2024/static-websites/
10 | [2]: https://alexwlchan.net/2025/mildly-dynamic-websites/
11 |
12 | """
13 |
14 | __version__ = "21"
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2025 Alex Chan
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a
4 | copy of this software and associated documentation files (the "Software"),
5 | to deal in the Software without restriction, including without limitation
6 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 | and/or sell copies of the Software, and to permit persons to whom the Software
8 | is furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 | OTHER DEALINGS IN THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # CONTRIBUTING
2 |
3 | You can set up a local development environment by cloning the repo and installing dependencies:
4 |
5 | ```shell
6 | git clone https://github.com/alexwlchan/chives.git
7 | cd chives
8 | python3 -m venv .venv
9 | source .venv/bin/activate
10 | pip install -e .
11 | ```
12 |
13 | If you want to run tests, install the dev dependencies and run the tests:
14 |
15 | ```shell
16 | # Activate the virtualenv and install dev dependencies
17 | source .venv/bin/activate
18 | pip install -r dev_requirements.txt
19 |
20 | # Check formatting
21 | ruff check .
22 | ruff format --check .
23 |
24 | # Check docstrings
25 | interrogate -vv
26 |
27 | # Check types
28 | mypy src tests
29 |
30 | # Run tests
31 | coverage run -m pytest tests
32 | coverage report
33 | ```
34 |
35 | To make changes:
36 |
37 | 1. Create a new branch
38 | 2. Push your changes to GitHub
39 | 3. Open a pull request
40 | 4. Fix any issues flagged by GitHub Actions (including tests, code linting, and type checking)
41 | 5. Merge it!
42 |
43 | To create a new version on PyPI:
44 |
45 | 1. Update the version in `src/chives/__init__.py`
46 | 2. Add release notes in `CHANGELOG.md` and push a new tag to GitHub
47 | 3. Deploy the release using twine:
48 |
49 | ```console
50 | $ python3 -m build
51 | $ python3 -m twine upload dist/* --username=__token__
52 | ```
53 |
54 | You will need [a PyPI API token](https://pypi.org/help/#apitoken) to publish packages.
55 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools >= 65",
4 | ]
5 | build-backend = "setuptools.build_meta"
6 |
7 | [project]
8 | name = "alexwlchan-chives"
9 | description = "Utility functions for working with my local media archives"
10 | readme = "README.md"
11 | authors = [
12 | {name = "Alex Chan", email = "alex@alexwlchan.net"},
13 | ]
14 | maintainers = [
15 | {name = "Alex Chan", email="alex@alexwlchan.net"},
16 | ]
17 | classifiers = [
18 | "Development Status :: 4 - Beta",
19 | "Programming Language :: Python :: 3.13",
20 | ]
21 | requires-python = ">=3.13"
22 | dependencies = []
23 | dynamic = ["version"]
24 | license = "MIT"
25 |
26 | [project.optional-dependencies]
27 | media = ["Pillow"]
28 | static_site_tests = ["pytest", "rapidfuzz"]
29 | urls = ["httpx", "hyperlink"]
30 |
31 | [project.urls]
32 | "Homepage" = "https://github.com/alexwlchan/chives"
33 | "Changelog" = "https://github.com/alexwlchan/chives/blob/main/CHANGELOG.md"
34 |
35 | [tool.setuptools.dynamic]
36 | version = {attr = "chives.__version__"}
37 |
38 | [tool.setuptools.packages.find]
39 | where = ["src"]
40 |
41 | [tool.setuptools.package-data]
42 | chives = ["py.typed"]
43 |
44 | [tool.coverage.run]
45 | branch = true
46 | source = ["chives", "tests",]
47 |
48 | [tool.coverage.report]
49 | show_missing = true
50 | skip_covered = true
51 | fail_under = 100
52 |
53 | [tool.pytest.ini_options]
54 | filterwarnings = ["error"]
55 |
56 | [tool.mypy]
57 | mypy_path = "src"
58 | strict = true
59 |
60 | [tool.ruff.lint]
61 | select = ["D", "E", "F"]
62 | ignore = ["D200", "D203", "D204", "D205", "D212", "D401"]
63 |
--------------------------------------------------------------------------------
/tests/fixtures/cassettes/TestIsMastodonHost.test_non_mastodon_servers[alexwlchan.net].yml:
--------------------------------------------------------------------------------
1 | interactions:
2 | - request:
3 | body: ''
4 | headers:
5 | Accept:
6 | - '*/*'
7 | Accept-Encoding:
8 | - gzip, deflate
9 | Connection:
10 | - keep-alive
11 | Host:
12 | - alexwlchan.net
13 | User-Agent:
14 | - python-httpx/0.28.1
15 | method: GET
16 | uri: https://alexwlchan.net/.well-known/nodeinfo
17 | response:
18 | body:
19 | string: ''
20 | headers:
21 | Alt-Svc:
22 | - h3=":443"; ma=2592000
23 | Content-Length:
24 | - '0'
25 | Content-Security-Policy:
26 | - 'default-src ''self'' ''unsafe-inline'' https://youtube-nocookie.com https://www.youtube-nocookie.com;
27 | script-src ''self'' ''unsafe-inline''; connect-src https://analytics.alexwlchan.net;
28 | img-src ''self'' ''unsafe-inline'' data:'
29 | Date:
30 | - Thu, 04 Dec 2025 12:15:34 GMT
31 | Location:
32 | - https://social.alexwlchan.net/.well-known/nodeinfo
33 | Permissions-Policy:
34 | - geolocation=(), midi=(), notifications=(), push=(), sync-xhr=(), microphone=(),
35 | camera=(), magnetometer=(), gyroscope=(), vibrate=(), payment=()
36 | Referrer-Policy:
37 | - no-referrer-when-downgrade
38 | Server:
39 | - Caddy
40 | Strict-Transport-Security:
41 | - max-age=31536000; includeSubDomains
42 | X-Content-Type-Options:
43 | - nosniff
44 | X-Frame-Options:
45 | - ALLOWALL
46 | X-Xss-Protection:
47 | - 1; mode=block
48 | status:
49 | code: 301
50 | message: Moved Permanently
51 | version: 1
52 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # chives
2 |
3 | chives is a collection of Python functions for working with my local
4 | media archives.
5 |
6 | I store a lot of media archives as [static websites][static-sites], and I use Python scripts to manage the sites.
7 | This includes:
8 |
9 | * Verifying every file that's described in the metadata is stored correctly
10 | * Downloading pages from sites I want to bookmark
11 | * Checking the quality and consistency of my metadata
12 |
13 | This package has some functions I share across multiple archives/sites.
14 |
15 | [static-sites]: https://alexwlchan.net/2024/static-websites/
16 |
17 | ## References
18 |
19 | I've written blog posts about some of the code in this repo:
20 |
21 | * [Cleaning up messy dates in JSON](https://alexwlchan.net/2025/messy-dates-in-json/)
22 | * [Detecting AV1-encoded videos with Python](https://alexwlchan.net/2025/detecting-av1-videos/)
23 |
24 | ## Versioning
25 |
26 | This library is monotonically versioned.
27 | I'll try not to break anything between releases, but I make no guarantees of back-compatibility.
28 |
29 | I'm making this public because it's convenient for me, and you might find useful code here, but be aware this may not be entirely stable.
30 |
31 | ## Usage
32 |
33 | See the docstrings on individual functions for usage descriptions.
34 |
35 | ## Installation
36 |
37 | If you want to use this in your project, I recommend copying the relevant function and test into your codebase (with a link back to this repo).
38 |
39 | Alternatively, you can install the package from PyPI:
40 |
41 | ```console
42 | $ pip install alexwlchan-chives
43 | ```
44 |
45 | ## Development
46 |
47 | If you want to make changes to the library, there are instructions in [CONTRIBUTING.md](./CONTRIBUTING.md).
48 |
49 | ## License
50 |
51 | MIT.
52 |
--------------------------------------------------------------------------------
/tests/fixtures/cassettes/TestIsMastodonHost.test_non_mastodon_servers[example.com].yml:
--------------------------------------------------------------------------------
1 | interactions:
2 | - request:
3 | body: ''
4 | headers:
5 | Accept:
6 | - '*/*'
7 | Accept-Encoding:
8 | - gzip, deflate
9 | Connection:
10 | - keep-alive
11 | Host:
12 | - example.com
13 | User-Agent:
14 | - python-httpx/0.28.1
15 | method: GET
16 | uri: https://example.com/.well-known/nodeinfo
17 | response:
18 | body:
19 | string: '
Example DomainExample
22 | Domain
This domain is for use in documentation examples without needing
23 | permission. Avoid use in operations.
Learn
24 | more
25 |
26 | '
27 | headers:
28 | Accept-Ranges:
29 | - bytes
30 | Alt-Svc:
31 | - h3=":443"; ma=93600
32 | Cache-Control:
33 | - max-age=0, no-cache, no-store
34 | Connection:
35 | - keep-alive
36 | Content-Length:
37 | - '513'
38 | Content-Type:
39 | - text/html
40 | Date:
41 | - Thu, 04 Dec 2025 12:15:34 GMT
42 | ETag:
43 | - '"bc2473a18e003bdb249eba5ce893033f:1760028122.592274"'
44 | Expires:
45 | - Thu, 04 Dec 2025 12:15:34 GMT
46 | Last-Modified:
47 | - Thu, 09 Oct 2025 16:42:02 GMT
48 | Pragma:
49 | - no-cache
50 | Server:
51 | - AkamaiNetStorage
52 | status:
53 | code: 404
54 | message: Not Found
55 | version: 1
56 |
--------------------------------------------------------------------------------
/tests/test_dates.py:
--------------------------------------------------------------------------------
1 | """Tests for `chives.dates`."""
2 |
3 | import json
4 |
5 | import pytest
6 |
7 | from chives.dates import date_matches_any_format, find_all_dates, reformat_date
8 |
9 |
def test_find_all_dates() -> None:
    """find_all_dates finds all the nested dates in a JSON object."""
    document = json.loads("""{
        "doc1": {"id": "1", "date_created": "2025-10-14T05:34:07+0000"},
        "shapes": [
            {"color": "blue", "date_saved": "2015-03-01 23:34:39 +00:00"},
            {"color": "yellow", "date_saved": "2013-9-21 13:43:00Z", "is_square": true},
            {"color": "green", "date_saved": null}
        ],
        "date_verified": "2024-08-30"
    }""")

    # Each entry is (containing object, key, value).  The green shape is
    # absent because its `date_saved` is null rather than a string.
    expected = [
        (
            {"id": "1", "date_created": "2025-10-14T05:34:07+0000"},
            "date_created",
            "2025-10-14T05:34:07+0000",
        ),
        (
            {"color": "blue", "date_saved": "2015-03-01 23:34:39 +00:00"},
            "date_saved",
            "2015-03-01 23:34:39 +00:00",
        ),
        (
            {"color": "yellow", "date_saved": "2013-9-21 13:43:00Z", "is_square": True},
            "date_saved",
            "2013-9-21 13:43:00Z",
        ),
        (document, "date_verified", "2024-08-30"),
    ]

    found = list(find_all_dates(document))
    assert found == expected
40 |
41 |
def test_date_matches_any_format() -> None:
    """
    Tests for `date_matches_any_format`.
    """
    date_only = "%Y-%m-%d"
    date_with_time = "%Y-%m-%dT%H:%M:%S%z"

    # Matching a single one of the candidate formats is enough.
    assert date_matches_any_format("2001-01-01", formats=[date_only, date_with_time])

    # If none of the candidate formats fit, the result is False.
    assert not date_matches_any_format("2001-01-01", formats=[date_with_time])
50 |
51 |
@pytest.mark.parametrize(
    "s, orig_fmt, formatted_date",
    [
        # Fractional seconds are dropped.
        ("2025-11-12T15:34:39.570Z", "%Y-%m-%dT%H:%M:%S.%fZ", "2025-11-12T15:34:39Z"),
        # A date with no timezone is treated as UTC.
        ("2025-03-12 09:57:03", "%Y-%m-%d %H:%M:%S", "2025-03-12T09:57:03Z"),
        # A named timezone parsed with %Z.
        ("2016-02-25 05:28:35 GMT", "%Y-%m-%d %H:%M:%S %Z", "2016-02-25T05:28:35Z"),
        # A non-UTC offset is converted to UTC.
        ("2011-12-06T10:45:15-08:00", "%Y-%m-%dT%H:%M:%S%z", "2011-12-06T18:45:15Z"),
    ],
)
def test_reformat_date(s: str, orig_fmt: str, formatted_date: str) -> None:
    """Tests for `reformat_date`."""
    actual = reformat_date(s, orig_fmt)
    assert actual == formatted_date
64 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | pull_request:
9 | branches:
10 | - main
11 |
12 | jobs:
13 | test:
14 | runs-on: macos-latest
15 | strategy:
16 | matrix:
17 | python-version: ["3.13", "3.14"]
18 |
19 | steps:
20 | - uses: actions/checkout@v6
21 |
22 | - name: Set up Python
23 | uses: actions/setup-python@v6
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | cache: pip
27 |
28 | - name: Install Python dependencies
29 | run: pip install -r dev_requirements.txt
30 |
31 | - name: Install create_thumbnail
32 | env:
33 | GH_TOKEN: ${{ github.token }}
34 | run: |
35 | gh release download \
36 | --repo alexwlchan/create_thumbnail \
37 | --pattern create_thumbnail-aarch64-apple-darwin.tar.gz \
38 | --output create_thumbnail.tar.gz
39 | tar -xzf create_thumbnail.tar.gz --directory /usr/local/bin
40 | chmod +x /usr/local/bin/create_thumbnail
41 | which create_thumbnail
42 |
43 | - name: Install dominant_colours
44 | env:
45 | GH_TOKEN: ${{ github.token }}
46 | run: |
47 | gh release download \
48 | --repo alexwlchan/dominant_colours \
49 | --pattern dominant_colours-aarch64-apple-darwin.tar.gz \
50 | --output dominant_colours.tar.gz
51 | tar -xzf dominant_colours.tar.gz --directory /usr/local/bin
52 | chmod +x /usr/local/bin/dominant_colours
53 | which dominant_colours
54 |
55 | - name: Install get_live_text
56 | env:
57 | GH_TOKEN: ${{ github.token }}
58 | run: |
59 | gh release download \
60 | --repo alexwlchan/get_live_text \
61 | --pattern get_live_text.aarch64-apple-darwin.zip \
62 | --output get_live_text.tar.gz
63 | tar -xzf get_live_text.tar.gz --directory /usr/local/bin
64 | chmod +x /usr/local/bin/get_live_text
65 | which get_live_text
66 |
67 | - name: Install ffprobe
68 | run: |
69 | curl -O https://evermeet.cx/ffmpeg/ffprobe-8.0.1.7z
70 | tar -xzf ffprobe-8.0.1.7z --directory /usr/local/bin
71 | chmod +x /usr/local/bin/ffprobe
72 | which ffprobe
73 |
74 | - name: Check formatting
75 | run: |
76 | ruff check .
77 | ruff format --check .
78 |
79 | - name: Check types
80 | run: mypy src tests
81 |
82 | - name: Run tests
83 | run: |
84 | coverage run -m pytest tests
85 | coverage report
86 |
--------------------------------------------------------------------------------
/src/chives/dates.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for interacting with timestamps and date strings.
3 |
4 | References:
5 | * https://alexwlchan.net/2025/messy-dates-in-json/
6 |
7 | """
8 |
9 | from collections.abc import Iterable, Iterator
10 | from datetime import datetime, timezone
11 | from typing import Any
12 |
13 |
def now() -> str:
    """
    Return the current UTC time in the standard format used by my static sites.

    The value is truncated to whole seconds, and the `+00:00` offset that
    `isoformat()` emits for UTC is shortened to a `Z` suffix, giving
    a string shaped like `2025-12-22T09:30:00Z`.
    """
    utc_now = datetime.now(tz=timezone.utc).replace(microsecond=0)
    return utc_now.isoformat().replace("+00:00", "Z")
24 |
25 |
def find_all_dates(json_value: Any) -> Iterator[tuple[dict[str, Any], str, str]]:
    """
    Walk a nested JSON value and yield every date-like field in it.

    A field counts as a date if it sits in a JSON object, its key is
    a string starting with `date_`, and its value is a string.  For each
    such field this yields a 3-tuple of:

    * the JSON object that contains the field
    * the key
    * the value

    Any other value inside an object or a list is searched recursively;
    values that are neither objects nor lists yield nothing.
    """
    if isinstance(json_value, dict):
        for name, entry in json_value.items():
            is_date_field = (
                isinstance(name, str)
                and name.startswith("date_")
                and isinstance(entry, str)
            )
            if is_date_field:
                yield json_value, name, entry
                continue

            # Not a date string itself, but it may contain nested dates.
            yield from find_all_dates(entry)
        return

    if isinstance(json_value, list):
        for item in json_value:
            yield from find_all_dates(item)
52 |
53 |
def date_matches_format(date_string: str, format: str) -> bool:
    """
    Check whether `date_string` can be parsed with a `strptime` format.

    Returns True if `datetime.strptime(date_string, format)` succeeds,
    and False if it raises a ValueError.
    """
    try:
        datetime.strptime(date_string, format)
    except ValueError:
        return False
    else:
        return True
64 |
65 |
def date_matches_any_format(date_string: str, formats: Iterable[str]) -> bool:
    """
    Check whether `date_string` can be parsed with at least one format.

    The `formats` are tried in order using `date_matches_format`, and the
    search stops at the first one that matches.  Returns False if none
    of them match, including when `formats` is empty.
    """
    for candidate in formats:
        if date_matches_format(date_string, candidate):
            return True
    return False
72 |
73 |
def reformat_date(s: str, /, orig_fmt: str) -> str:
    """
    Reformat a date to one of my desired formats.

    The string `s` is parsed using the `strptime` format `orig_fmt`, any
    fractional seconds are discarded, and the result is returned in UTC
    as a string shaped like `2025-11-12T15:34:39Z`.

    A parsed date with no timezone information is treated as already
    being in UTC; a date with an offset is converted to UTC.
    """
    if "%Z" not in orig_fmt:
        # Treat a literal `Z` suffix as a UTC offset: rewrite it in both
        # the input and the format so `%z` parses it as +0000.
        s = s.replace("Z", "+0000")
        orig_fmt = orig_fmt.replace("Z", "%z")

    parsed = datetime.strptime(s, orig_fmt).replace(microsecond=0)

    if parsed.tzinfo is None:
        as_utc = parsed.replace(tzinfo=timezone.utc)
    else:
        as_utc = parsed.astimezone(tz=timezone.utc)

    return as_utc.strftime("%Y-%m-%dT%H:%M:%S%z").replace("+0000", "Z")
88 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 | ## v21 - 2025-12-22
4 |
5 | Add a function `dates.now()` to return the current time in the timestamp format used by all my static sites.
6 |
7 | ## v20 - 2025-12-10
8 |
9 | Use concurrency in `test_no_videos_are_av1`, which can make it faster for
10 | larger media collections.
11 |
12 | ## v19 - 2025-12-09
13 |
14 | Allow passing both `width` and `height` as part of `ThumbnailConfig`, to constrain a thumbnail to a bounding box.
15 |
16 | ## v18 - 2025-12-08
17 |
18 | Expose `get_tint_colour()` as a public function from `chives.media`.
19 |
20 | ## v17 - 2025-12-07
21 |
22 | Account for [EXIF orientation](https://alexwlchan.net/til/2024/photos-can-have-orientation-in-exif/) when getting the width/height of image entities.
23 |
24 | ## v16 - 2025-12-06
25 |
26 | Don't require defining `list_tags_in_metadata()` in projects that don't use tags.
27 |
28 | ## v15 - 2025-12-06
29 |
30 | Fix a bunch of lints from ruff; remove an unused dependency.
31 |
32 | ## v14 - 2025-12-06
33 |
34 | Improve the error message on failed assertions in `StaticSiteTestSuite`.
35 |
36 | ## v13 - 2025-12-06
37 |
38 | Mark a couple more folders/files as ignored in `StaticSiteTestSuite`.
39 |
40 | ## v12 - 2025-12-06
41 |
42 | Add checks for fuzzy tag matching to `StaticSiteTestSuite`.
43 |
44 | ## v11 - 2025-12-06
45 |
46 | Add a new class `StaticSiteTestSuite` which runs my standard set of tests for a static site, e.g. checking every file is saved, checking timestamps use the correct format.
47 |
48 | ## v10 - 2025-12-05
49 |
50 | Add a new `is_url_safe()` function for checking if a path can be safely used in a URL.
51 |
52 | ## v9 - 2025-12-05
53 |
54 | This adds three models to `chives.media`: `ImageEntity`, `VideoEntity`, and `MediaEntity`.
55 | These have all the information I need to show an image/video in a web page.
56 |
57 | It also includes functions `create_image_entity` and `create_video_entity` which construct instances of these models.
58 |
59 | ## v8 - 2025-12-04
60 |
61 | Add the `is_mastodon_host()` function.
62 |
63 | ## v7 - 2025-12-03
64 |
65 | Add the `parse_tumblr_post_url()` function.
66 |
67 | ## v6 - 2025-12-03
68 |
69 | Add the `parse_mastodon_post_url()` function.
70 |
71 | ## v5 - 2025-12-01
72 |
73 | When calling `reformat_date()`, ensure all dates are converted to UTC.
74 |
75 | ## v4 - 2025-11-29
76 |
77 | Rename `chives.timestamps` to `chives.dates`.
78 |
79 | ## v3 - 2025-11-29
80 |
81 | Add the `clean_youtube_url()` function and `urls` extra.
82 | Rearrange the package structure slightly, to allow optional dependencies.
83 |
84 | ## v2 - 2025-11-28
85 |
86 | Add the `is_av1_video()` function for [detecting AV1-encoded videos](https://alexwlchan.net/2025/detecting-av1-videos/).
87 |
88 | ## v1 - 2025-11-28
89 |
90 | Initial release. Included functions:
91 |
92 | * `date_matches_any_format`
93 | * `date_matches_format`
94 | * `find_all_dates`
95 | * `reformat_date`
96 |
--------------------------------------------------------------------------------
/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by uv via the following command:
2 | # uv pip compile dev_requirements.in --output-file dev_requirements.txt
3 | -e file:.
4 | # via -r dev_requirements.in
5 | annotated-types==0.7.0
6 | # via pydantic
7 | anyio==4.12.0
8 | # via httpx
9 | build==1.3.0
10 | # via -r dev_requirements.in
11 | certifi==2025.11.12
12 | # via
13 | # httpcore
14 | # httpx
15 | # requests
16 | charset-normalizer==3.4.4
17 | # via requests
18 | coverage==7.12.0
19 | # via pytest-cov
20 | docutils==0.22.3
21 | # via readme-renderer
22 | h11==0.16.0
23 | # via httpcore
24 | httpcore==1.0.9
25 | # via httpx
26 | httpx==0.28.1
27 | # via alexwlchan-chives
28 | hyperlink==21.0.0
29 | # via alexwlchan-chives
30 | id==1.5.0
31 | # via twine
32 | idna==3.11
33 | # via
34 | # anyio
35 | # httpx
36 | # hyperlink
37 | # requests
38 | iniconfig==2.3.0
39 | # via pytest
40 | jaraco-classes==3.4.0
41 | # via keyring
42 | jaraco-context==6.0.1
43 | # via keyring
44 | jaraco-functools==4.3.0
45 | # via keyring
46 | javascript-data-files==1.4.1
47 | # via alexwlchan-chives
48 | keyring==25.7.0
49 | # via twine
50 | librt==0.7.3
51 | # via mypy
52 | markdown-it-py==4.0.0
53 | # via rich
54 | mdurl==0.1.2
55 | # via markdown-it-py
56 | more-itertools==10.8.0
57 | # via
58 | # jaraco-classes
59 | # jaraco-functools
60 | mypy==1.19.0
61 | # via -r dev_requirements.in
62 | mypy-extensions==1.1.0
63 | # via mypy
64 | nh3==0.3.2
65 | # via readme-renderer
66 | packaging==25.0
67 | # via
68 | # build
69 | # pytest
70 | # twine
71 | pathspec==0.12.1
72 | # via mypy
73 | pillow==12.0.0
74 | # via alexwlchan-chives
75 | pluggy==1.6.0
76 | # via
77 | # pytest
78 | # pytest-cov
79 | pydantic==2.12.5
80 | # via javascript-data-files
81 | pydantic-core==2.41.5
82 | # via pydantic
83 | pygments==2.19.2
84 | # via
85 | # pytest
86 | # readme-renderer
87 | # rich
88 | pyproject-hooks==1.2.0
89 | # via build
90 | pytest==9.0.2
91 | # via
92 | # alexwlchan-chives
93 | # pytest-cov
94 | # pytest-vcr
95 | # silver-nitrate
96 | pytest-cov==7.0.0
97 | # via -r dev_requirements.in
98 | pytest-vcr==1.0.2
99 | # via silver-nitrate
100 | pyyaml==6.0.3
101 | # via vcrpy
102 | rapidfuzz==3.14.3
103 | # via alexwlchan-chives
104 | readme-renderer==44.0
105 | # via twine
106 | requests==2.32.5
107 | # via
108 | # id
109 | # requests-toolbelt
110 | # twine
111 | requests-toolbelt==1.0.0
112 | # via twine
113 | rfc3986==2.0.0
114 | # via twine
115 | rich==14.2.0
116 | # via twine
117 | ruff==0.14.8
118 | # via -r dev_requirements.in
119 | silver-nitrate==1.8.1
120 | # via -r dev_requirements.in
121 | twine==6.2.0
122 | # via -r dev_requirements.in
123 | typing-extensions==4.15.0
124 | # via
125 | # mypy
126 | # pydantic
127 | # pydantic-core
128 | # typing-inspection
129 | typing-inspection==0.4.2
130 | # via pydantic
131 | urllib3==2.6.0
132 | # via
133 | # requests
134 | # twine
135 | vcrpy==8.0.0
136 | # via pytest-vcr
137 | wrapt==2.0.1
138 | # via vcrpy
139 |
--------------------------------------------------------------------------------
/src/chives/urls.py:
--------------------------------------------------------------------------------
1 | """Code for manipulating and tidying URLs."""
2 |
3 | from pathlib import Path
4 | import re
5 |
6 |
# The names exported by `from chives.urls import *`.
__all__ = [
    "clean_youtube_url",
    "is_mastodon_host",
    "is_url_safe",
    "parse_mastodon_post_url",
    "parse_tumblr_post_url",
]
14 |
15 |
def clean_youtube_url(url: str) -> str:
    """
    Return a YouTube URL with the `list`, `index` and `t` query
    parameters removed.

    Every other part of the URL is passed through `hyperlink` untouched.
    """
    # Imported lazily so `hyperlink` is only required by callers who
    # actually use this function.
    import hyperlink

    cleaned = hyperlink.parse(url)

    for unwanted_param in ("list", "index", "t"):
        cleaned = cleaned.remove(unwanted_param)

    return str(cleaned)
30 |
31 |
def is_mastodon_host(hostname: str) -> bool:
    """
    Check if a hostname is a Mastodon server.

    A handful of known servers are recognised without any network
    traffic.  Any other host is probed over HTTPS: first its
    `/.well-known/nodeinfo` document, then the NodeInfo document that
    links to.

    Returns False, rather than raising, if the host can't be reached,
    replies with an error status, or replies with something that isn't
    the expected JSON.
    """
    if hostname in {
        "hachyderm.io",
        "iconfactory.world",
        "mas.to",
        "mastodon.social",
        "social.alexwlchan.net",
    }:
        return True

    # See https://github.com/mastodon/mastodon/discussions/30547
    #
    # First we look at /.well-known/nodeinfo, which returns a response
    # like this for Mastodon servers:
    #
    #     {
    #       "links": [
    #         {
    #           "rel": "http://nodeinfo.diaspora.software/ns/schema/2.0",
    #           "href": "https://mastodon.online/nodeinfo/2.0"
    #         }
    #       ]
    #     }
    #
    import httpx

    # Connection-level failures (DNS, TLS, timeouts) are raised by
    # `httpx.get()` itself, so the request has to be inside the `try`
    # along with the status check.
    try:
        nodeinfo_resp = httpx.get(f"https://{hostname}/.well-known/nodeinfo")
        nodeinfo_resp.raise_for_status()
    except (httpx.HTTPError, httpx.InvalidURL):
        return False

    # Then we try to call $.links[0].href, which should return something
    # like:
    #
    #     {
    #       "version": "2.0",
    #       "software": {"name": "mastodon", "version": "4.5.2"},
    #       …
    #
    # `ValueError` covers a body that isn't JSON at all; `TypeError`
    # covers JSON which has a different shape to the one above.
    try:
        href = nodeinfo_resp.json()["links"][0]["href"]
    except (KeyError, IndexError, TypeError, ValueError):  # pragma: no cover
        return False

    if not isinstance(href, str):  # pragma: no cover
        return False

    try:
        link_resp = httpx.get(href)
        link_resp.raise_for_status()
    except (httpx.HTTPError, httpx.InvalidURL):  # pragma: no cover
        return False

    try:
        return bool(link_resp.json()["software"]["name"] == "mastodon")
    except (KeyError, IndexError, TypeError, ValueError):  # pragma: no cover
        return False
90 |
91 |
def parse_mastodon_post_url(url: str) -> tuple[str, str, str]:
    """
    Parse a Mastodon post URL into its component parts:
    server, account, post ID.

    Two shapes of URL are understood:

        https://{server}/@{acct}/{post_id}
        https://{host}/@{acct}@{server}/{post_id}

    In the second shape the server is taken from the handle rather than
    from the host of the URL.

    Raises ValueError if the URL doesn't match either shape.
    """
    import hyperlink

    u = hyperlink.parse(url)

    if len(u.path) != 2:
        raise ValueError("Cannot parse Mastodon URL!")

    handle, post_id = u.path

    if not handle.startswith("@"):
        raise ValueError("Cannot find `acct` in Mastodon URL!")

    if not re.fullmatch(r"[0-9]+", post_id):
        raise ValueError("Mastodon post ID is not numeric!")

    # Split `@acct` or `@acct@server` into its parts.  This replaces two
    # host-specific code paths: one failed with an unhelpful unpacking
    # error for `@acct` handles, the other glued `acct` and `server`
    # together for `@acct@server` handles.
    acct, separator, handle_server = handle.removeprefix("@").partition("@")

    handle_is_malformed = (
        not acct or "@" in handle_server or (separator and not handle_server)
    )
    if handle_is_malformed:  # pragma: no cover
        raise ValueError("Cannot find `acct` in Mastodon URL!")

    server = handle_server if separator else u.host

    return server, acct, post_id
117 |
118 |
def parse_tumblr_post_url(url: str) -> tuple[str, str]:
    """
    Parse a Tumblr URL into its component parts.

    Returns a tuple (blog_identifier, post ID).

    Two shapes of URL are understood:

        https://www.tumblr.com/{blog_identifier}/{post_id}[/...]
        https://{blog_identifier}.tumblr.com/post/{post_id}[/{slug}]

    Raises ValueError if the URL doesn't match either shape.
    """
    import hyperlink

    u = hyperlink.parse(url)

    # Checking the length up front means a too-short path falls through
    # to the ValueError below, rather than failing with an IndexError.
    has_post_id = len(u.path) >= 2 and u.path[1] != ""

    if u.host == "www.tumblr.com" and has_post_id:
        return u.path[0], u.path[1]

    # The slug after the post ID is optional, so two path components
    # are enough here.
    if u.host.endswith(".tumblr.com") and has_post_id and u.path[0] == "post":
        return u.host.removesuffix(".tumblr.com"), u.path[1]

    raise ValueError("Cannot parse Tumblr URL!")  # pragma: no cover
136 |
137 |
138 | def is_url_safe(path: str | Path) -> bool:
139 | """
140 | Returns True if a path is safe to use in a URL, False otherwise.
141 | """
142 | p = str(path)
143 | return not ("?" in p or "#" in p or "%" in p)
144 |
--------------------------------------------------------------------------------
/tests/fixtures/cassettes/TestIsMastodonHost.test_mastodon_servers[social.jvns.ca].yml:
--------------------------------------------------------------------------------
1 | interactions:
2 | - request:
3 | body: ''
4 | headers:
5 | Accept:
6 | - '*/*'
7 | Accept-Encoding:
8 | - gzip, deflate
9 | Connection:
10 | - keep-alive
11 | Host:
12 | - social.jvns.ca
13 | User-Agent:
14 | - python-httpx/0.28.1
15 | method: GET
16 | uri: https://social.jvns.ca/.well-known/nodeinfo
17 | response:
18 | body:
19 | string: '{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"https://social.jvns.ca/nodeinfo/2.0"}]}'
20 | headers:
21 | Connection:
22 | - keep-alive
23 | Content-Type:
24 | - application/json; charset=utf-8
25 | Date:
26 | - Thu, 04 Dec 2025 12:14:49 GMT
27 | Strict-Transport-Security:
28 | - max-age=31536000
29 | Transfer-Encoding:
30 | - chunked
31 | cache-control:
32 | - max-age=259200, public
33 | content-length:
34 | - '114'
35 | content-security-policy:
36 | - 'base-uri ''none''; default-src ''none''; frame-ancestors ''none''; font-src
37 | ''self'' https://social.jvns.ca; img-src ''self'' data: blob: https://social.jvns.ca
38 | https://cdn.masto.host; media-src ''self'' data: https://social.jvns.ca https://cdn.masto.host;
39 | manifest-src ''self'' https://social.jvns.ca; form-action ''self''; child-src
40 | ''self'' blob: https://social.jvns.ca; worker-src ''self'' blob: https://social.jvns.ca;
41 | connect-src ''self'' data: blob: https://social.jvns.ca https://cdn.masto.host
42 | wss://social.jvns.ca; script-src ''self'' https://social.jvns.ca ''wasm-unsafe-eval'';
43 | frame-src ''self'' https:; style-src ''self'' https://social.jvns.ca ''nonce-cYXJpX/juTVw0Sc+MAA7BQ=='''
44 | etag:
45 | - W/"41981c7ccfa1674c1535b6eea835d7e5"
46 | referrer-policy:
47 | - same-origin
48 | server:
49 | - Mastodon
50 | vary:
51 | - Origin
52 | x-content-type-options:
53 | - nosniff
54 | x-frame-options:
55 | - DENY
56 | x-request-id:
57 | - d4888141-9cb3-4305-b5dd-a8b0842a2d05
58 | x-runtime:
59 | - '0.003398'
60 | x-xss-protection:
61 | - '0'
62 | status:
63 | code: 200
64 | message: OK
65 | - request:
66 | body: ''
67 | headers:
68 | Accept:
69 | - '*/*'
70 | Accept-Encoding:
71 | - gzip, deflate
72 | Connection:
73 | - keep-alive
74 | Host:
75 | - social.jvns.ca
76 | User-Agent:
77 | - python-httpx/0.28.1
78 | method: GET
79 | uri: https://social.jvns.ca/nodeinfo/2.0
80 | response:
81 | body:
82 | string: '{"version":"2.0","software":{"name":"mastodon","version":"4.5.2"},"protocols":["activitypub"],"services":{"outbound":[],"inbound":[]},"usage":{"users":{"total":4,"activeMonth":4,"activeHalfyear":4},"localPosts":7409},"openRegistrations":false,"metadata":{"nodeName":"Mastodon","nodeDescription":""}}'
83 | headers:
84 | Connection:
85 | - keep-alive
86 | Content-Type:
87 | - application/json; charset=utf-8
88 | Date:
89 | - Thu, 04 Dec 2025 12:14:49 GMT
90 | Strict-Transport-Security:
91 | - max-age=31536000
92 | Transfer-Encoding:
93 | - chunked
94 | cache-control:
95 | - max-age=1800, public
96 | content-length:
97 | - '299'
98 | content-security-policy:
99 | - 'base-uri ''none''; default-src ''none''; frame-ancestors ''none''; font-src
100 | ''self'' https://social.jvns.ca; img-src ''self'' data: blob: https://social.jvns.ca
101 | https://cdn.masto.host; media-src ''self'' data: https://social.jvns.ca https://cdn.masto.host;
102 | manifest-src ''self'' https://social.jvns.ca; form-action ''self''; child-src
103 | ''self'' blob: https://social.jvns.ca; worker-src ''self'' blob: https://social.jvns.ca;
104 | connect-src ''self'' data: blob: https://social.jvns.ca https://cdn.masto.host
105 | wss://social.jvns.ca; script-src ''self'' https://social.jvns.ca ''wasm-unsafe-eval'';
106 | frame-src ''self'' https:; style-src ''self'' https://social.jvns.ca ''nonce-KudwCpfFUyr8bIzc9hLCuA=='''
107 | etag:
108 | - W/"def238b77fc5db88a115321ee60e49e7"
109 | referrer-policy:
110 | - same-origin
111 | server:
112 | - Mastodon
113 | vary:
114 | - Accept, Origin
115 | x-content-type-options:
116 | - nosniff
117 | x-frame-options:
118 | - DENY
119 | x-request-id:
120 | - 3ef92cee-53b7-41e1-b1f1-5b9b094c5616
121 | x-runtime:
122 | - '0.008559'
123 | x-xss-protection:
124 | - '0'
125 | status:
126 | code: 200
127 | message: OK
128 | version: 1
129 |
--------------------------------------------------------------------------------
/tests/test_urls.py:
--------------------------------------------------------------------------------
1 | """Tests for `chives.urls`."""
2 |
3 | from pathlib import Path
4 |
5 | import pytest
6 | from vcr.cassette import Cassette
7 |
8 | from chives.urls import (
9 | clean_youtube_url,
10 | is_mastodon_host,
11 | is_url_safe,
12 | parse_mastodon_post_url,
13 | parse_tumblr_post_url,
14 | )
15 |
16 |
@pytest.mark.parametrize(
    "url, cleaned_url",
    [
        (
            "https://www.youtube.com/watch?v=2OHPPSew2nY&list=WL&index=6&t=193s",
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
        ),
        (
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
        ),
    ],
)
def test_clean_youtube_url(url: str, cleaned_url: str) -> None:
    """
    The unwanted query parameters are removed from a YouTube URL, and
    a URL which is already clean is returned unchanged.
    """
    actual = clean_youtube_url(url)

    assert actual == cleaned_url
35 |
36 |
@pytest.mark.parametrize(
    "url, server, acct, post_id",
    [
        (
            "https://iconfactory.world/@Iconfactory/115650922400392083",
            "iconfactory.world",
            "Iconfactory",
            "115650922400392083",
        ),
        (
            "https://social.alexwlchan.net/@chris__martin@functional.cafe/113369395383537892",
            "functional.cafe",
            "chris__martin",
            "113369395383537892",
        ),
    ],
)
def test_parse_mastodon_post_url(
    url: str, server: str, acct: str, post_id: str
) -> None:
    """
    A Mastodon post URL is split into its server, account and post ID.
    """
    expected = (server, acct, post_id)

    assert parse_mastodon_post_url(url) == expected
61 |
62 |
@pytest.mark.parametrize(
    "url, error",
    [
        ("https://mastodon.social/", "Cannot parse Mastodon URL"),
        ("https://mastodon.social/about", "Cannot parse Mastodon URL"),
        ("https://mastodon.social/about/subdir", "Cannot find `acct`"),
        ("https://mastodon.social/@example/about", "Mastodon post ID is not numeric"),
    ],
)
def test_parse_mastodon_post_url_errors(url: str, error: str) -> None:
    """
    A URL which isn't a Mastodon post is rejected with a ValueError
    whose message explains which part couldn't be parsed.
    """
    expect_value_error = pytest.raises(ValueError, match=error)

    with expect_value_error:
        parse_mastodon_post_url(url)
78 |
79 |
@pytest.mark.parametrize(
    "url, blog_identifier, post_id",
    [
        (
            "https://www.tumblr.com/kynvillingur/792473255236796416/",
            "kynvillingur",
            "792473255236796416",
        ),
        (
            "https://cut3panda.tumblr.com/post/94093772689/for-some-people-the-more-you-get-to-know-them",
            "cut3panda",
            "94093772689",
        ),
    ],
)
def test_parse_tumblr_post_url(url: str, blog_identifier: str, post_id: str) -> None:
    """
    A Tumblr post URL is split into its blog identifier and post ID.
    """
    expected = (blog_identifier, post_id)

    assert parse_tumblr_post_url(url) == expected
100 |
101 |
class TestIsMastodonHost:
    """
    Tests for `is_mastodon_host`.

    Both tests request the `vcr_cassette` fixture but never use its
    value directly.
    """

    @pytest.mark.parametrize(
        "host", ["mastodon.social", "hachyderm.io", "social.jvns.ca"]
    )
    def test_mastodon_servers(self, host: str, vcr_cassette: Cassette) -> None:
        """
        Hosts which run Mastodon are recognised as Mastodon servers.
        """
        result = is_mastodon_host(host)

        assert result

    @pytest.mark.parametrize(
        "host",
        [
            # These are regular Internet websites which don't expose
            # the /.well-known/nodeinfo endpoint
            "example.com",
            "alexwlchan.net",
            #
            # PeerTube exposes /.well-known/nodeinfo, but it's running
            # different software.
            "peertube.tv",
        ],
    )
    def test_non_mastodon_servers(self, host: str, vcr_cassette: Cassette) -> None:
        """
        Hosts which run something else are not Mastodon servers.
        """
        result = is_mastodon_host(host)

        assert not result
134 |
135 |
class TestIsUrlSafe:
    """
    Tests for `is_url_safe`.
    """

    @pytest.mark.parametrize("path", ["example.txt", Path("a/b/cat.jpg")])
    def test_safe(self, path: str | Path) -> None:
        """Paths without any special characters are URL safe."""
        safe = is_url_safe(path)

        assert safe

    @pytest.mark.parametrize("path", ["is it?", Path("cat%c.jpg"), "a#b"])
    def test_unsafe(self, path: str | Path) -> None:
        """Paths containing `?`, `%` or `#` are not URL safe."""
        safe = is_url_safe(path)

        assert not safe
150 |
--------------------------------------------------------------------------------
/tests/fixtures/cassettes/TestIsMastodonHost.test_non_mastodon_servers[peertube.tv].yml:
--------------------------------------------------------------------------------
1 | interactions:
2 | - request:
3 | body: ''
4 | headers:
5 | Accept:
6 | - '*/*'
7 | Accept-Encoding:
8 | - gzip, deflate
9 | Connection:
10 | - keep-alive
11 | Host:
12 | - peertube.tv
13 | User-Agent:
14 | - python-httpx/0.28.1
15 | method: GET
16 | uri: https://peertube.tv/.well-known/nodeinfo
17 | response:
18 | body:
19 | string: '{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"https://peertube.tv/nodeinfo/2.0.json"}]}'
20 | headers:
21 | Connection:
22 | - keep-alive
23 | Content-Length:
24 | - '116'
25 | Content-Type:
26 | - application/json; charset=utf-8
27 | Date:
28 | - Thu, 04 Dec 2025 12:17:46 GMT
29 | Server:
30 | - nginx/1.18.0 (Ubuntu)
31 | access-control-allow-origin:
32 | - '*'
33 | cache-control:
34 | - max-age=548
35 | etag:
36 | - W/"74-uYd/TxZEF87Urak29pxyd08PwVE"
37 | tk:
38 | - N
39 | x-frame-options:
40 | - DENY
41 | x-powered-by:
42 | - PeerTube
43 | status:
44 | code: 200
45 | message: OK
46 | - request:
47 | body: ''
48 | headers:
49 | Accept:
50 | - '*/*'
51 | Accept-Encoding:
52 | - gzip, deflate
53 | Connection:
54 | - keep-alive
55 | Host:
56 | - peertube.tv
57 | User-Agent:
58 | - python-httpx/0.28.1
59 | method: GET
60 | uri: https://peertube.tv/nodeinfo/2.0.json
61 | response:
62 | body:
63 | string: '{"version":"2.0","software":{"name":"peertube","version":"5.2.0"},"protocols":["activitypub"],"services":{"inbound":[],"outbound":["atom1.0","rss2.0"]},"openRegistrations":false,"usage":{"users":{"total":609,"activeMonth":8,"activeHalfyear":35},"localPosts":18598,"localComments":93},"metadata":{"taxonomy":{"postsName":"Videos"},"nodeName":"PeerTube.TV","nodeDescription":"Videos
64 | sharing & live streaming on free open source software PeerTube! No ads, no
65 | tracking, no spam.","nodeConfig":{"search":{"remoteUri":{"users":true,"anonymous":false}},"plugin":{"registered":[{"npmName":"peertube-plugin-upload-instructions","name":"upload-instructions","version":"0.1.1","description":"Show
66 | an instructions modal right before uploading","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-custom-links","name":"custom-links","version":"0.0.10","description":"PeerTube
67 | plugin that allows you to add custom links on the bottom of the menu","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-glavliiit","name":"glavliiit","version":"0.0.10","description":"Enhanced
68 | moderation tool for PeerTube","clientScripts":{}},{"npmName":"peertube-plugin-categories","name":"categories","version":"1.2.7","description":"Manage
69 | video categories.","clientScripts":{"src/client/admin-plugin-settings.js":{"script":"src/client/admin-plugin-settings.js","scopes":["admin-plugin"]}}},{"npmName":"peertube-plugin-creative-commons","name":"creative-commons","version":"1.2.0","description":"Standardized
70 | display of Creative Commons licenses. Uses short identifiers like CC BY-SA
71 | 4.0 instead of descriptive text.","clientScripts":{"client/video-watch-client-plugin.js":{"script":"client/video-watch-client-plugin.js","scopes":["video-watch"]}}},{"npmName":"peertube-plugin-social-sharing-rus","name":"social-sharing-rus","version":"0.11.0","description":"Share
72 | a video or playlist URL on social media (Mastodon, WordPress, reddit, Twitter,
73 | etc.)","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-menu-items","name":"menu-items","version":"0.0.4","description":"PeerTube
74 | plugin menu-items","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-chapters","name":"chapters","version":"1.1.3","description":"PeerTube
75 | chapter plugin","clientScripts":{"dist/client/video-watch-client-plugin.js":{"script":"dist/client/video-watch-client-plugin.js","scopes":["video-watch","embed"]},"dist/client/video-edit-client-plugin.js":{"script":"dist/client/video-edit-client-plugin.js","scopes":["video-edit"]}}},{"npmName":"peertube-plugin-simplelogo","name":"simplelogo","version":"0.0.5","description":"Plugin
76 | that let you change logo and favicon on your PeerTube instance.","clientScripts":{"client/common-client-plugin.js":{"script":"client/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-video-annotation","name":"video-annotation","version":"0.0.7","description":"PeerTube
77 | plugin video annotation","clientScripts":{"dist/embed-client-plugin.js":{"script":"dist/embed-client-plugin.js","scopes":["embed"]},"dist/video-edit-client-plugin.js":{"script":"dist/video-edit-client-plugin.js","scopes":["video-edit"]},"dist/video-watch-client-plugin.js":{"script":"dist/video-watch-client-plugin.js","scopes":["video-watch"]}}},{"npmName":"peertube-plugin-livechat","name":"livechat","version":"7.2.1","description":"PeerTube
78 | plugin livechat: offers a way to embed a chat system into Peertube.","clientScripts":{"dist/client/videowatch-client-plugin.js":{"script":"dist/client/videowatch-client-plugin.js","scopes":["video-watch"]},"dist/client/common-client-plugin.js":{"script":"dist/client/common-client-plugin.js","scopes":["common"]},"dist/client/admin-plugin-client-plugin.js":{"script":"dist/client/admin-plugin-client-plugin.js","scopes":["admin-plugin"]}}}]},"theme":{"registered":[{"npmName":"peertube-theme-dark-evolution","name":"dark-evolution","version":"1.0.4","description":"Evolution
79 | of the official PeerTube dark theme","css":["assets/style.css"],"clientScripts":{}},{"npmName":"peertube-theme-dark","name":"dark","version":"2.5.0","description":"PeerTube
80 | dark theme","css":["assets/style.css"],"clientScripts":{}}],"default":"dark-evolution"},"email":{"enabled":true},"contactForm":{"enabled":true},"transcoding":{"hls":{"enabled":true},"webtorrent":{"enabled":true},"enabledResolutions":[144,240,360,480,720,1080]},"live":{"enabled":true,"transcoding":{"enabled":true,"enabledResolutions":[144,480,720,1080]}},"import":{"videos":{"http":{"enabled":true},"torrent":{"enabled":false}}},"autoBlacklist":{"videos":{"ofUsers":{"enabled":false}}},"avatar":{"file":{"size":{"max":4194304},"extensions":[".png",".jpeg",".jpg",".gif",".webp"]}},"video":{"image":{"extensions":[".png",".jpg",".jpeg",".webp"],"size":{"max":4194304}},"file":{"extensions":[".webm",".ogv",".ogg",".mp4",".mkv",".mov",".qt",".mqv",".m4v",".flv",".f4v",".wmv",".avi",".3gp",".3gpp",".3g2",".3gpp2",".nut",".mts",".m2ts",".mpv",".m2v",".m1v",".mpg",".mpe",".mpeg",".vob",".mxf",".mp3",".wma",".wav",".flac",".aac",".m4a",".ac3"]}},"videoCaption":{"file":{"size":{"max":20971520},"extensions":[".vtt",".srt"]}},"user":{"videoQuota":53687091200,"videoQuotaDaily":5368709120},"trending":{"videos":{"intervalDays":7}},"tracker":{"enabled":true}}}}'
81 | headers:
82 | Access-Control-Allow-Origin:
83 | - '*'
84 | Connection:
85 | - keep-alive
86 | Content-Length:
87 | - '5567'
88 | Content-Type:
89 | - application/json; charset=utf-8; profile="http://nodeinfo.diaspora.software/ns/schema/2.0#"
90 | Date:
91 | - Thu, 04 Dec 2025 12:17:47 GMT
92 | ETag:
93 | - W/"15bf-UHcLfIV97HliD7E2eKuWJsf3iEQ"
94 | Server:
95 | - nginx/1.18.0 (Ubuntu)
96 | Tk:
97 | - N
98 | X-Frame-Options:
99 | - DENY
100 | cache-control:
101 | - max-age=600
102 | x-powered-by:
103 | - PeerTube
104 | status:
105 | code: 200
106 | message: OK
107 | version: 1
108 |
--------------------------------------------------------------------------------
/src/chives/static_site_tests.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines a set of common tests and test helpers used for all my static sites.
3 | """
4 |
5 | from abc import ABC, abstractmethod
6 | import collections
7 | from collections.abc import Iterator
8 | import concurrent.futures
9 | import glob
10 | import itertools
11 | import os
12 | from pathlib import Path
13 | import subprocess
14 | from typing import TypeVar
15 |
16 | import pytest
17 | from rapidfuzz import fuzz
18 |
19 | from chives.dates import date_matches_any_format, find_all_dates
20 | from chives.media import is_av1_video
21 | from chives.urls import is_url_safe
22 |
23 |
# NOTE(review): nothing else in this module refers to `T`; the class below
# declares its own type parameter inline (`StaticSiteTestSuite[M]`).
# Confirm nothing imports it from here before removing it.
T = TypeVar("T")
25 |
26 |
27 | class StaticSiteTestSuite[M](ABC):
28 | """
29 | Defines a base set of tests to run against any of my static sites.
30 |
31 | This should be subclassed as a Test* class, which allows you to use
32 | the fixtures and write site-specific tests.
33 | """
34 |
35 | @abstractmethod
36 | @pytest.fixture
37 | def site_root(self) -> Path:
38 | """
39 | Returns the path to the folder at the root of the site.
40 | """
41 | ...
42 |
43 | @abstractmethod
44 | @pytest.fixture
45 | def metadata(self, site_root: Path) -> M:
46 | """
47 | Returns all the metadata for this project.
48 | """
49 | ...
50 |
51 | @abstractmethod
52 | def list_paths_in_metadata(self, metadata: M) -> set[Path]:
53 | """
54 | Returns a set of paths described in the metadata.
55 | """
56 | ...
57 |
58 | def list_tags_in_metadata(self, metadata: M) -> Iterator[str]: # pragma: no cover
59 | """
60 | Returns all the tags used in the metadata, once for every usage.
61 |
62 | For example, if three documents use the same tag, the tag will
63 | be returned three times.
64 |
65 | This method should be overriden in projects that use keyword tags;
66 | there are some sites that don't use tags.
67 | """
68 | yield from []
69 |
70 | def test_no_uncommitted_git_changes(self, site_root: Path) -> None:
71 | """
72 | There are no changes which haven't been committed to Git.
73 |
74 | This is especially useful when I run a script that tests all
75 | my static sites, that none of them have unsaved changes.
76 | """
77 | rc = subprocess.call(["git", "diff", "--exit-code", "--quiet"], cwd=site_root)
78 |
79 | assert rc == 0, "There are uncommitted changes!"
80 |
81 | def list_paths_saved_locally(self, site_root: Path) -> set[Path]:
82 | """
83 | Returns a set of paths saved locally.
84 | """
85 | paths_saved_locally = set()
86 |
87 | for root, _, filenames in site_root.walk():
88 | # Ignore certain top-level folders I don't care about.
89 | try:
90 | top_level_folder = root.relative_to(site_root).parts[0]
91 | except IndexError:
92 | pass
93 | else:
94 | if top_level_folder in {
95 | ".git",
96 | ".mypy_cache",
97 | ".pytest_cache",
98 | ".ruff_cache",
99 | ".venv",
100 | "data",
101 | "scripts",
102 | "static",
103 | "tests",
104 | "viewer",
105 | }:
106 | continue
107 |
108 | for f in filenames:
109 | if f == ".DS_Store":
110 | continue
111 |
112 | if root == site_root and f in {
113 | "Icon\r",
114 | ".gitignore",
115 | "index.html",
116 | "README.md",
117 | "TODO.md",
118 | }:
119 | continue
120 |
121 | if root == site_root and f.endswith(".js"):
122 | continue
123 |
124 | paths_saved_locally.add((root / f).relative_to(site_root))
125 |
126 | return paths_saved_locally
127 |
128 | def test_every_file_in_metadata_is_saved_locally(
129 | self, metadata: M, site_root: Path
130 | ) -> None:
131 | """
132 | Every file described in the metadata is saved locally.
133 | """
134 | paths_in_metadata = self.list_paths_in_metadata(metadata)
135 | paths_saved_locally = self.list_paths_saved_locally(site_root)
136 |
137 | assert paths_in_metadata - paths_saved_locally == set(), (
138 | f"Paths in metadata not saved locally: "
139 | f"{paths_in_metadata - paths_saved_locally}"
140 | )
141 |
142 | def test_every_local_file_is_in_metadata(
143 | self, metadata: M, site_root: Path
144 | ) -> None:
145 | """
146 | Every file saved locally is described in the metadata.
147 | """
148 | paths_in_metadata = self.list_paths_in_metadata(metadata)
149 | paths_saved_locally = self.list_paths_saved_locally(site_root)
150 |
151 | assert paths_saved_locally - paths_in_metadata == set(), (
152 | f"Paths saved locally not in metadata: "
153 | f"{paths_saved_locally - paths_in_metadata}"
154 | )
155 |
156 | def test_every_path_is_url_safe(self, site_root: Path) -> None:
157 | """
158 | Every path has a URL-safe path.
159 | """
160 | bad_paths = set()
161 |
162 | for root, _, filenames in site_root.walk():
163 | for f in filenames:
164 | p = site_root / root / f
165 | if not is_url_safe(p):
166 | bad_paths.add(p)
167 |
168 | assert bad_paths == set(), f"Found paths which aren't URL safe: {bad_paths}"
169 |
170 | @pytest.mark.skipif("SKIP_AV1" in os.environ, reason="skip slow test")
171 | def test_no_videos_are_av1(self, site_root: Path) -> None:
172 | """
173 | No videos are encoded in AV1 (which doesn't play on my iPhone).
174 |
175 | This test can be removed when I upgrade all my devices to ones with
176 | hardware AV1 decoding support.
177 |
178 | See https://alexwlchan.net/2025/av1-on-my-iphone/
179 | """
180 | av1_videos = set()
181 |
182 | with concurrent.futures.ThreadPoolExecutor() as executor:
183 | futures = {
184 | executor.submit(is_av1_video, site_root / p): p
185 | for p in glob.glob("**/*.mp4", root_dir=site_root, recursive=True)
186 | }
187 |
188 | concurrent.futures.wait(futures)
189 |
190 | av1_videos = {p for fut, p in futures.items() if fut.result()}
191 |
192 | assert av1_videos == set(), f"Found videos encoded with AV1: {av1_videos}"
193 |
194 | date_formats = [
195 | "%Y-%m-%dT%H:%M:%SZ",
196 | "%Y-%m-%d",
197 | ]
198 |
199 | def test_all_timestamps_are_consistent(self, metadata: M) -> None:
200 | """
201 | All the timestamps in my JSON use a consistent format.
202 |
203 | See https://alexwlchan.net/2025/messy-dates-in-json/
204 | """
205 | bad_date_strings = {
206 | date_string
207 | for _, _, date_string in find_all_dates(metadata)
208 | if not date_matches_any_format(date_string, self.date_formats)
209 | }
210 |
211 | assert bad_date_strings == set(), (
212 | f"Found incorrectly-formatted dates: {bad_date_strings}"
213 | )
214 |
215 | @staticmethod
216 | def find_similar_pairs(tags: dict[str, int]) -> Iterator[tuple[str, str]]:
217 | """
218 | Find pairs of similar-looking tags in the collection `tags`.
219 | """
220 | for t1, t2 in itertools.combinations(sorted(tags), 2):
221 | if fuzz.ratio(t1, t2) > 80:
222 | yield (t1, t2)
223 |
224 | known_similar_tags: set[tuple[str, str]] = set()
225 |
226 | def test_no_similar_tags(self, metadata: M) -> None:
227 | """
228 | There are no similar/misspelt tags.
229 | """
230 | tags = collections.Counter(self.list_tags_in_metadata(metadata))
231 |
232 | similar_tags = [
233 | f"{t1} ({tags[t1]}) / {t2} ({tags[t2]})"
234 | for t1, t2 in self.find_similar_pairs(tags)
235 | if (t1, t2) not in self.known_similar_tags
236 | ]
237 |
238 | assert similar_tags == [], f"Found similar tags: {similar_tags}"
239 |
--------------------------------------------------------------------------------
/tests/test_static_site_tests.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for `chives.static_site_tests`.
3 | """
4 |
5 | from collections.abc import Iterator
6 | from pathlib import Path
7 | import shutil
8 | import subprocess
9 | from typing import Any, TypeVar
10 |
11 | import pytest
12 |
13 | from chives import dates
14 | from chives.static_site_tests import StaticSiteTestSuite
15 |
16 |
# NOTE(review): `create_test_suite` below declares its own `M` inline with
# `def create_test_suite[M]`, which shadows this one inside that function.
# Confirm whether anything else in this file still needs it.
M = TypeVar("M")
18 |
19 |
@pytest.fixture
def site_root(tmp_path: Path) -> Path:
    """
    Use pytest's `tmp_path` directory as the root of a site.
    """
    return tmp_path
26 |
27 |
28 | def create_test_suite[M](
29 | site_root: Path,
30 | metadata: M,
31 | *,
32 | paths_in_metadata: set[Path] | None = None,
33 | tags_in_metadata: set[str] | None = None,
34 | ) -> StaticSiteTestSuite[M]:
35 | """
36 | Create a new instance of StaticSiteTestSuite with the hard-coded data
37 | provided.
38 | """
39 |
40 | class TestSuite(StaticSiteTestSuite[M]):
41 | def site_root(self) -> Path: # pragma: no cover
42 | return site_root
43 |
44 | def metadata(self, site_root: Path) -> M: # pragma: no cover
45 | return metadata
46 |
47 | def list_paths_in_metadata(self, metadata: M) -> set[Path]:
48 | return paths_in_metadata or set()
49 |
50 | def list_tags_in_metadata(self, metadata: M) -> Iterator[str]:
51 | yield from (tags_in_metadata or set())
52 |
53 | return TestSuite()
54 |
55 |
def test_paths_saved_locally_match_metadata(site_root: Path) -> None:
    """
    The suite compares the files saved under the site root against the
    paths listed in the metadata, and fails if either side has an
    entry the other lacks.
    """
    # Files to create under the site root.  Only the three `media/`
    # files are listed in the metadata below.
    filenames = [
        "index.html",
        "metadata.js",
        "media/cat.jpg",
        "media/dog.png",
        "media/emu.gif",
        "viewer/index.html",
        ".DS_Store",
    ]

    for filename in filenames:
        path = site_root / filename
        path.parent.mkdir(exist_ok=True)
        path.write_text("test")

    metadata = [Path("media/cat.jpg"), Path("media/dog.png"), Path("media/emu.gif")]

    suite = create_test_suite(site_root, metadata, paths_in_metadata=set(metadata))
    suite.test_every_file_in_metadata_is_saved_locally(metadata, site_root)
    suite.test_every_local_file_is_in_metadata(metadata, site_root)

    # A file which exists locally but isn't in the metadata is an error.
    extra_file = site_root / "media/fish.tiff"
    extra_file.write_text("test")

    with pytest.raises(AssertionError):
        suite.test_every_local_file_is_in_metadata(metadata, site_root)

    extra_file.unlink()

    # A file which is in the metadata but doesn't exist locally is an error.
    missing_file = site_root / "media/cat.jpg"
    missing_file.unlink()

    with pytest.raises(AssertionError):
        suite.test_every_file_in_metadata_is_saved_locally(metadata, site_root)
93 |
94 |
def test_checks_for_git_changes(site_root: Path) -> None:
    """
    The tests check that there are no uncommitted Git changes.
    """
    t = create_test_suite(site_root, metadata=[1, 2, 3])

    # Initially this should fail, because there isn't a Git repo in
    # the folder.
    with pytest.raises(AssertionError):
        t.test_no_uncommitted_git_changes(site_root)

    # Create a Git repo, add a file, and commit it.
    #
    # The identity and signing settings are passed on the command line
    # so the commit doesn't depend on the Git configuration of whoever
    # (or whatever CI runner) is running the tests.
    (site_root / "README.md").write_text("hello world")
    subprocess.check_call(["git", "init"], cwd=site_root)
    subprocess.check_call(["git", "add", "README.md"], cwd=site_root)
    # fmt: off
    subprocess.check_call(
        [
            "git",
            "-c", "user.name=chives tests",
            "-c", "user.email=chives-tests@example.com",
            "-c", "commit.gpgsign=false",
            "commit", "-m", "initial commit",
        ],
        cwd=site_root,
    )
    # fmt: on

    # Check there are no uncommitted Git changes
    t.test_no_uncommitted_git_changes(site_root)

    # Make a new change, and check it's spotted
    (site_root / "README.md").write_text("a different hello world")

    with pytest.raises(AssertionError):
        t.test_no_uncommitted_git_changes(site_root)
120 |
121 |
def test_checks_for_url_safe_paths(site_root: Path) -> None:
    """
    The suite flags any path which isn't safe to use in a URL.
    """
    suite = create_test_suite(site_root, metadata=[1, 2, 3])

    # An empty site has no paths, so there's nothing to object to.
    suite.test_every_path_is_url_safe(site_root)

    # Files with URL-safe names don't change that.
    safe_names = ("index.html", "metadata.js", ".DS_Store")
    for name in safe_names:
        site_root.joinpath(name).write_text("test")

    suite.test_every_path_is_url_safe(site_root)

    # A single file with a URL-unsafe name is enough to fail the check.
    unsafe_file = site_root.joinpath("a#b#c")
    unsafe_file.write_text("test")

    with pytest.raises(AssertionError):
        suite.test_every_path_is_url_safe(site_root)
147 |
148 |
def test_checks_for_av1_videos(site_root: Path) -> None:
    """
    The suite flags videos which are encoded with AV1.
    """
    suite = create_test_suite(site_root, metadata=[1, 2, 3])

    # An empty site has no videos, so the check passes trivially.
    suite.test_no_videos_are_av1(site_root)

    # An H.264-encoded video is fine.
    h264_name = "Sintel_360_10s_1MB_H264.mp4"
    shutil.copyfile(f"tests/fixtures/media/{h264_name}", site_root / h264_name)
    suite.test_no_videos_are_av1(site_root)

    # An AV1-encoded video alongside it makes the check fail.
    av1_name = "Sintel_360_10s_1MB_AV1.mp4"
    shutil.copyfile(f"tests/fixtures/media/{av1_name}", site_root / av1_name)

    with pytest.raises(AssertionError):
        suite.test_no_videos_are_av1(site_root)
172 |
173 |
class TestAllTimestampsAreConsistent:
    """
    Tests for the `test_all_timestamps_are_consistent` method.
    """

    @pytest.mark.parametrize(
        "metadata",
        [
            {"date_saved": "2025-12-06"},
            {"date_saved": dates.now()},
        ],
    )
    def test_allows_correct_date_formats(self, site_root: Path, metadata: Any) -> None:
        """
        The tests pass if all the dates are in the correct format.
        """
        t = create_test_suite(site_root, metadata)
        t.test_all_timestamps_are_consistent(metadata)

    @pytest.mark.parametrize("metadata", [{"date_saved": "AAAA-BB-CC"}])
    def test_rejects_incorrect_date_formats(
        self, site_root: Path, metadata: Any
    ) -> None:
        """
        The tests fail if the metadata has inconsistent date formats.
        """
        t = create_test_suite(site_root, metadata)
        with pytest.raises(AssertionError):
            t.test_all_timestamps_are_consistent(metadata)

    def test_can_override_date_formats(self, site_root: Path) -> None:
        """
        A previously-blocked date format is allowed if you add it to
        the `date_formats` list.
        """
        metadata = {"date_saved": "2025"}
        t = create_test_suite(site_root, metadata)

        # It fails with the default settings
        with pytest.raises(AssertionError):
            t.test_all_timestamps_are_consistent(metadata)

        # It passes if we add the format to `date_formats`.
        #
        # Rebind the attribute on this instance rather than appending in
        # place: if the default list is shared between suites (e.g. it is
        # defined at class level), an in-place append would leak "%Y"
        # into every other test that runs afterwards.
        t.date_formats = [*t.date_formats, "%Y"]
        t.test_all_timestamps_are_consistent(metadata)
219 |
220 |
def test_checks_for_similar_tags(site_root: Path) -> None:
    """
    The suite flags tags which look like misspellings of each other,
    unless the pair has been marked as known-similar.
    """
    metadata = [1, 2, 3]

    # Tags which are clearly different from each other are fine.
    distinct_tags = {"red", "green", "blue"}
    distinct_suite = create_test_suite(
        site_root, metadata, tags_in_metadata=distinct_tags
    )
    distinct_suite.test_no_similar_tags(metadata)

    # Tags which differ by a single letter are flagged.
    lookalike_tags = {"red robot", "rod robot", "rid robot"}
    lookalike_suite = create_test_suite(
        site_root, metadata, tags_in_metadata=lookalike_tags
    )
    with pytest.raises(AssertionError):
        lookalike_suite.test_no_similar_tags(metadata)

    # The same kind of pair is accepted once it's listed as known-similar.
    allowed_suite = create_test_suite(
        site_root,
        metadata,
        tags_in_metadata={"red robot", "rod robot", "green", "blue"},
    )
    allowed_suite.known_similar_tags = {("red robot", "rod robot")}
    allowed_suite.test_no_similar_tags(metadata)
248 |
--------------------------------------------------------------------------------
/src/chives/media.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for interacting with images/videos.
3 |
4 | Dependencies:
5 | * ffprobe
6 | * https://github.com/alexwlchan/create_thumbnail
7 | * https://github.com/alexwlchan/dominant_colours
8 | * https://github.com/alexwlchan/get_live_text
9 |
10 | References:
11 | * https://alexwlchan.net/2021/dominant-colours/
12 | * https://alexwlchan.net/2025/detecting-av1-videos/
13 | * https://stackoverflow.com/a/58567453
14 |
15 | """
16 |
17 | from fractions import Fraction
18 | import json
19 | from pathlib import Path
20 | import subprocess
21 | from typing import Literal, NotRequired, TypedDict, TYPE_CHECKING
22 |
23 | if TYPE_CHECKING:
24 | import PIL
25 |
26 |
# The public API of this module, i.e. the names exported by
# `from chives.media import *`.  Helpers whose names start with an
# underscore are deliberately left out.
__all__ = [
    "create_image_entity",
    "create_video_entity",
    "get_media_paths",
    "get_tint_colour",
    "is_av1_video",
    "ImageEntity",
    "MediaEntity",
    "VideoEntity",
]
37 |
38 |
def is_av1_video(path: str | Path) -> bool:
    """
    Report whether the first video stream in a file is encoded with AV1.

    This runs `ffprobe` on the file and compares the codec name it
    prints with "av1".
    """
    # fmt: off
    ffprobe_options = [
        # Set the logging level
        "-loglevel", "error",
        # Only look at the first video stream
        "-select_streams", "v:0",
        # Ask for the codec_name of that stream (e.g. av1)
        "-show_entries", "stream=codec_name",
        # Print the bare value, with no key or section wrappers
        "-output_format", "default=noprint_wrappers=1:nokey=1",
    ]
    # fmt: on

    codec_name = subprocess.check_output(
        ["ffprobe", *ffprobe_options, str(path)], text=True
    ).strip()

    return codec_name == "av1"
67 |
68 |
class ImageEntity(TypedDict):
    """
    ImageEntity contains all the fields I need to render an image
    in a web page.

    Use `create_image_entity()` to build one from a file on disk.
    """

    # Distinguishes images from other media entities; always "image"
    type: Literal["image"]

    # The path to the image on disk
    path: str

    # The path to a low-resolution thumbnail, if one was created
    thumbnail_path: NotRequired[str]

    # The display resolution of the image
    width: int
    height: int

    # A hex-encoded colour which is prominent in this image.
    tint_colour: str

    # Whether the image is animated (GIF and WebP only).
    # The only allowed value is True; omit the key otherwise.
    is_animated: NotRequired[Literal[True]]

    # Whether the image has transparent pixels.
    # The only allowed value is True; omit the key otherwise.
    has_transparency: NotRequired[Literal[True]]

    # The alt text of the image, if available
    alt_text: NotRequired[str]

    # The source URL of the image, if available
    source_url: NotRequired[str]
101 |
102 |
class VideoEntity(TypedDict):
    """
    VideoEntity contains all the fields I need to render a video
    in a web page.

    Use `create_video_entity()` to build one from files on disk.
    """

    # Distinguishes videos from other media entities; always "video"
    type: Literal["video"]

    # The path to the video on disk
    path: str

    # The poster image for the video
    poster: ImageEntity

    # The display resolution of the video
    width: int
    height: int

    # The duration of the video, as an HOURS:MM:SS.MICROSECONDS string
    duration: str

    # Path to the subtitles for the video, if available
    subtitles_path: NotRequired[str]

    # The source URL of the video, if available
    source_url: NotRequired[str]

    # Whether the video should play automatically.  This is used for
    # videos that are substituting for animated GIFs.
    # The only allowed value is True; omit the key otherwise.
    autoplay: NotRequired[Literal[True]]
133 |
134 |
# A media entity is either an image or a video; the value of the `type`
# key ("image" or "video") says which one it is.
MediaEntity = ImageEntity | VideoEntity
136 |
137 |
def get_media_paths(e: MediaEntity) -> set[Path]:
    """
    Return the set of all file paths referenced by a media entity.

    For an image, this is the image itself plus its thumbnail (if any).
    For a video, this is the video itself, its subtitles (if any), and
    every path referenced by its poster image.

    Raises TypeError if the entity has no `type` key, or if its type is
    neither "image" nor "video".
    """
    try:
        e["type"]
    except KeyError as err:
        # Chain the original error explicitly, so the traceback shows
        # this is a deliberate translation rather than a second failure.
        raise TypeError(f"Entity does not have a type: {e}") from err

    # The comparisons are written against `e["type"]` directly so that
    # type checkers can narrow `e` to the matching TypedDict.
    if e["type"] == "video":
        paths = {Path(e["path"])}
        if "subtitles_path" in e:
            paths.add(Path(e["subtitles_path"]))
        paths |= get_media_paths(e["poster"])
    elif e["type"] == "image":
        paths = {Path(e["path"])}
        if "thumbnail_path" in e:
            paths.add(Path(e["thumbnail_path"]))
    else:
        raise TypeError(f"Unrecognised entity type: {e['type']}")

    return paths
167 |
168 |
class ThumbnailConfig(TypedDict):
    """
    ThumbnailConfig describes the thumbnail to create for an image.

    The values are passed as options to the `create_thumbnail` tool.
    """

    # The directory passed as `--out-dir`
    out_dir: Path | str

    # The value passed as `--width`, if set
    width: NotRequired[int]

    # The value passed as `--height`, if set
    height: NotRequired[int]
173 |
174 |
def create_image_entity(
    path: str | Path,
    *,
    background: str = "#ffffff",
    alt_text: str | None = None,
    source_url: str | None = None,
    thumbnail_config: ThumbnailConfig | None = None,
    generate_transcript: bool = False,
) -> ImageEntity:
    """
    Create an ImageEntity for a saved image.

    Parameters:
      * path -- the image file to describe
      * background -- the background colour the tint colour should
        work against; passed to `get_tint_colour()`
      * alt_text -- alt text to store on the entity
      * source_url -- source URL to store on the entity
      * thumbnail_config -- if given, a thumbnail is created and its
        path is stored as `thumbnail_path`
      * generate_transcript -- if true, the image is passed to
        `_get_transcript()`, and any text it returns becomes the alt text

    Raises TypeError if both `alt_text` and `generate_transcript` are
    set, because they are two competing sources for the alt text.
    """
    # Reject contradictory arguments before doing any work: everything
    # below opens files, runs external tools, or writes a thumbnail
    # to disk.
    if alt_text is not None and generate_transcript:
        raise TypeError("You cannot set alt_text and generate_transcript=True!")

    from PIL import Image, ImageOps

    with Image.open(path) as im:
        # Account for EXIF orientation in the dimensions.
        # See https://alexwlchan.net/til/2024/photos-can-have-orientation-in-exif/
        transposed_im = ImageOps.exif_transpose(im)

        entity: ImageEntity = {
            "type": "image",
            "path": str(path),
            "tint_colour": get_tint_colour(path, background=background),
            "width": transposed_im.width,
            "height": transposed_im.height,
        }

        # The boolean keys are only ever stored as True; a missing key
        # means "no".
        if _is_animated(im):
            entity["is_animated"] = True

        if _has_transparency(im):
            entity["has_transparency"] = True

    if thumbnail_config is not None:
        entity["thumbnail_path"] = _create_thumbnail(path, thumbnail_config)

    if alt_text is not None:
        entity["alt_text"] = alt_text
    elif generate_transcript:
        # If no text is found, leave the `alt_text` key out entirely.
        transcript = _get_transcript(path)
        if transcript is not None:
            entity["alt_text"] = transcript

    if source_url is not None:
        entity["source_url"] = source_url

    return entity
225 |
226 |
def create_video_entity(
    video_path: str | Path,
    *,
    poster_path: str | Path,
    subtitles_path: str | Path | None = None,
    source_url: str | None = None,
    autoplay: bool = False,
    thumbnail_config: ThumbnailConfig | None = None,
    background: str = "#ffffff",
) -> VideoEntity:
    """
    Build a VideoEntity describing a video and its poster image.

    The dimensions and duration are read from the video file, and the
    poster is described by a nested ImageEntity.  `thumbnail_config`
    and `background` apply to the poster image.  The optional keys
    (`subtitles_path`, `source_url`, `autoplay`) are only stored when
    their argument is truthy.
    """
    video_width, video_height, video_duration = _get_video_data(video_path)

    entity: VideoEntity = {
        "type": "video",
        "path": str(video_path),
        "width": video_width,
        "height": video_height,
        "duration": video_duration,
        "poster": create_image_entity(
            poster_path, thumbnail_config=thumbnail_config, background=background
        ),
    }

    if subtitles_path:
        entity["subtitles_path"] = str(subtitles_path)
    if source_url:
        entity["source_url"] = source_url
    if autoplay:
        entity["autoplay"] = autoplay

    return entity
264 |
265 |
def _is_animated(im: "PIL.Image.Image") -> bool:
    """
    Report whether an image is animated.

    This is the image's `is_animated` attribute; an image which doesn't
    have that attribute is treated as not animated.
    """
    animated = getattr(im, "is_animated", False)
    return animated
271 |
272 |
def _has_transparency(im: "PIL.Image.Image") -> bool:
    """
    Returns True if an image has transparent pixels, False otherwise.

    An image counts as transparent if either:

      * it declares a `transparency` value in its `info` dict, or
      * it has an alpha band ("A") in which at least one pixel is
        less than fully opaque.

    Based on an answer by Vinyl Da.i'gyu-Kazotetsu on Stack Overflow:
    https://stackoverflow.com/a/58567453
    """
    if im.info.get("transparency") is not None:
        return True

    # Look at the alpha band by name rather than matching on the mode,
    # so this covers every mode with an "A" band (e.g. "LA" as well as
    # "RGBA").
    if "A" in im.getbands():
        # For a single band, getextrema() is a (min, max) pair.
        lowest_alpha = im.getchannel("A").getextrema()[0]
        return lowest_alpha < 255  # type: ignore

    return False
294 |
295 |
def get_tint_colour(path: str | Path, *, background: str) -> str:
    """
    Pick a tint colour for an image, to be shown against `background`.

    The background can be the name "white" or "black", which are
    converted to hex codes; any other value is passed to the
    `dominant_colours` tool unchanged.  The result is whatever the
    tool prints, with surrounding whitespace removed.
    """
    named_backgrounds = {"white": "#ffffff", "black": "#000000"}
    background_colour = named_backgrounds.get(background, background)

    cmd = ["dominant_colours", str(path), "--best-against-bg", background_colour]
    tool_output = subprocess.check_output(cmd, text=True)

    return tool_output.strip()
309 |
310 |
def _get_transcript(path: str | Path) -> str | None:
    """
    Return the text found in an image by `get_live_text`, or None if
    the tool printed nothing but whitespace.
    """
    tool_output = subprocess.check_output(["get_live_text", str(path)], text=True)
    transcript = tool_output.strip()

    if transcript:
        return transcript
    return None
318 |
319 |
def _create_thumbnail(path: str | Path, thumbnail_config: ThumbnailConfig) -> str:
    """
    Run `create_thumbnail` for an image and return what it prints,
    which is the path to the new thumbnail.

    The `width` and `height` options are only passed to the tool if
    they are present in the config.
    """
    thumbnail_cmd = [
        "create_thumbnail",
        str(path),
        "--out-dir",
        thumbnail_config["out_dir"],
    ]

    if "width" in thumbnail_config:
        thumbnail_cmd += ["--width", str(thumbnail_config["width"])]

    if "height" in thumbnail_config:
        thumbnail_cmd += ["--height", str(thumbnail_config["height"])]

    thumbnail_path = subprocess.check_output(thumbnail_cmd, text=True)
    return thumbnail_path
333 |
334 |
def _get_video_data(video_path: str | Path) -> tuple[int, int, str]:
    """
    Returns the display width, display height and duration of a video.

    The width is the stored width scaled by the pixel aspect ratio, so
    it matches what a viewer sees rather than what is stored in the
    file.  The duration is a string in HH:MM:SS.microseconds format.

    Raises `subprocess.CalledProcessError` if ffprobe fails, IndexError
    if the file has no video stream, and KeyError if neither the video
    stream nor the container reports a duration.
    """
    cmd = [
        "ffprobe",
        #
        # verbosity level = error
        "-v",
        "error",
        #
        # only get information about the first video stream
        "-select_streams",
        "v:0",
        #
        # only gather the entries I'm interested in.  The container
        # ("format") duration is a fallback for files whose video
        # stream doesn't report a duration of its own.
        "-show_entries",
        "stream=width,height,sample_aspect_ratio,duration:format=duration",
        #
        # print the duration in HH:MM:SS.microseconds format
        "-sexagesimal",
        #
        # print output in JSON, which is easier to parse
        "-print_format",
        "json",
        #
        # input file
        str(video_path),
    ]

    output = subprocess.check_output(cmd)
    ffprobe_resp = json.loads(output)

    # The output will be structured something like:
    #
    #     {
    #         "streams": [
    #             {
    #                 "width": 1920,
    #                 "height": 1080,
    #                 "sample_aspect_ratio": "45:64",
    #                 "duration": "0:00:10.000000"
    #             }
    #         ],
    #         "format": {
    #             "duration": "0:00:10.000000"
    #         },
    #         …
    #     }
    #
    # If the video doesn't specify a pixel aspect ratio, then it won't
    # have a `sample_aspect_ratio` key.
    video_stream = ffprobe_resp["streams"][0]

    try:
        pixel_aspect_ratio = Fraction(
            video_stream["sample_aspect_ratio"].replace(":", "/")
        )
    except (KeyError, ZeroDivisionError):
        pixel_aspect_ratio = Fraction(1)

    # A zero ratio (e.g. "0:1") means the ratio is unknown, not that
    # the video has no width -- treat it as square pixels.
    if pixel_aspect_ratio <= 0:
        pixel_aspect_ratio = Fraction(1)

    width = round(video_stream["width"] * pixel_aspect_ratio)
    height = video_stream["height"]

    try:
        duration = video_stream["duration"]
    except KeyError:
        duration = ffprobe_resp["format"]["duration"]

    return width, height, duration
397 |
--------------------------------------------------------------------------------
/tests/test_media.py:
--------------------------------------------------------------------------------
1 | """Tests for `chives.media`."""
2 |
3 | from pathlib import Path
4 | from typing import Any
5 |
6 | from PIL import Image
7 | import pytest
8 |
9 | from chives.media import (
10 | create_image_entity,
11 | create_video_entity,
12 | get_media_paths,
13 | is_av1_video,
14 | )
15 |
16 |
@pytest.fixture
def fixtures_dir() -> Path:
    """
    Returns the directory where media fixtures are stored.

    This is a relative path, so it is resolved against the directory
    the tests are run from.
    """
    return Path("tests/fixtures/media")
23 |
24 |
def test_is_av1_video(fixtures_dir: Path) -> None:
    """is_av1_video tells AV1 videos apart from other codecs."""
    # Both clips are the same footage in different codecs, downloaded from
    # https://test-videos.co.uk/sintel/mp4-h264 and
    # https://test-videos.co.uk/sintel/mp4-av1
    h264_clip = fixtures_dir / "Sintel_360_10s_1MB_H264.mp4"
    av1_clip = fixtures_dir / "Sintel_360_10s_1MB_AV1.mp4"

    assert not is_av1_video(h264_clip)
    assert is_av1_video(av1_clip)
32 |
33 |
class TestCreateImageEntity:
    """
    Tests for create_image_entity().
    """

    def test_basic_image(self, fixtures_dir: Path) -> None:
        """
        Get an image entity for a basic blue rectangle.
        """
        entity = create_image_entity(fixtures_dir / "blue.png")
        assert entity == {
            "type": "image",
            "path": "tests/fixtures/media/blue.png",
            "width": 32,
            "height": 16,
            "tint_colour": "#0000ff",
        }

    @pytest.mark.parametrize(
        "filename",
        [
            # This is a solid blue image with a section in the middle deleted
            "blue_with_hole.png",
            #
            # An asteroid belt drawn in TikZ by TeX.SE user Qrrbrbirlbel,
            # which has `transparency` in its im.info.
            # Downloaded from http://tex.stackexchange.com/a/111974/9668
            "asteroid_belt.png",
        ],
    )
    def test_image_with_transparency(self, fixtures_dir: Path, filename: str) -> None:
        """
        If an image has transparent pixels, then the entity has
        `has_transparency=True`.
        """
        entity = create_image_entity(fixtures_dir / filename)
        assert entity["has_transparency"]

    @pytest.mark.parametrize(
        "filename",
        [
            "blue.png",
            "space.jpg",
            #
            # An animated electric field drawn in TikZ.
            # Downloaded from https://tex.stackexchange.com/a/158930/9668
            "electric_field.gif",
        ],
    )
    def test_image_without_transparency(
        self, fixtures_dir: Path, filename: str
    ) -> None:
        """
        If an image has no transparent pixels, then the entity doesn't
        have a `has_transparency` key.
        """
        entity = create_image_entity(fixtures_dir / filename)
        assert "has_transparency" not in entity

    # These test files were downloaded from Dave Perrett's repo:
    # https://github.com/recurser/exif-orientation-examples
    #
    # The tests expect every file to have the same display dimensions,
    # whichever orientation is recorded in its EXIF metadata.

    @pytest.mark.parametrize(
        "filename",
        [
            "Landscape_0.jpg",
            "Landscape_1.jpg",
            "Landscape_2.jpg",
            "Landscape_3.jpg",
            "Landscape_4.jpg",
            "Landscape_5.jpg",
            "Landscape_6.jpg",
            "Landscape_7.jpg",
            "Landscape_8.jpg",
        ],
    )
    def test_accounts_for_exif_orientation(
        self, fixtures_dir: Path, filename: str
    ) -> None:
        """
        The dimensions are the display dimensions, which accounts for
        the EXIF orientation.
        """
        entity = create_image_entity(fixtures_dir / filename)
        assert (entity["width"], entity["height"]) == (1800, 1200)

    def test_animated_image(self, fixtures_dir: Path) -> None:
        """
        If an image is animated, the entity has `is_animated=True`.
        """
        # An animated electric field drawn in TikZ.
        # Downloaded from https://tex.stackexchange.com/a/158930/9668
        entity = create_image_entity(fixtures_dir / "electric_field.gif")
        assert entity["is_animated"]

    def test_other_attrs_are_forwarded(self, fixtures_dir: Path) -> None:
        """
        The `alt_text` and `source_url` values are forwarded to the
        final entity.
        """
        entity = create_image_entity(
            fixtures_dir / "blue.png",
            alt_text="This is the alt text",
            source_url="https://example.com/blue.png",
        )

        assert entity["alt_text"] == "This is the alt text"
        assert entity["source_url"] == "https://example.com/blue.png"

    def test_alt_text_and_generate_transcript_is_error(
        self, fixtures_dir: Path
    ) -> None:
        """
        You can't pass `alt_text` and `generate_transcript` at the same time.
        """
        with pytest.raises(TypeError):
            create_image_entity(
                fixtures_dir / "blue.png",
                alt_text="This is the alt text",
                generate_transcript=True,
            )

    def test_generate_transcript(self, fixtures_dir: Path) -> None:
        """
        If you pass `generate_transcript=True`, the image is OCR'd for alt text.
        """
        entity = create_image_entity(
            fixtures_dir / "underlined_text.png", generate_transcript=True
        )
        assert entity["alt_text"] == "I visited Berlin in Germany."

    def test_generate_transcript_if_no_text(self, fixtures_dir: Path) -> None:
        """
        If you pass `generate_transcript=True` for an image with no text,
        you don't get any alt text.
        """
        entity = create_image_entity(
            fixtures_dir / "blue.png", generate_transcript=True
        )
        assert "alt_text" not in entity

    def test_create_thumbnail_by_width(
        self, fixtures_dir: Path, tmp_path: Path
    ) -> None:
        """
        Create a thumbnail by width.
        """
        entity = create_image_entity(
            fixtures_dir / "blue.png",
            thumbnail_config={"out_dir": tmp_path / "thumbnails", "width": 10},
        )

        assert Path(entity["thumbnail_path"]).exists()

        with Image.open(entity["thumbnail_path"]) as im:
            assert im.width == 10

    def test_create_thumbnail_by_height(
        self, fixtures_dir: Path, tmp_path: Path
    ) -> None:
        """
        Create a thumbnail by height.
        """
        entity = create_image_entity(
            fixtures_dir / "blue.png",
            thumbnail_config={"out_dir": tmp_path / "thumbnails", "height": 5},
        )

        assert Path(entity["thumbnail_path"]).exists()

        with Image.open(entity["thumbnail_path"]) as im:
            assert im.height == 5

    @pytest.mark.parametrize(
        "background, tint_colour",
        [
            ("white", "#005493"),
            ("black", "#b3fdff"),
            ("#111111", "#b3fdff"),
        ],
    )
    def test_tint_colour_is_based_on_background(
        self, fixtures_dir: Path, background: str, tint_colour: str
    ) -> None:
        """
        The tint colour is chosen to suit the background.
        """
        # This is a checkerboard pattern made of 2 different shades of
        # turquoise, a light and a dark.
        entity = create_image_entity(
            fixtures_dir / "checkerboard.png", background=background
        )
        assert entity["tint_colour"] == tint_colour
227 |
228 |
class TestCreateVideoEntity:
    """
    Tests for `create_video_entity()`.
    """

    def test_basic_video(self, fixtures_dir: Path) -> None:
        """
        Get a video entity for a basic video.
        """
        # This video was downloaded from
        # https://test-videos.co.uk/sintel/mp4-h264
        entity = create_video_entity(
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            poster_path=fixtures_dir / "Sintel_360_10s_1MB_H264.png",
        )
        assert entity == {
            "type": "video",
            "path": "tests/fixtures/media/Sintel_360_10s_1MB_H264.mp4",
            "width": 640,
            "height": 360,
            "duration": "0:00:10.000000",
            "poster": {
                "type": "image",
                "path": "tests/fixtures/media/Sintel_360_10s_1MB_H264.png",
                "tint_colour": "#020202",
                "width": 640,
                "height": 360,
            },
        }

    def test_other_attrs_are_forwarded(self, fixtures_dir: Path) -> None:
        """
        The `subtitles_path`, `source_url` and `autoplay` values are
        forwarded to the final entity.
        """
        entity = create_video_entity(
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            poster_path=fixtures_dir / "Sintel_360_10s_1MB_H264.png",
            subtitles_path=fixtures_dir / "Sintel_360_10s_1MB_H264.en.vtt",
            source_url="https://test-videos.co.uk/sintel/mp4-h264",
            autoplay=True,
        )

        assert (
            entity["subtitles_path"]
            == "tests/fixtures/media/Sintel_360_10s_1MB_H264.en.vtt"
        )
        assert entity["source_url"] == "https://test-videos.co.uk/sintel/mp4-h264"
        assert entity["autoplay"]

    def test_gets_display_dimensions(self, fixtures_dir: Path) -> None:
        """
        The width/height dimensions are based on the display aspect ratio,
        not the storage aspect ratio.

        See https://alexwlchan.net/2025/square-pixels/
        """
        # This is a short clip of https://www.youtube.com/watch?v=HHhyznZ2u4E
        entity = create_video_entity(
            fixtures_dir / "Mars 2020 EDL Remastered [HHhyznZ2u4E].mp4",
            poster_path=fixtures_dir / "Mars 2020 EDL Remastered [HHhyznZ2u4E].jpg",
        )

        assert entity["width"] == 1350
        assert entity["height"] == 1080

    def test_video_without_sample_aspect_ratio(self, fixtures_dir: Path) -> None:
        """
        Get the width/height dimensions of a video that doesn't have
        `sample_aspect_ratio` in its metadata.
        """
        # This is a short clip from Wings (1927).
        entity = create_video_entity(
            fixtures_dir / "wings_tracking_shot.mp4",
            poster_path=fixtures_dir / "wings_tracking_shot.jpg",
        )

        assert entity["width"] == 960
        assert entity["height"] == 720

    @pytest.mark.parametrize(
        "background, tint_colour",
        [
            ("white", "#005493"),
            ("black", "#b3fdff"),
            ("#111111", "#b3fdff"),
        ],
    )
    def test_tint_colour_is_based_on_background(
        self, fixtures_dir: Path, background: str, tint_colour: str
    ) -> None:
        """
        The tint colour of the poster is chosen to suit the background.
        """
        # The poster image is a checkerboard pattern made of 2 different
        # shades of turquoise, a light and a dark.
        entity = create_video_entity(
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            poster_path=fixtures_dir / "checkerboard.png",
            background=background,
        )
        assert entity["poster"]["tint_colour"] == tint_colour

    def test_video_with_thumbnail(self, fixtures_dir: Path, tmp_path: Path) -> None:
        """
        Create a low-resolution thumbnail of the poster image.
        """
        entity = create_video_entity(
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            poster_path=fixtures_dir / "Sintel_360_10s_1MB_H264.png",
            thumbnail_config={"out_dir": tmp_path / "thumbnails", "width": 300},
        )

        # The thumbnail is saved in `out_dir`, under the same filename
        # as the poster image.
        assert entity["poster"]["thumbnail_path"] == str(
            tmp_path / "thumbnails/Sintel_360_10s_1MB_H264.png"
        )
        assert Path(entity["poster"]["thumbnail_path"]).exists()
346 |
347 |
class TestGetMediaPaths:
    """
    Tests for `get_media_paths`.
    """

    def test_basic_image(self, fixtures_dir: Path) -> None:
        """
        An image with no thumbnail only has one path: the image.
        """
        entity = create_image_entity(fixtures_dir / "blue.png")
        assert get_media_paths(entity) == {fixtures_dir / "blue.png"}

    def test_image_with_thumbnail(self, fixtures_dir: Path, tmp_path: Path) -> None:
        """
        An image with a thumbnail has two paths: the image and the
        thumbnail.
        """
        entity = create_image_entity(
            fixtures_dir / "blue.png",
            thumbnail_config={"out_dir": tmp_path / "thumbnails", "width": 300},
        )
        assert get_media_paths(entity) == {
            fixtures_dir / "blue.png",
            tmp_path / "thumbnails/blue.png",
        }

    def test_video(self, fixtures_dir: Path) -> None:
        """
        A video has two paths: the video and the poster image.
        """
        entity = create_video_entity(
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            poster_path=fixtures_dir / "Sintel_360_10s_1MB_H264.png",
        )
        assert get_media_paths(entity) == {
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            fixtures_dir / "Sintel_360_10s_1MB_H264.png",
        }

    def test_video_with_subtitles(self, fixtures_dir: Path) -> None:
        """
        A video with subtitles has three paths: the video, the subtitles,
        and the poster image.
        """
        entity = create_video_entity(
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            poster_path=fixtures_dir / "Sintel_360_10s_1MB_H264.png",
            subtitles_path=fixtures_dir / "Sintel_360_10s_1MB_H264.en.vtt",
        )
        assert get_media_paths(entity) == {
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            fixtures_dir / "Sintel_360_10s_1MB_H264.png",
            fixtures_dir / "Sintel_360_10s_1MB_H264.en.vtt",
        }

    def test_video_with_thumbnail(self, fixtures_dir: Path, tmp_path: Path) -> None:
        """
        A video with a poster thumbnail has three paths: the video,
        the poster image, and the poster thumbnail.
        """
        entity = create_video_entity(
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            poster_path=fixtures_dir / "Sintel_360_10s_1MB_H264.png",
            thumbnail_config={"out_dir": tmp_path / "thumbnails", "width": 300},
        )
        assert get_media_paths(entity) == {
            fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
            fixtures_dir / "Sintel_360_10s_1MB_H264.png",
            tmp_path / "thumbnails/Sintel_360_10s_1MB_H264.png",
        }

    # The two bad entities are one with no `type` key at all, and one
    # whose `type` is neither "image" nor "video".
    @pytest.mark.parametrize("bad_entity", [{}, {"type": "shape"}])
    def test_unrecognised_entity_is_error(self, bad_entity: Any) -> None:
        """
        Getting media paths for an unrecognised entity type is a TypeError.
        """
        with pytest.raises(TypeError):
            get_media_paths(bad_entity)
426 |
--------------------------------------------------------------------------------