├── src └── chives │ ├── py.typed │ ├── __init__.py │ ├── dates.py │ ├── urls.py │ ├── static_site_tests.py │ └── media.py ├── tests ├── stubs │ └── vcr.cassette.pyi ├── fixtures │ ├── media │ │ ├── blue.png │ │ ├── space.jpg │ │ ├── Landscape_0.jpg │ │ ├── Landscape_1.jpg │ │ ├── Landscape_2.jpg │ │ ├── Landscape_3.jpg │ │ ├── Landscape_4.jpg │ │ ├── Landscape_5.jpg │ │ ├── Landscape_6.jpg │ │ ├── Landscape_7.jpg │ │ ├── Landscape_8.jpg │ │ ├── checkerboard.png │ │ ├── asteroid_belt.png │ │ ├── asteroid_belt_P.png │ │ ├── blue_with_hole.png │ │ ├── electric_field.gif │ │ ├── underlined_text.png │ │ ├── wings_tracking_shot.jpg │ │ ├── wings_tracking_shot.mp4 │ │ ├── Sintel_360_10s_1MB_AV1.mp4 │ │ ├── Sintel_360_10s_1MB_H264.mp4 │ │ ├── Sintel_360_10s_1MB_H264.png │ │ ├── Mars 2020 EDL Remastered [HHhyznZ2u4E].jpg │ │ └── Mars 2020 EDL Remastered [HHhyznZ2u4E].mp4 │ └── cassettes │ │ ├── TestIsMastodonHost.test_non_mastodon_servers[alexwlchan.net].yml │ │ ├── TestIsMastodonHost.test_non_mastodon_servers[example.com].yml │ │ ├── TestIsMastodonHost.test_mastodon_servers[social.jvns.ca].yml │ │ └── TestIsMastodonHost.test_non_mastodon_servers[peertube.tv].yml ├── conftest.py ├── test_dates.py ├── test_urls.py ├── test_static_site_tests.py └── test_media.py ├── .gitignore ├── dev_requirements.in ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── test.yml ├── LICENSE ├── CONTRIBUTING.md ├── pyproject.toml ├── README.md ├── CHANGELOG.md └── dev_requirements.txt /src/chives/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/stubs/vcr.cassette.pyi: -------------------------------------------------------------------------------- 1 | class Cassette: ... 
2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | .coverage 4 | dist 5 | -------------------------------------------------------------------------------- /tests/fixtures/media/blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/blue.png -------------------------------------------------------------------------------- /tests/fixtures/media/space.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/space.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_0.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_1.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_2.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_3.jpg 
-------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_4.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_5.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_6.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_7.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Landscape_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Landscape_8.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/checkerboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/checkerboard.png -------------------------------------------------------------------------------- /tests/fixtures/media/asteroid_belt.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/asteroid_belt.png -------------------------------------------------------------------------------- /tests/fixtures/media/asteroid_belt_P.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/asteroid_belt_P.png -------------------------------------------------------------------------------- /tests/fixtures/media/blue_with_hole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/blue_with_hole.png -------------------------------------------------------------------------------- /tests/fixtures/media/electric_field.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/electric_field.gif -------------------------------------------------------------------------------- /tests/fixtures/media/underlined_text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/underlined_text.png -------------------------------------------------------------------------------- /tests/fixtures/media/wings_tracking_shot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/wings_tracking_shot.jpg -------------------------------------------------------------------------------- /tests/fixtures/media/wings_tracking_shot.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/wings_tracking_shot.mp4 
-------------------------------------------------------------------------------- /tests/fixtures/media/Sintel_360_10s_1MB_AV1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Sintel_360_10s_1MB_AV1.mp4 -------------------------------------------------------------------------------- /dev_requirements.in: -------------------------------------------------------------------------------- 1 | -e file:.[media,static_site_tests,urls] 2 | 3 | build 4 | mypy 5 | pytest-cov 6 | ruff 7 | silver-nitrate[cassettes] 8 | twine 9 | -------------------------------------------------------------------------------- /tests/fixtures/media/Sintel_360_10s_1MB_H264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Sintel_360_10s_1MB_H264.mp4 -------------------------------------------------------------------------------- /tests/fixtures/media/Sintel_360_10s_1MB_H264.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Sintel_360_10s_1MB_H264.png -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | requirements.txt linguist-generated=true 2 | dev_requirements.txt linguist-generated=true 3 | 4 | tests/fixtures/cassettes/*.yml linguist-generated=true 5 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Shared helpers and test fixtures.""" 2 | 3 | from nitrate.cassettes import cassette_name, vcr_cassette 4 | 5 | __all__ = ["cassette_name", "vcr_cassette"] 6 | 
-------------------------------------------------------------------------------- /tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].jpg -------------------------------------------------------------------------------- /tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/chives/main/tests/fixtures/media/Mars 2020 EDL Remastered [HHhyznZ2u4E].mp4 -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | day: monday 8 | time: "09:00" 9 | - package-ecosystem: "pip" 10 | directory: "/" 11 | schedule: 12 | interval: weekly 13 | day: monday 14 | time: "09:00" 15 | -------------------------------------------------------------------------------- /src/chives/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | chives is a collection of Python functions for working with my local 3 | media archives. 4 | 5 | I store a lot of media archives as static websites [1][2], and I use 6 | Python scripts to manage my media. This package has some functions 7 | I share across multiple sites. 
8 | 9 | [1]: https://alexwlchan.net/2024/static-websites/ 10 | [2]: https://alexwlchan.net/2025/mildly-dynamic-websites/ 11 | 12 | """ 13 | 14 | __version__ = "21" 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 Alex Chan 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 17 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 19 | OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | You can set up a local development environment by cloning the repo and installing dependencies: 4 | 5 | ```shell 6 | git clone https://github.com/alexwlchan/chives.git 7 | cd chives 8 | python3 -m venv .venv 9 | source .venv/bin/activate 10 | pip install -e . 
11 | ``` 12 | 13 | If you want to run tests, install the dev dependencies and run the tests: 14 | 15 | ```shell 16 | # Activate the virtualenv and install dev dependencies 17 | source .venv/bin/activate 18 | pip install -r dev_requirements.txt 19 | 20 | # Check formatting 21 | ruff check . 22 | ruff format --check . 23 | 24 | # Check docstrings 25 | interrogate -vv 26 | 27 | # Check types 28 | mypy src tests 29 | 30 | # Run tests 31 | coverage run -m pytest tests 32 | coverage report 33 | ``` 34 | 35 | To make changes: 36 | 37 | 1. Create a new branch 38 | 2. Push your changes to GitHub 39 | 3. Open a pull request 40 | 4. Fix any issues flagged by GitHub Actions (including tests, code linting, and type checking) 41 | 5. Merge it! 42 | 43 | To create a new version on PyPI: 44 | 45 | 1. Update the version in `src/chives/__init__.py` 46 | 2. Add release notes in `CHANGELOG.md` and push a new tag to GitHub 47 | 3. Deploy the release using twine: 48 | 49 | ```console 50 | $ python3 -m build 51 | $ python3 -m twine upload dist/* --username=__token__ 52 | ``` 53 | 54 | You will need [a PyPI API token](https://pypi.org/help/#apitoken) to publish packages. 
55 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools >= 65", 4 | ] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [project] 8 | name = "alexwlchan-chives" 9 | description = "Utility functions for working with my local media archives" 10 | readme = "README.md" 11 | authors = [ 12 | {name = "Alex Chan", email = "alex@alexwlchan.net"}, 13 | ] 14 | maintainers = [ 15 | {name = "Alex Chan", email="alex@alexwlchan.net"}, 16 | ] 17 | classifiers = [ 18 | "Development Status :: 4 - Beta", 19 | "Programming Language :: Python :: 3.13", 20 | ] 21 | requires-python = ">=3.13" 22 | dependencies = [] 23 | dynamic = ["version"] 24 | license = "MIT" 25 | 26 | [project.optional-dependencies] 27 | media = ["Pillow"] 28 | static_site_tests = ["pytest", "rapidfuzz"] 29 | urls = ["httpx", "hyperlink"] 30 | 31 | [project.urls] 32 | "Homepage" = "https://github.com/alexwlchan/chives" 33 | "Changelog" = "https://github.com/alexwlchan/chives/blob/main/CHANGELOG.md" 34 | 35 | [tool.setuptools.dynamic] 36 | version = {attr = "chives.__version__"} 37 | 38 | [tool.setuptools.packages.find] 39 | where = ["src"] 40 | 41 | [tool.setuptools.package-data] 42 | nitrate = ["py.typed"] 43 | 44 | [tool.coverage.run] 45 | branch = true 46 | source = ["chives", "tests",] 47 | 48 | [tool.coverage.report] 49 | show_missing = true 50 | skip_covered = true 51 | fail_under = 100 52 | 53 | [tool.pytest.ini_options] 54 | filterwarnings = ["error"] 55 | 56 | [tool.mypy] 57 | mypy_path = "src" 58 | strict = true 59 | 60 | [tool.ruff.lint] 61 | select = ["D", "E", "F"] 62 | ignore = ["D200", "D203", "D204", "D205", "D212", "D401"] 63 | -------------------------------------------------------------------------------- /tests/fixtures/cassettes/TestIsMastodonHost.test_non_mastodon_servers[alexwlchan.net].yml: 
-------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: '' 4 | headers: 5 | Accept: 6 | - '*/*' 7 | Accept-Encoding: 8 | - gzip, deflate 9 | Connection: 10 | - keep-alive 11 | Host: 12 | - alexwlchan.net 13 | User-Agent: 14 | - python-httpx/0.28.1 15 | method: GET 16 | uri: https://alexwlchan.net/.well-known/nodeinfo 17 | response: 18 | body: 19 | string: '' 20 | headers: 21 | Alt-Svc: 22 | - h3=":443"; ma=2592000 23 | Content-Length: 24 | - '0' 25 | Content-Security-Policy: 26 | - 'default-src ''self'' ''unsafe-inline'' https://youtube-nocookie.com https://www.youtube-nocookie.com; 27 | script-src ''self'' ''unsafe-inline''; connect-src https://analytics.alexwlchan.net; 28 | img-src ''self'' ''unsafe-inline'' data:' 29 | Date: 30 | - Thu, 04 Dec 2025 12:15:34 GMT 31 | Location: 32 | - https://social.alexwlchan.net/.well-known/nodeinfo 33 | Permissions-Policy: 34 | - geolocation=(), midi=(), notifications=(), push=(), sync-xhr=(), microphone=(), 35 | camera=(), magnetometer=(), gyroscope=(), vibrate=(), payment=() 36 | Referrer-Policy: 37 | - no-referrer-when-downgrade 38 | Server: 39 | - Caddy 40 | Strict-Transport-Security: 41 | - max-age=31536000; includeSubDomains 42 | X-Content-Type-Options: 43 | - nosniff 44 | X-Frame-Options: 45 | - ALLOWALL 46 | X-Xss-Protection: 47 | - 1; mode=block 48 | status: 49 | code: 301 50 | message: Moved Permanently 51 | version: 1 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chives 2 | 3 | chives is a collection of Python functions for working with my local 4 | media archives. 5 | 6 | I store a lot of media archives as [static websites][static-sites], and I use Python scripts to manage the sites. 
7 | This includes: 8 | 9 | * Verifying every file that's described in the metadata is stored correctly 10 | * Downloading pages from sites I want to bookmark 11 | * Checking the quality and consistency of my metadata 12 | 13 | This package has some functions I share across multiple archives/sites. 14 | 15 | [static-sites]: https://alexwlchan.net/2024/static-websites/ 16 | 17 | ## References 18 | 19 | I've written blog posts about some of the code in this repo: 20 | 21 | * [Cleaning up messy dates in JSON](https://alexwlchan.net/2025/messy-dates-in-json/) 22 | * [Detecting AV1-encoded videos with Python](https://alexwlchan.net/2025/detecting-av1-videos/) 23 | 24 | ## Versioning 25 | 26 | This library is monotonically versioned. 27 | I'll try not to break anything between releases, but I make no guarantees of back-compatibility. 28 | 29 | I'm making this public because it's convenient for me, and you might find useful code here, but be aware this may not be entirely stable. 30 | 31 | ## Usage 32 | 33 | See the docstrings on individual functions for usage descriptions. 34 | 35 | ## Installation 36 | 37 | If you want to use this in your project, I recommend copying the relevant function and test into your codebase (with a link back to this repo). 38 | 39 | Alternatively, you can install the package from PyPI: 40 | 41 | ```console 42 | $ pip install alexwlchan-chives 43 | ``` 44 | 45 | ## Development 46 | 47 | If you want to make changes to the library, there are instructions in [CONTRIBUTING.md](./CONTRIBUTING.md). 48 | 49 | ## License 50 | 51 | MIT. 
52 | -------------------------------------------------------------------------------- /tests/fixtures/cassettes/TestIsMastodonHost.test_non_mastodon_servers[example.com].yml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: '' 4 | headers: 5 | Accept: 6 | - '*/*' 7 | Accept-Encoding: 8 | - gzip, deflate 9 | Connection: 10 | - keep-alive 11 | Host: 12 | - example.com 13 | User-Agent: 14 | - python-httpx/0.28.1 15 | method: GET 16 | uri: https://example.com/.well-known/nodeinfo 17 | response: 18 | body: 19 | string: 'Example Domain

Example 22 | Domain

This domain is for use in documentation examples without needing 23 | permission. Avoid use in operations.

Learn 24 | more

25 | 26 | ' 27 | headers: 28 | Accept-Ranges: 29 | - bytes 30 | Alt-Svc: 31 | - h3=":443"; ma=93600 32 | Cache-Control: 33 | - max-age=0, no-cache, no-store 34 | Connection: 35 | - keep-alive 36 | Content-Length: 37 | - '513' 38 | Content-Type: 39 | - text/html 40 | Date: 41 | - Thu, 04 Dec 2025 12:15:34 GMT 42 | ETag: 43 | - '"bc2473a18e003bdb249eba5ce893033f:1760028122.592274"' 44 | Expires: 45 | - Thu, 04 Dec 2025 12:15:34 GMT 46 | Last-Modified: 47 | - Thu, 09 Oct 2025 16:42:02 GMT 48 | Pragma: 49 | - no-cache 50 | Server: 51 | - AkamaiNetStorage 52 | status: 53 | code: 404 54 | message: Not Found 55 | version: 1 56 | -------------------------------------------------------------------------------- /tests/test_dates.py: -------------------------------------------------------------------------------- 1 | """Tests for `chives.dates`.""" 2 | 3 | import json 4 | 5 | import pytest 6 | 7 | from chives.dates import date_matches_any_format, find_all_dates, reformat_date 8 | 9 | 10 | def test_find_all_dates() -> None: 11 | """find_all_dates finds all the nested dates in a JSON object.""" 12 | json_value = json.loads("""{ 13 | "doc1": {"id": "1", "date_created": "2025-10-14T05:34:07+0000"}, 14 | "shapes": [ 15 | {"color": "blue", "date_saved": "2015-03-01 23:34:39 +00:00"}, 16 | {"color": "yellow", "date_saved": "2013-9-21 13:43:00Z", "is_square": true}, 17 | {"color": "green", "date_saved": null} 18 | ], 19 | "date_verified": "2024-08-30" 20 | }""") 21 | 22 | assert list(find_all_dates(json_value)) == [ 23 | ( 24 | {"id": "1", "date_created": "2025-10-14T05:34:07+0000"}, 25 | "date_created", 26 | "2025-10-14T05:34:07+0000", 27 | ), 28 | ( 29 | {"color": "blue", "date_saved": "2015-03-01 23:34:39 +00:00"}, 30 | "date_saved", 31 | "2015-03-01 23:34:39 +00:00", 32 | ), 33 | ( 34 | {"color": "yellow", "date_saved": "2013-9-21 13:43:00Z", "is_square": True}, 35 | "date_saved", 36 | "2013-9-21 13:43:00Z", 37 | ), 38 | (json_value, "date_verified", "2024-08-30"), 39 | ] 40 | 41 
| 42 | def test_date_matches_any_format() -> None: 43 | """ 44 | Tests for `date_matches_any_format`. 45 | """ 46 | assert date_matches_any_format( 47 | "2001-01-01", formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S%z"] 48 | ) 49 | assert not date_matches_any_format("2001-01-01", formats=["%Y-%m-%dT%H:%M:%S%z"]) 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "s, orig_fmt, formatted_date", 54 | [ 55 | ("2025-11-12T15:34:39.570Z", "%Y-%m-%dT%H:%M:%S.%fZ", "2025-11-12T15:34:39Z"), 56 | ("2025-03-12 09:57:03", "%Y-%m-%d %H:%M:%S", "2025-03-12T09:57:03Z"), 57 | ("2016-02-25 05:28:35 GMT", "%Y-%m-%d %H:%M:%S %Z", "2016-02-25T05:28:35Z"), 58 | ("2011-12-06T10:45:15-08:00", "%Y-%m-%dT%H:%M:%S%z", "2011-12-06T18:45:15Z"), 59 | ], 60 | ) 61 | def test_reformat_date(s: str, orig_fmt: str, formatted_date: str) -> None: 62 | """Tests for `reformat_date`.""" 63 | assert reformat_date(s, orig_fmt) == formatted_date 64 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | test: 14 | runs-on: macos-latest 15 | strategy: 16 | matrix: 17 | python-version: ["3.13", "3.14"] 18 | 19 | steps: 20 | - uses: actions/checkout@v6 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v6 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | cache: pip 27 | 28 | - name: Install Python dependencies 29 | run: pip install -r dev_requirements.txt 30 | 31 | - name: Install create_thumbnail 32 | env: 33 | GH_TOKEN: ${{ github.token }} 34 | run: | 35 | gh release download \ 36 | --repo alexwlchan/create_thumbnail \ 37 | --pattern create_thumbnail-aarch64-apple-darwin.tar.gz \ 38 | --output create_thumbnail.tar.gz 39 | tar -xzf create_thumbnail.tar.gz --directory /usr/local/bin 40 | chmod +x /usr/local/bin/create_thumbnail 41 
| which create_thumbnail 42 | 43 | - name: Install dominant_colours 44 | env: 45 | GH_TOKEN: ${{ github.token }} 46 | run: | 47 | gh release download \ 48 | --repo alexwlchan/dominant_colours \ 49 | --pattern dominant_colours-aarch64-apple-darwin.tar.gz \ 50 | --output dominant_colours.tar.gz 51 | tar -xzf dominant_colours.tar.gz --directory /usr/local/bin 52 | chmod +x /usr/local/bin/dominant_colours 53 | which dominant_colours 54 | 55 | - name: Install get_live_text 56 | env: 57 | GH_TOKEN: ${{ github.token }} 58 | run: | 59 | gh release download \ 60 | --repo alexwlchan/get_live_text \ 61 | --pattern get_live_text.aarch64-apple-darwin.zip \ 62 | --output get_live_text.tar.gz 63 | tar -xzf get_live_text.tar.gz --directory /usr/local/bin 64 | chmod +x /usr/local/bin/get_live_text 65 | which get_live_text 66 | 67 | - name: Install ffprobe 68 | run: | 69 | curl -O https://evermeet.cx/ffmpeg/ffprobe-8.0.1.7z 70 | tar -xzf ffprobe-8.0.1.7z --directory /usr/local/bin 71 | chmod +x /usr/local/bin/ffprobe 72 | which ffprobe 73 | 74 | - name: Check formatting 75 | run: | 76 | ruff check . 77 | ruff format --check . 78 | 79 | - name: Check types 80 | run: mypy src tests 81 | 82 | - name: Run tests 83 | run: | 84 | coverage run -m pytest tests 85 | coverage report 86 | -------------------------------------------------------------------------------- /src/chives/dates.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for interacting with timestamps and date strings. 3 | 4 | References: 5 | * https://alexwlchan.net/2025/messy-dates-in-json/ 6 | 7 | """ 8 | 9 | from collections.abc import Iterable, Iterator 10 | from datetime import datetime, timezone 11 | from typing import Any 12 | 13 | 14 | def now() -> str: 15 | """ 16 | Returns the current time in the standard format used by my static sites. 
17 | """ 18 | return ( 19 | datetime.now(tz=timezone.utc) 20 | .replace(microsecond=0) 21 | .isoformat() 22 | .replace("+00:00", "Z") 23 | ) 24 | 25 | 26 | def find_all_dates(json_value: Any) -> Iterator[tuple[dict[str, Any], str, str]]: 27 | """ 28 | Find all the timestamps in a heavily nested JSON object. 29 | 30 | This function looks for any JSON objects with a key-value pair 31 | where the key starts with `date_` and the value is a string, and 32 | emits a 3-tuple: 33 | 34 | * the JSON object 35 | * the key 36 | * the value 37 | 38 | """ 39 | if isinstance(json_value, dict): 40 | for key, value in json_value.items(): 41 | if ( 42 | isinstance(key, str) 43 | and key.startswith("date_") 44 | and isinstance(value, str) 45 | ): 46 | yield json_value, key, value 47 | else: 48 | yield from find_all_dates(value) 49 | elif isinstance(json_value, list): 50 | for value in json_value: 51 | yield from find_all_dates(value) 52 | 53 | 54 | def date_matches_format(date_string: str, format: str) -> bool: 55 | """ 56 | Returns True if `date_string` can be parsed as a datetime 57 | using `format`, False otherwise. 58 | """ 59 | try: 60 | datetime.strptime(date_string, format) 61 | return True 62 | except ValueError: 63 | return False 64 | 65 | 66 | def date_matches_any_format(date_string: str, formats: Iterable[str]) -> bool: 67 | """ 68 | Returns True if `date_string` can be parsed as a datetime 69 | with any of the `formats`, False otherwise. 70 | """ 71 | return any(date_matches_format(date_string, fmt) for fmt in formats) 72 | 73 | 74 | def reformat_date(s: str, /, orig_fmt: str) -> str: 75 | """ 76 | Reformat a date to one of my desired formats. 
77 | """ 78 | if "%Z" in orig_fmt: 79 | d = datetime.strptime(s, orig_fmt) 80 | else: 81 | d = datetime.strptime(s.replace("Z", "+0000"), orig_fmt.replace("Z", "%z")) 82 | d = d.replace(microsecond=0) 83 | if d.tzinfo is None: 84 | d = d.replace(tzinfo=timezone.utc) 85 | else: 86 | d = d.astimezone(tz=timezone.utc) 87 | return d.strftime("%Y-%m-%dT%H:%M:%S%z").replace("+0000", "Z") 88 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## v21 - 2025-12-22 4 | 5 | Add a method `dates.now()` to return the current time in the timestamp used by all my static sites. 6 | 7 | ## v20 - 2025-12-10 8 | 9 | Use concurrency in `test_no_videos_are_av1`, which can make it faster for 10 | larger media collections. 11 | 12 | ## v19 - 2025-12-09 13 | 14 | Allow passing both `width` and `height` as part of `ThumbnailConfig`, to constrain a thumbnail to a bounding box. 15 | 16 | ## v18 - 2025-12-08 17 | 18 | Expose `get_tint_colour()` as a public function from `chives.media`. 19 | 20 | ## v17 - 2025-12-07 21 | 22 | Account for [EXIF orientation](https://alexwlchan.net/til/2024/photos-can-have-orientation-in-exif/) when getting the width/height of image entities. 23 | 24 | ## v16 - 2025-12-06 25 | 26 | Don't require defining `list_tags_in_metadata()` in projects that don't use tags. 27 | 28 | ## v15 - 2025-12-06 29 | 30 | Fix a bunch of lints from ruff; remove an unused dependency. 31 | 32 | ## v14 - 2025-12-06 33 | 34 | Improve the error message on failed assertions in `StaticSiteTestSuite`. 35 | 36 | ## v13 - 2025-12-06 37 | 38 | Mark a couple more folders/files as ignored in `StaticSiteTestSuite`. 39 | 40 | ## v12 - 2025-12-06 41 | 42 | Add checks for fuzzy tag matching to `StaticSiteTestSuite`. 43 | 44 | ## v11 - 2025-12-06 45 | 46 | Add a new class `StaticSiteTestSuite` which runs my standard set of tests for a static site, e.g. 
checking every file is saved, checking timestamps use the correct format. 47 | 48 | ## v10 - 2025-12-05 49 | 50 | Add a new `is_url_safe()` function for checking if a path can be safely used in a URL. 51 | 52 | ## v9 - 2025-12-05 53 | 54 | This adds three models to `chives.media`: `ImageEntity`, `VideoEntity`, and `ImageEntity`. 55 | These have all the information I need to show an image/video in a web page. 56 | 57 | It also includes functions `create_image_entity` and `create_video_entity` which construct instances of these models. 58 | 59 | ## v8 - 2025-12-04 60 | 61 | Add the `is_mastodon_host()` function. 62 | 63 | ## v7 - 2025-12-03 64 | 65 | Add the `parse_tumblr_post_url()` function. 66 | 67 | ## v6 - 2025-12-03 68 | 69 | Add the `parse_mastodon_post_url()` function. 70 | 71 | ## v5 - 2025-12-01 72 | 73 | When calling `reformat_date()`, ensure all dates are converted to UTC. 74 | 75 | ## v4 - 2025-11-29 76 | 77 | Rename `chives.timestamps` to `chives.dates`. 78 | 79 | ## v3 - 2025-11-29 80 | 81 | Add the `clean_youtube_url()` function and `urls` extra. 82 | Rearrange the package structure slightly, to allow optional dependencies. 83 | 84 | ## v2 - 2025-11-28 85 | 86 | Add the `is_av1_video()` function for [detecting AV1-encoded videos](https://alexwlchan.net/2025/detecting-av1-videos/). 87 | 88 | ## v1 - 2025-11-28 89 | 90 | Initial release. Included functions: 91 | 92 | * `date_matches_any_format` 93 | * `date_matches_format` 94 | * `find_all_dates` 95 | * `reformat_date` 96 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile dev_requirements.in --output-file dev_requirements.txt 3 | -e file:. 
4 | # via -r dev_requirements.in 5 | annotated-types==0.7.0 6 | # via pydantic 7 | anyio==4.12.0 8 | # via httpx 9 | build==1.3.0 10 | # via -r dev_requirements.in 11 | certifi==2025.11.12 12 | # via 13 | # httpcore 14 | # httpx 15 | # requests 16 | charset-normalizer==3.4.4 17 | # via requests 18 | coverage==7.12.0 19 | # via pytest-cov 20 | docutils==0.22.3 21 | # via readme-renderer 22 | h11==0.16.0 23 | # via httpcore 24 | httpcore==1.0.9 25 | # via httpx 26 | httpx==0.28.1 27 | # via alexwlchan-chives 28 | hyperlink==21.0.0 29 | # via alexwlchan-chives 30 | id==1.5.0 31 | # via twine 32 | idna==3.11 33 | # via 34 | # anyio 35 | # httpx 36 | # hyperlink 37 | # requests 38 | iniconfig==2.3.0 39 | # via pytest 40 | jaraco-classes==3.4.0 41 | # via keyring 42 | jaraco-context==6.0.1 43 | # via keyring 44 | jaraco-functools==4.3.0 45 | # via keyring 46 | javascript-data-files==1.4.1 47 | # via alexwlchan-chives 48 | keyring==25.7.0 49 | # via twine 50 | librt==0.7.3 51 | # via mypy 52 | markdown-it-py==4.0.0 53 | # via rich 54 | mdurl==0.1.2 55 | # via markdown-it-py 56 | more-itertools==10.8.0 57 | # via 58 | # jaraco-classes 59 | # jaraco-functools 60 | mypy==1.19.0 61 | # via -r dev_requirements.in 62 | mypy-extensions==1.1.0 63 | # via mypy 64 | nh3==0.3.2 65 | # via readme-renderer 66 | packaging==25.0 67 | # via 68 | # build 69 | # pytest 70 | # twine 71 | pathspec==0.12.1 72 | # via mypy 73 | pillow==12.0.0 74 | # via alexwlchan-chives 75 | pluggy==1.6.0 76 | # via 77 | # pytest 78 | # pytest-cov 79 | pydantic==2.12.5 80 | # via javascript-data-files 81 | pydantic-core==2.41.5 82 | # via pydantic 83 | pygments==2.19.2 84 | # via 85 | # pytest 86 | # readme-renderer 87 | # rich 88 | pyproject-hooks==1.2.0 89 | # via build 90 | pytest==9.0.2 91 | # via 92 | # alexwlchan-chives 93 | # pytest-cov 94 | # pytest-vcr 95 | # silver-nitrate 96 | pytest-cov==7.0.0 97 | # via -r dev_requirements.in 98 | pytest-vcr==1.0.2 99 | # via silver-nitrate 100 | pyyaml==6.0.3 
101 | # via vcrpy 102 | rapidfuzz==3.14.3 103 | # via alexwlchan-chives 104 | readme-renderer==44.0 105 | # via twine 106 | requests==2.32.5 107 | # via 108 | # id 109 | # requests-toolbelt 110 | # twine 111 | requests-toolbelt==1.0.0 112 | # via twine 113 | rfc3986==2.0.0 114 | # via twine 115 | rich==14.2.0 116 | # via twine 117 | ruff==0.14.8 118 | # via -r dev_requirements.in 119 | silver-nitrate==1.8.1 120 | # via -r dev_requirements.in 121 | twine==6.2.0 122 | # via -r dev_requirements.in 123 | typing-extensions==4.15.0 124 | # via 125 | # mypy 126 | # pydantic 127 | # pydantic-core 128 | # typing-inspection 129 | typing-inspection==0.4.2 130 | # via pydantic 131 | urllib3==2.6.0 132 | # via 133 | # requests 134 | # twine 135 | vcrpy==8.0.0 136 | # via pytest-vcr 137 | wrapt==2.0.1 138 | # via vcrpy 139 | -------------------------------------------------------------------------------- /src/chives/urls.py: -------------------------------------------------------------------------------- 1 | """Code for manipulating and tidying URLs.""" 2 | 3 | from pathlib import Path 4 | import re 5 | 6 | 7 | __all__ = [ 8 | "clean_youtube_url", 9 | "is_mastodon_host", 10 | "is_url_safe", 11 | "parse_mastodon_post_url", 12 | "parse_tumblr_post_url", 13 | ] 14 | 15 | 16 | def clean_youtube_url(url: str) -> str: 17 | """ 18 | Remove any query parameters from a YouTube URL that I don't 19 | want to include. 20 | """ 21 | import hyperlink 22 | 23 | u = hyperlink.parse(url) 24 | 25 | u = u.remove("list") 26 | u = u.remove("index") 27 | u = u.remove("t") 28 | 29 | return str(u) 30 | 31 | 32 | def is_mastodon_host(hostname: str) -> bool: 33 | """ 34 | Check if a hostname is a Mastodon server. 
35 | """ 36 | if hostname in { 37 | "hachyderm.io", 38 | "iconfactory.world", 39 | "mas.to", 40 | "mastodon.social", 41 | "social.alexwlchan.net", 42 | }: 43 | return True 44 | 45 | # See https://github.com/mastodon/mastodon/discussions/30547 46 | # 47 | # Fist we look at /.well-known/nodeinfo, which returns a response 48 | # like this for Mastodon servers: 49 | # 50 | # { 51 | # "links": [ 52 | # { 53 | # "rel": "http://nodeinfo.diaspora.software/ns/schema/2.0", 54 | # "href": "https://mastodon.online/nodeinfo/2.0" 55 | # } 56 | # ] 57 | # } 58 | # 59 | import httpx 60 | 61 | nodeinfo_resp = httpx.get(f"https://{hostname}/.well-known/nodeinfo") 62 | try: 63 | nodeinfo_resp.raise_for_status() 64 | except Exception: 65 | return False 66 | 67 | # Then we try to call $.links[0].href, which should return something 68 | # like: 69 | # 70 | # { 71 | # "version": "2.0", 72 | # "software": {"name": "mastodon", "version": "4.5.2"}, 73 | # … 74 | # 75 | try: 76 | href = nodeinfo_resp.json()["links"][0]["href"] 77 | except (KeyError, IndexError): # pragma: no cover 78 | return False 79 | 80 | link_resp = httpx.get(href) 81 | try: 82 | link_resp.raise_for_status() 83 | except Exception: # pragma: no cover 84 | return False 85 | 86 | try: 87 | return bool(link_resp.json()["software"]["name"] == "mastodon") 88 | except (KeyError, IndexError): # pragma: no cover 89 | return False 90 | 91 | 92 | def parse_mastodon_post_url(url: str) -> tuple[str, str, str]: 93 | """ 94 | Parse a Mastodon post URL into its component parts: 95 | server, account, post ID. 
96 | """ 97 | import hyperlink 98 | 99 | u = hyperlink.parse(url) 100 | 101 | if len(u.path) != 2: 102 | raise ValueError("Cannot parse Mastodon URL!") 103 | 104 | if not u.path[0].startswith("@"): 105 | raise ValueError("Cannot find `acct` in Mastodon URL!") 106 | 107 | if not re.fullmatch(r"^[0-9]+$", u.path[1]): 108 | raise ValueError("Mastodon post ID is not numeric!") 109 | 110 | if u.host == "social.alexwlchan.net": 111 | _, acct, server = u.path[0].split("@") 112 | else: 113 | server = u.host 114 | acct = u.path[0].replace("@", "") 115 | 116 | return server, acct, u.path[1] 117 | 118 | 119 | def parse_tumblr_post_url(url: str) -> tuple[str, str]: 120 | """ 121 | Parse a Tumblr URL into its component parts. 122 | 123 | Returns a tuple (blog_identifier, post ID). 124 | """ 125 | import hyperlink 126 | 127 | u = hyperlink.parse(url) 128 | 129 | if u.host == "www.tumblr.com": 130 | return u.path[0], u.path[1] 131 | 132 | if u.host.endswith(".tumblr.com") and len(u.path) >= 3 and u.path[0] == "post": 133 | return u.host.replace(".tumblr.com", ""), u.path[1] 134 | 135 | raise ValueError("Cannot parse Tumblr URL!") # pragma: no cover 136 | 137 | 138 | def is_url_safe(path: str | Path) -> bool: 139 | """ 140 | Returns True if a path is safe to use in a URL, False otherwise. 141 | """ 142 | p = str(path) 143 | return not ("?" 
in p or "#" in p or "%" in p) 144 | -------------------------------------------------------------------------------- /tests/fixtures/cassettes/TestIsMastodonHost.test_mastodon_servers[social.jvns.ca].yml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: '' 4 | headers: 5 | Accept: 6 | - '*/*' 7 | Accept-Encoding: 8 | - gzip, deflate 9 | Connection: 10 | - keep-alive 11 | Host: 12 | - social.jvns.ca 13 | User-Agent: 14 | - python-httpx/0.28.1 15 | method: GET 16 | uri: https://social.jvns.ca/.well-known/nodeinfo 17 | response: 18 | body: 19 | string: '{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"https://social.jvns.ca/nodeinfo/2.0"}]}' 20 | headers: 21 | Connection: 22 | - keep-alive 23 | Content-Type: 24 | - application/json; charset=utf-8 25 | Date: 26 | - Thu, 04 Dec 2025 12:14:49 GMT 27 | Strict-Transport-Security: 28 | - max-age=31536000 29 | Transfer-Encoding: 30 | - chunked 31 | cache-control: 32 | - max-age=259200, public 33 | content-length: 34 | - '114' 35 | content-security-policy: 36 | - 'base-uri ''none''; default-src ''none''; frame-ancestors ''none''; font-src 37 | ''self'' https://social.jvns.ca; img-src ''self'' data: blob: https://social.jvns.ca 38 | https://cdn.masto.host; media-src ''self'' data: https://social.jvns.ca https://cdn.masto.host; 39 | manifest-src ''self'' https://social.jvns.ca; form-action ''self''; child-src 40 | ''self'' blob: https://social.jvns.ca; worker-src ''self'' blob: https://social.jvns.ca; 41 | connect-src ''self'' data: blob: https://social.jvns.ca https://cdn.masto.host 42 | wss://social.jvns.ca; script-src ''self'' https://social.jvns.ca ''wasm-unsafe-eval''; 43 | frame-src ''self'' https:; style-src ''self'' https://social.jvns.ca ''nonce-cYXJpX/juTVw0Sc+MAA7BQ==''' 44 | etag: 45 | - W/"41981c7ccfa1674c1535b6eea835d7e5" 46 | referrer-policy: 47 | - same-origin 48 | server: 49 | - Mastodon 50 | vary: 51 | - Origin 
52 | x-content-type-options: 53 | - nosniff 54 | x-frame-options: 55 | - DENY 56 | x-request-id: 57 | - d4888141-9cb3-4305-b5dd-a8b0842a2d05 58 | x-runtime: 59 | - '0.003398' 60 | x-xss-protection: 61 | - '0' 62 | status: 63 | code: 200 64 | message: OK 65 | - request: 66 | body: '' 67 | headers: 68 | Accept: 69 | - '*/*' 70 | Accept-Encoding: 71 | - gzip, deflate 72 | Connection: 73 | - keep-alive 74 | Host: 75 | - social.jvns.ca 76 | User-Agent: 77 | - python-httpx/0.28.1 78 | method: GET 79 | uri: https://social.jvns.ca/nodeinfo/2.0 80 | response: 81 | body: 82 | string: '{"version":"2.0","software":{"name":"mastodon","version":"4.5.2"},"protocols":["activitypub"],"services":{"outbound":[],"inbound":[]},"usage":{"users":{"total":4,"activeMonth":4,"activeHalfyear":4},"localPosts":7409},"openRegistrations":false,"metadata":{"nodeName":"Mastodon","nodeDescription":""}}' 83 | headers: 84 | Connection: 85 | - keep-alive 86 | Content-Type: 87 | - application/json; charset=utf-8 88 | Date: 89 | - Thu, 04 Dec 2025 12:14:49 GMT 90 | Strict-Transport-Security: 91 | - max-age=31536000 92 | Transfer-Encoding: 93 | - chunked 94 | cache-control: 95 | - max-age=1800, public 96 | content-length: 97 | - '299' 98 | content-security-policy: 99 | - 'base-uri ''none''; default-src ''none''; frame-ancestors ''none''; font-src 100 | ''self'' https://social.jvns.ca; img-src ''self'' data: blob: https://social.jvns.ca 101 | https://cdn.masto.host; media-src ''self'' data: https://social.jvns.ca https://cdn.masto.host; 102 | manifest-src ''self'' https://social.jvns.ca; form-action ''self''; child-src 103 | ''self'' blob: https://social.jvns.ca; worker-src ''self'' blob: https://social.jvns.ca; 104 | connect-src ''self'' data: blob: https://social.jvns.ca https://cdn.masto.host 105 | wss://social.jvns.ca; script-src ''self'' https://social.jvns.ca ''wasm-unsafe-eval''; 106 | frame-src ''self'' https:; style-src ''self'' https://social.jvns.ca ''nonce-KudwCpfFUyr8bIzc9hLCuA==''' 107 | 
"""Tests for `chives.urls`."""

from pathlib import Path

import pytest
from vcr.cassette import Cassette

from chives.urls import (
    clean_youtube_url,
    is_mastodon_host,
    is_url_safe,
    parse_mastodon_post_url,
    parse_tumblr_post_url,
)


@pytest.mark.parametrize(
    "url, cleaned_url",
    [
        # A URL with `list`, `index` and `t` parameters, which get removed.
        (
            "https://www.youtube.com/watch?v=2OHPPSew2nY&list=WL&index=6&t=193s",
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
        ),
        # A URL which is already clean, and comes back as-is.
        (
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
        ),
    ],
)
def test_clean_youtube_url(url: str, cleaned_url: str) -> None:
    """
    All the query parameters get stripped from YouTube URLs correctly.
    """
    assert clean_youtube_url(url) == cleaned_url


@pytest.mark.parametrize(
    "url, server, acct, post_id",
    [
        # An account viewed on its own server.
        (
            "https://iconfactory.world/@Iconfactory/115650922400392083",
            "iconfactory.world",
            "Iconfactory",
            "115650922400392083",
        ),
        # A remote account viewed through social.alexwlchan.net; the
        # server comes from the `@acct@server` handle, not the URL host.
        (
            "https://social.alexwlchan.net/@chris__martin@functional.cafe/113369395383537892",
            "functional.cafe",
            "chris__martin",
            "113369395383537892",
        ),
    ],
)
def test_parse_mastodon_post_url(
    url: str, server: str, acct: str, post_id: str
) -> None:
    """
    Mastodon post URLs are parsed correctly.
    """
    assert parse_mastodon_post_url(url) == (server, acct, post_id)


@pytest.mark.parametrize(
    "url, error",
    [
        ("https://mastodon.social/", "Cannot parse Mastodon URL"),
        ("https://mastodon.social/about", "Cannot parse Mastodon URL"),
        ("https://mastodon.social/about/subdir", "Cannot find `acct`"),
        ("https://mastodon.social/@example/about", "Mastodon post ID is not numeric"),
    ],
)
def test_parse_mastodon_post_url_errors(url: str, error: str) -> None:
    """
    parse_mastodon_post_url raises a useful error if it can't parse the URL.
    """
    with pytest.raises(ValueError, match=error):
        parse_mastodon_post_url(url)


@pytest.mark.parametrize(
    "url, blog_identifier, post_id",
    [
        # The www.tumblr.com/<blog>/<post ID> form.
        (
            "https://www.tumblr.com/kynvillingur/792473255236796416/",
            "kynvillingur",
            "792473255236796416",
        ),
        # The <blog>.tumblr.com/post/<post ID>/<slug> form.
        (
            "https://cut3panda.tumblr.com/post/94093772689/for-some-people-the-more-you-get-to-know-them",
            "cut3panda",
            "94093772689",
        ),
    ],
)
def test_parse_tumblr_post_url(url: str, blog_identifier: str, post_id: str) -> None:
    """
    Tumblr URLs are parsed correctly.
    """
    assert parse_tumblr_post_url(url) == (blog_identifier, post_id)


class TestIsMastodonHost:
    """
    Tests for `is_mastodon_host`.

    These tests take the `vcr_cassette` fixture; the recorded HTTP
    interactions live in `tests/fixtures/cassettes`, in files named after
    the test class, test name and parameter -- so renaming any of those
    means renaming the cassette as well.
    """

    @pytest.mark.parametrize(
        "host", ["mastodon.social", "hachyderm.io", "social.jvns.ca"]
    )
    def test_mastodon_servers(self, host: str, vcr_cassette: Cassette) -> None:
        """
        It correctly identifies real Mastodon servers.
        """
        assert is_mastodon_host(host)

    @pytest.mark.parametrize(
        "host",
        [
            # These are regular Internet websites which don't expose
            # the /.well-known/nodeinfo endpoint
            "example.com",
            "alexwlchan.net",
            #
            # PeerTube exposes /.well-known/nodeinfo, but it's running
            # different software.
            "peertube.tv",
        ],
    )
    def test_non_mastodon_servers(self, host: str, vcr_cassette: Cassette) -> None:
        """
        Other websites are not Mastodon servers.
        """
        assert not is_mastodon_host(host)


class TestIsUrlSafe:
    """
    Tests for `is_url_safe`.
    """

    @pytest.mark.parametrize("path", ["example.txt", Path("a/b/cat.jpg")])
    def test_safe(self, path: str | Path) -> None:
        """Paths which are URL safe."""
        assert is_url_safe(path)

    @pytest.mark.parametrize("path", ["is it?", Path("cat%c.jpg"), "a#b"])
    def test_unsafe(self, path: str | Path) -> None:
        """Paths which are not URL safe."""
        assert not is_url_safe(path)
'{"version":"2.0","software":{"name":"peertube","version":"5.2.0"},"protocols":["activitypub"],"services":{"inbound":[],"outbound":["atom1.0","rss2.0"]},"openRegistrations":false,"usage":{"users":{"total":609,"activeMonth":8,"activeHalfyear":35},"localPosts":18598,"localComments":93},"metadata":{"taxonomy":{"postsName":"Videos"},"nodeName":"PeerTube.TV","nodeDescription":"Videos 64 | sharing & live streaming on free open source software PeerTube! No ads, no 65 | tracking, no spam.","nodeConfig":{"search":{"remoteUri":{"users":true,"anonymous":false}},"plugin":{"registered":[{"npmName":"peertube-plugin-upload-instructions","name":"upload-instructions","version":"0.1.1","description":"Show 66 | an instructions modal right before uploading","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-custom-links","name":"custom-links","version":"0.0.10","description":"PeerTube 67 | plugin that allows you to add custom links on the bottom of the menu","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-glavliiit","name":"glavliiit","version":"0.0.10","description":"Enhanced 68 | moderation tool for PeerTube","clientScripts":{}},{"npmName":"peertube-plugin-categories","name":"categories","version":"1.2.7","description":"Manage 69 | video categories.","clientScripts":{"src/client/admin-plugin-settings.js":{"script":"src/client/admin-plugin-settings.js","scopes":["admin-plugin"]}}},{"npmName":"peertube-plugin-creative-commons","name":"creative-commons","version":"1.2.0","description":"Standardized 70 | display of Creative Commons licenses. 
Uses short identifiers like CC BY-SA 71 | 4.0 instead of descriptive text.","clientScripts":{"client/video-watch-client-plugin.js":{"script":"client/video-watch-client-plugin.js","scopes":["video-watch"]}}},{"npmName":"peertube-plugin-social-sharing-rus","name":"social-sharing-rus","version":"0.11.0","description":"Share 72 | a video or playlist URL on social media (Mastodon, WordPress, reddit, Twitter, 73 | etc.)","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-menu-items","name":"menu-items","version":"0.0.4","description":"PeerTube 74 | plugin menu-items","clientScripts":{"dist/common-client-plugin.js":{"script":"dist/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-chapters","name":"chapters","version":"1.1.3","description":"PeerTube 75 | chapter plugin","clientScripts":{"dist/client/video-watch-client-plugin.js":{"script":"dist/client/video-watch-client-plugin.js","scopes":["video-watch","embed"]},"dist/client/video-edit-client-plugin.js":{"script":"dist/client/video-edit-client-plugin.js","scopes":["video-edit"]}}},{"npmName":"peertube-plugin-simplelogo","name":"simplelogo","version":"0.0.5","description":"Plugin 76 | that let you change logo and favicon on your PeerTube instance.","clientScripts":{"client/common-client-plugin.js":{"script":"client/common-client-plugin.js","scopes":["common"]}}},{"npmName":"peertube-plugin-video-annotation","name":"video-annotation","version":"0.0.7","description":"PeerTube 77 | plugin video 
annotation","clientScripts":{"dist/embed-client-plugin.js":{"script":"dist/embed-client-plugin.js","scopes":["embed"]},"dist/video-edit-client-plugin.js":{"script":"dist/video-edit-client-plugin.js","scopes":["video-edit"]},"dist/video-watch-client-plugin.js":{"script":"dist/video-watch-client-plugin.js","scopes":["video-watch"]}}},{"npmName":"peertube-plugin-livechat","name":"livechat","version":"7.2.1","description":"PeerTube 78 | plugin livechat: offers a way to embed a chat system into Peertube.","clientScripts":{"dist/client/videowatch-client-plugin.js":{"script":"dist/client/videowatch-client-plugin.js","scopes":["video-watch"]},"dist/client/common-client-plugin.js":{"script":"dist/client/common-client-plugin.js","scopes":["common"]},"dist/client/admin-plugin-client-plugin.js":{"script":"dist/client/admin-plugin-client-plugin.js","scopes":["admin-plugin"]}}}]},"theme":{"registered":[{"npmName":"peertube-theme-dark-evolution","name":"dark-evolution","version":"1.0.4","description":"Evolution 79 | of the official PeerTube dark theme","css":["assets/style.css"],"clientScripts":{}},{"npmName":"peertube-theme-dark","name":"dark","version":"2.5.0","description":"PeerTube 80 | dark 
theme","css":["assets/style.css"],"clientScripts":{}}],"default":"dark-evolution"},"email":{"enabled":true},"contactForm":{"enabled":true},"transcoding":{"hls":{"enabled":true},"webtorrent":{"enabled":true},"enabledResolutions":[144,240,360,480,720,1080]},"live":{"enabled":true,"transcoding":{"enabled":true,"enabledResolutions":[144,480,720,1080]}},"import":{"videos":{"http":{"enabled":true},"torrent":{"enabled":false}}},"autoBlacklist":{"videos":{"ofUsers":{"enabled":false}}},"avatar":{"file":{"size":{"max":4194304},"extensions":[".png",".jpeg",".jpg",".gif",".webp"]}},"video":{"image":{"extensions":[".png",".jpg",".jpeg",".webp"],"size":{"max":4194304}},"file":{"extensions":[".webm",".ogv",".ogg",".mp4",".mkv",".mov",".qt",".mqv",".m4v",".flv",".f4v",".wmv",".avi",".3gp",".3gpp",".3g2",".3gpp2",".nut",".mts",".m2ts",".mpv",".m2v",".m1v",".mpg",".mpe",".mpeg",".vob",".mxf",".mp3",".wma",".wav",".flac",".aac",".m4a",".ac3"]}},"videoCaption":{"file":{"size":{"max":20971520},"extensions":[".vtt",".srt"]}},"user":{"videoQuota":53687091200,"videoQuotaDaily":5368709120},"trending":{"videos":{"intervalDays":7}},"tracker":{"enabled":true}}}}' 81 | headers: 82 | Access-Control-Allow-Origin: 83 | - '*' 84 | Connection: 85 | - keep-alive 86 | Content-Length: 87 | - '5567' 88 | Content-Type: 89 | - application/json; charset=utf-8; profile="http://nodeinfo.diaspora.software/ns/schema/2.0#" 90 | Date: 91 | - Thu, 04 Dec 2025 12:17:47 GMT 92 | ETag: 93 | - W/"15bf-UHcLfIV97HliD7E2eKuWJsf3iEQ" 94 | Server: 95 | - nginx/1.18.0 (Ubuntu) 96 | Tk: 97 | - N 98 | X-Frame-Options: 99 | - DENY 100 | cache-control: 101 | - max-age=600 102 | x-powered-by: 103 | - PeerTube 104 | status: 105 | code: 200 106 | message: OK 107 | version: 1 108 | -------------------------------------------------------------------------------- /src/chives/static_site_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines a set of common tests and test 
helpers used for all my static sites. 3 | """ 4 | 5 | from abc import ABC, abstractmethod 6 | import collections 7 | from collections.abc import Iterator 8 | import concurrent.futures 9 | import glob 10 | import itertools 11 | import os 12 | from pathlib import Path 13 | import subprocess 14 | from typing import TypeVar 15 | 16 | import pytest 17 | from rapidfuzz import fuzz 18 | 19 | from chives.dates import date_matches_any_format, find_all_dates 20 | from chives.media import is_av1_video 21 | from chives.urls import is_url_safe 22 | 23 | 24 | T = TypeVar("T") 25 | 26 | 27 | class StaticSiteTestSuite[M](ABC): 28 | """ 29 | Defines a base set of tests to run against any of my static sites. 30 | 31 | This should be subclassed as a Test* class, which allows you to use 32 | the fixtures and write site-specific tests. 33 | """ 34 | 35 | @abstractmethod 36 | @pytest.fixture 37 | def site_root(self) -> Path: 38 | """ 39 | Returns the path to the folder at the root of the site. 40 | """ 41 | ... 42 | 43 | @abstractmethod 44 | @pytest.fixture 45 | def metadata(self, site_root: Path) -> M: 46 | """ 47 | Returns all the metadata for this project. 48 | """ 49 | ... 50 | 51 | @abstractmethod 52 | def list_paths_in_metadata(self, metadata: M) -> set[Path]: 53 | """ 54 | Returns a set of paths described in the metadata. 55 | """ 56 | ... 57 | 58 | def list_tags_in_metadata(self, metadata: M) -> Iterator[str]: # pragma: no cover 59 | """ 60 | Returns all the tags used in the metadata, once for every usage. 61 | 62 | For example, if three documents use the same tag, the tag will 63 | be returned three times. 64 | 65 | This method should be overriden in projects that use keyword tags; 66 | there are some sites that don't use tags. 67 | """ 68 | yield from [] 69 | 70 | def test_no_uncommitted_git_changes(self, site_root: Path) -> None: 71 | """ 72 | There are no changes which haven't been committed to Git. 
73 | 74 | This is especially useful when I run a script that tests all 75 | my static sites, that none of them have unsaved changes. 76 | """ 77 | rc = subprocess.call(["git", "diff", "--exit-code", "--quiet"], cwd=site_root) 78 | 79 | assert rc == 0, "There are uncommitted changes!" 80 | 81 | def list_paths_saved_locally(self, site_root: Path) -> set[Path]: 82 | """ 83 | Returns a set of paths saved locally. 84 | """ 85 | paths_saved_locally = set() 86 | 87 | for root, _, filenames in site_root.walk(): 88 | # Ignore certain top-level folders I don't care about. 89 | try: 90 | top_level_folder = root.relative_to(site_root).parts[0] 91 | except IndexError: 92 | pass 93 | else: 94 | if top_level_folder in { 95 | ".git", 96 | ".mypy_cache", 97 | ".pytest_cache", 98 | ".ruff_cache", 99 | ".venv", 100 | "data", 101 | "scripts", 102 | "static", 103 | "tests", 104 | "viewer", 105 | }: 106 | continue 107 | 108 | for f in filenames: 109 | if f == ".DS_Store": 110 | continue 111 | 112 | if root == site_root and f in { 113 | "Icon\r", 114 | ".gitignore", 115 | "index.html", 116 | "README.md", 117 | "TODO.md", 118 | }: 119 | continue 120 | 121 | if root == site_root and f.endswith(".js"): 122 | continue 123 | 124 | paths_saved_locally.add((root / f).relative_to(site_root)) 125 | 126 | return paths_saved_locally 127 | 128 | def test_every_file_in_metadata_is_saved_locally( 129 | self, metadata: M, site_root: Path 130 | ) -> None: 131 | """ 132 | Every file described in the metadata is saved locally. 
133 | """ 134 | paths_in_metadata = self.list_paths_in_metadata(metadata) 135 | paths_saved_locally = self.list_paths_saved_locally(site_root) 136 | 137 | assert paths_in_metadata - paths_saved_locally == set(), ( 138 | f"Paths in metadata not saved locally: " 139 | f"{paths_in_metadata - paths_saved_locally}" 140 | ) 141 | 142 | def test_every_local_file_is_in_metadata( 143 | self, metadata: M, site_root: Path 144 | ) -> None: 145 | """ 146 | Every file saved locally is described in the metadata. 147 | """ 148 | paths_in_metadata = self.list_paths_in_metadata(metadata) 149 | paths_saved_locally = self.list_paths_saved_locally(site_root) 150 | 151 | assert paths_saved_locally - paths_in_metadata == set(), ( 152 | f"Paths saved locally not in metadata: " 153 | f"{paths_saved_locally - paths_in_metadata}" 154 | ) 155 | 156 | def test_every_path_is_url_safe(self, site_root: Path) -> None: 157 | """ 158 | Every path has a URL-safe path. 159 | """ 160 | bad_paths = set() 161 | 162 | for root, _, filenames in site_root.walk(): 163 | for f in filenames: 164 | p = site_root / root / f 165 | if not is_url_safe(p): 166 | bad_paths.add(p) 167 | 168 | assert bad_paths == set(), f"Found paths which aren't URL safe: {bad_paths}" 169 | 170 | @pytest.mark.skipif("SKIP_AV1" in os.environ, reason="skip slow test") 171 | def test_no_videos_are_av1(self, site_root: Path) -> None: 172 | """ 173 | No videos are encoded in AV1 (which doesn't play on my iPhone). 174 | 175 | This test can be removed when I upgrade all my devices to ones with 176 | hardware AV1 decoding support. 
177 | 178 | See https://alexwlchan.net/2025/av1-on-my-iphone/ 179 | """ 180 | av1_videos = set() 181 | 182 | with concurrent.futures.ThreadPoolExecutor() as executor: 183 | futures = { 184 | executor.submit(is_av1_video, site_root / p): p 185 | for p in glob.glob("**/*.mp4", root_dir=site_root, recursive=True) 186 | } 187 | 188 | concurrent.futures.wait(futures) 189 | 190 | av1_videos = {p for fut, p in futures.items() if fut.result()} 191 | 192 | assert av1_videos == set(), f"Found videos encoded with AV1: {av1_videos}" 193 | 194 | date_formats = [ 195 | "%Y-%m-%dT%H:%M:%SZ", 196 | "%Y-%m-%d", 197 | ] 198 | 199 | def test_all_timestamps_are_consistent(self, metadata: M) -> None: 200 | """ 201 | All the timestamps in my JSON use a consistent format. 202 | 203 | See https://alexwlchan.net/2025/messy-dates-in-json/ 204 | """ 205 | bad_date_strings = { 206 | date_string 207 | for _, _, date_string in find_all_dates(metadata) 208 | if not date_matches_any_format(date_string, self.date_formats) 209 | } 210 | 211 | assert bad_date_strings == set(), ( 212 | f"Found incorrectly-formatted dates: {bad_date_strings}" 213 | ) 214 | 215 | @staticmethod 216 | def find_similar_pairs(tags: dict[str, int]) -> Iterator[tuple[str, str]]: 217 | """ 218 | Find pairs of similar-looking tags in the collection `tags`. 219 | """ 220 | for t1, t2 in itertools.combinations(sorted(tags), 2): 221 | if fuzz.ratio(t1, t2) > 80: 222 | yield (t1, t2) 223 | 224 | known_similar_tags: set[tuple[str, str]] = set() 225 | 226 | def test_no_similar_tags(self, metadata: M) -> None: 227 | """ 228 | There are no similar/misspelt tags. 
229 | """ 230 | tags = collections.Counter(self.list_tags_in_metadata(metadata)) 231 | 232 | similar_tags = [ 233 | f"{t1} ({tags[t1]}) / {t2} ({tags[t2]})" 234 | for t1, t2 in self.find_similar_pairs(tags) 235 | if (t1, t2) not in self.known_similar_tags 236 | ] 237 | 238 | assert similar_tags == [], f"Found similar tags: {similar_tags}" 239 | -------------------------------------------------------------------------------- /tests/test_static_site_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for `chives.static_site_tests`. 3 | """ 4 | 5 | from collections.abc import Iterator 6 | from pathlib import Path 7 | import shutil 8 | import subprocess 9 | from typing import Any, TypeVar 10 | 11 | import pytest 12 | 13 | from chives import dates 14 | from chives.static_site_tests import StaticSiteTestSuite 15 | 16 | 17 | M = TypeVar("M") 18 | 19 | 20 | @pytest.fixture 21 | def site_root(tmp_path: Path) -> Path: 22 | """ 23 | Return a temp directory to use as a site root. 24 | """ 25 | return tmp_path 26 | 27 | 28 | def create_test_suite[M]( 29 | site_root: Path, 30 | metadata: M, 31 | *, 32 | paths_in_metadata: set[Path] | None = None, 33 | tags_in_metadata: set[str] | None = None, 34 | ) -> StaticSiteTestSuite[M]: 35 | """ 36 | Create a new instance of StaticSiteTestSuite with the hard-coded data 37 | provided. 
38 | """ 39 | 40 | class TestSuite(StaticSiteTestSuite[M]): 41 | def site_root(self) -> Path: # pragma: no cover 42 | return site_root 43 | 44 | def metadata(self, site_root: Path) -> M: # pragma: no cover 45 | return metadata 46 | 47 | def list_paths_in_metadata(self, metadata: M) -> set[Path]: 48 | return paths_in_metadata or set() 49 | 50 | def list_tags_in_metadata(self, metadata: M) -> Iterator[str]: 51 | yield from (tags_in_metadata or set()) 52 | 53 | return TestSuite() 54 | 55 | 56 | def test_paths_saved_locally_match_metadata(site_root: Path) -> None: 57 | """ 58 | The tests check that the set of paths saved locally match the metadata. 59 | """ 60 | # Create a series of paths in tmp_path. 61 | for filename in [ 62 | "index.html", 63 | "metadata.js", 64 | "media/cat.jpg", 65 | "media/dog.png", 66 | "media/emu.gif", 67 | "viewer/index.html", 68 | ".DS_Store", 69 | ]: 70 | p = site_root / filename 71 | p.parent.mkdir(exist_ok=True) 72 | p.write_text("test") 73 | 74 | metadata = [Path("media/cat.jpg"), Path("media/dog.png"), Path("media/emu.gif")] 75 | 76 | t = create_test_suite(site_root, metadata, paths_in_metadata=set(metadata)) 77 | t.test_every_file_in_metadata_is_saved_locally(metadata, site_root) 78 | t.test_every_local_file_is_in_metadata(metadata, site_root) 79 | 80 | # Add a new file locally, and check the test starts failing. 81 | (site_root / "media/fish.tiff").write_text("test") 82 | 83 | with pytest.raises(AssertionError): 84 | t.test_every_local_file_is_in_metadata(metadata, site_root) 85 | 86 | (site_root / "media/fish.tiff").unlink() 87 | 88 | # Delete one of the local files, and check the test starts failing. 89 | (site_root / "media/cat.jpg").unlink() 90 | 91 | with pytest.raises(AssertionError): 92 | t.test_every_file_in_metadata_is_saved_locally(metadata, site_root) 93 | 94 | 95 | def test_checks_for_git_changes(site_root: Path) -> None: 96 | """ 97 | The tests check that there are no uncommitted Git changes. 
98 | """ 99 | t = create_test_suite(site_root, metadata=[1, 2, 3]) 100 | 101 | # Initially this should fail, because there isn't a Git repo in 102 | # the folder. 103 | with pytest.raises(AssertionError): 104 | t.test_no_uncommitted_git_changes(site_root) 105 | 106 | # Create a Git repo, add a file, and commit it. 107 | (site_root / "README.md").write_text("hello world") 108 | subprocess.check_call(["git", "init"], cwd=site_root) 109 | subprocess.check_call(["git", "add", "README.md"], cwd=site_root) 110 | subprocess.check_call(["git", "commit", "-m", "initial commit"], cwd=site_root) 111 | 112 | # Check there are no uncommitted Git changes 113 | t.test_no_uncommitted_git_changes(site_root) 114 | 115 | # Make a new change, and check it's spotted 116 | (site_root / "README.md").write_text("a different hello world") 117 | 118 | with pytest.raises(AssertionError): 119 | t.test_no_uncommitted_git_changes(site_root) 120 | 121 | 122 | def test_checks_for_url_safe_paths(site_root: Path) -> None: 123 | """ 124 | The tests check for URL-safe paths. 125 | """ 126 | t = create_test_suite(site_root, metadata=[1, 2, 3]) 127 | 128 | # This should pass trivially when the site is empty. 129 | t.test_every_path_is_url_safe(site_root) 130 | 131 | # Now write some files with URL-safe names, and check it's still okay. 132 | for filename in [ 133 | "index.html", 134 | "metadata.js", 135 | ".DS_Store", 136 | ]: 137 | (site_root / filename).write_text("test") 138 | 139 | t.test_every_path_is_url_safe(site_root) 140 | 141 | # Write another file with a URL-unsafe name, and check it's caught 142 | # by the test. 143 | (site_root / "a#b#c").write_text("test") 144 | 145 | with pytest.raises(AssertionError): 146 | t.test_every_path_is_url_safe(site_root) 147 | 148 | 149 | def test_checks_for_av1_videos(site_root: Path) -> None: 150 | """ 151 | The tests check for AV1-encoded videos. 
class TestAllTimestampsAreConsistent:
    """
    Tests for the `test_all_timestamps_are_consistent` method.
    """

    @pytest.mark.parametrize(
        "metadata",
        [
            {"date_saved": "2025-12-06"},
            {"date_saved": dates.now()},
        ],
    )
    def test_allows_correct_date_formats(self, site_root: Path, metadata: Any) -> None:
        """
        The tests pass if all the dates are in the correct format.
        """
        t = create_test_suite(site_root, metadata)
        t.test_all_timestamps_are_consistent(metadata)

    @pytest.mark.parametrize("metadata", [{"date_saved": "AAAA-BB-CC"}])
    def test_rejects_incorrect_date_formats(
        self, site_root: Path, metadata: Any
    ) -> None:
        """
        The tests fail if the metadata has inconsistent date formats.
        """
        t = create_test_suite(site_root, metadata)
        with pytest.raises(AssertionError):
            t.test_all_timestamps_are_consistent(metadata)

    def test_can_override_date_formats(self, site_root: Path) -> None:
        """
        A previously-blocked date format is allowed if you add it to
        the `date_formats` list.
        """
        metadata = {"date_saved": "2025"}
        t = create_test_suite(site_root, metadata)

        # It fails with the default settings
        with pytest.raises(AssertionError):
            t.test_all_timestamps_are_consistent(metadata)

        # It passes if we add the format to `date_formats`.
        #
        # Rebind the attribute on this instance rather than calling
        # `.append()`: if `date_formats` is a list shared at class level,
        # appending in place would leak "%Y" into every other suite
        # created later in the test run.
        t.date_formats = [*t.date_formats, "%Y"]
        t.test_all_timestamps_are_consistent(metadata)
def is_av1_video(path: str | Path) -> bool:
    """
    Returns True if a video is encoded with AV1, False otherwise.

    This asks `ffprobe` for the codec name of the first video stream and
    compares it to "av1".  If `ffprobe` exits with an error, the
    `subprocess.CalledProcessError` is propagated to the caller.
    """
    # fmt: off
    cmd = [
        "ffprobe",
        #
        # Set the logging level
        "-loglevel", "error",
        #
        # Select the first video stream
        "-select_streams", "v:0",
        #
        # Print the codec_name (e.g. av1)
        "-show_entries", "stream=codec_name",
        #
        # Print just the value.
        #
        # This uses `-print_format` rather than the `-output_format`
        # alias: the alias is a more recent addition to ffprobe, whereas
        # `-print_format` is also accepted by older builds and is what
        # the other ffprobe call in this module already uses.
        "-print_format", "default=noprint_wrappers=1:nokey=1",
        #
        # Name of the video to check
        str(path),
    ]
    # fmt: on

    output = subprocess.check_output(cmd, text=True)

    return output.strip() == "av1"
class VideoEntity(TypedDict):
    """
    VideoEntity contains all the fields I need to render a video
    in a web page.

    Keys marked `NotRequired` are omitted entirely when they don't apply,
    rather than being stored as `None` or `False`.
    """

    # Always the literal string "video"; this is the tag that tells
    # a video entity apart from other kinds of media entity.
    type: Literal["video"]

    # The path to the video on disk
    path: str

    # The poster image for the video, as a complete image entity
    poster: ImageEntity

    # The display resolution of the video
    width: int
    height: int

    # The duration of the video, as an HOURS:MM:SS.MICROSECONDS string,
    # e.g. "0:00:10.000000"
    duration: str

    # Path to the subtitles for the video, if available
    subtitles_path: NotRequired[str]

    # The source URL of the video, if available
    source_url: NotRequired[str]

    # Whether the video should play automatically.  This is used for
    # videos that are substituting for animated GIFs.
    autoplay: NotRequired[Literal[True]]
def create_image_entity(
    path: str | Path,
    *,
    background: str = "#ffffff",
    alt_text: str | None = None,
    source_url: str | None = None,
    thumbnail_config: ThumbnailConfig | None = None,
    generate_transcript: bool = False,
) -> ImageEntity:
    """
    Create an ImageEntity for a saved image.

    Parameters:
      * path -- the image file to describe
      * background -- the background colour the tint colour is chosen
        against; passed to `get_tint_colour`
      * alt_text -- alt text to store on the entity
      * source_url -- source URL to store on the entity
      * thumbnail_config -- if set, a thumbnail is created with these
        settings and its path is stored as `thumbnail_path`
      * generate_transcript -- if True, the image is run through
        `_get_transcript` and any text found is stored as `alt_text`

    Optional keys are only added to the entity when they have a value.

    Raises TypeError if both `alt_text` and `generate_transcript=True`
    are passed, because it's ambiguous which one should win.
    """
    # Reject contradictory arguments up front, before opening the image,
    # shelling out to any helper tools, or writing a thumbnail to disk.
    if alt_text is not None and generate_transcript:
        raise TypeError("You cannot set alt_text and generate_transcript=True!")

    from PIL import Image, ImageOps

    with Image.open(path) as im:
        # Account for EXIF orientation in the dimensions.
        # See https://alexwlchan.net/til/2024/photos-can-have-orientation-in-exif/
        transposed_im = ImageOps.exif_transpose(im)

        entity: ImageEntity = {
            "type": "image",
            "path": str(path),
            "tint_colour": get_tint_colour(path, background=background),
            "width": transposed_im.width,
            "height": transposed_im.height,
        }

        if _is_animated(im):
            entity["is_animated"] = True

        if _has_transparency(im):
            entity["has_transparency"] = True

    if thumbnail_config is not None:
        entity["thumbnail_path"] = _create_thumbnail(path, thumbnail_config)

    if alt_text is not None:
        entity["alt_text"] = alt_text
    elif generate_transcript:
        # An image with no recognisable text has no transcript, in which
        # case the entity simply has no `alt_text` key.
        transcript = _get_transcript(path)
        if transcript is not None:
            entity["alt_text"] = transcript

    if source_url is not None:
        entity["source_url"] = source_url

    return entity
def _has_transparency(im: "PIL.Image.Image") -> bool:
    """
    Returns True if an image has transparent pixels, False otherwise.

    Adapted from Vinyl Da.i'gyu-Kazotetsu on Stack Overflow:
    https://stackoverflow.com/a/58567453
    """
    # If the image metadata has a `transparency` entry, treat the image
    # as transparent.  This is the only way a plain palette ("P") image
    # is detected, so no separate scan of palette indices is needed.
    if im.info.get("transparency") is not None:
        return True

    # Otherwise, look for an explicit alpha band.  Checking the band name
    # rather than `mode == "RGBA"` also covers the other alpha modes,
    # such as "LA" (greyscale + alpha) and "PA" (palette + alpha).
    if "A" in im.getbands():
        lowest_alpha = im.getchannel("A").getextrema()[0]
        return lowest_alpha < 255  # type: ignore

    return False
def _get_video_data(video_path: str | Path) -> tuple[int, int, str]:
    """
    Returns the display width, display height and duration of a video.

    The width is the stored width scaled by the pixel (sample) aspect
    ratio, so it describes how wide the video is when displayed rather
    than how many pixels are stored.  The duration is returned as an
    HOURS:MM:SS.MICROSECONDS string.

    If `ffprobe` exits with an error, the `subprocess.CalledProcessError`
    is propagated to the caller.
    """
    cmd = [
        "ffprobe",
        #
        # verbosity level = error
        "-v",
        "error",
        #
        # only get information about the first video stream
        "-select_streams",
        "v:0",
        #
        # only gather the entries I'm interested in.  The container-level
        # duration is requested as well, as a fallback for videos whose
        # stream doesn't report a duration of its own.
        "-show_entries",
        "stream=width,height,sample_aspect_ratio,duration:format=duration",
        #
        # print the duration in HH:MM:SS.microseconds format
        "-sexagesimal",
        #
        # print output in JSON, which is easier to parse
        "-print_format",
        "json",
        #
        # input file
        str(video_path),
    ]

    output = subprocess.check_output(cmd)
    ffprobe_resp = json.loads(output)

    # The output will be structured something like:
    #
    #     {
    #       "streams": [
    #         {
    #           "width": 1920,
    #           "height": 1080,
    #           "sample_aspect_ratio": "45:64",
    #           "duration": "0:00:10.000000"
    #         }
    #       ],
    #       "format": {
    #         "duration": "0:00:10.000000"
    #       }
    #     }
    #
    # If the video doesn't specify a pixel aspect ratio, then it won't
    # have a `sample_aspect_ratio` key.
    video_stream = ffprobe_resp["streams"][0]

    try:
        pixel_aspect_ratio = Fraction(
            video_stream["sample_aspect_ratio"].replace(":", "/")
        )
    except KeyError:
        pixel_aspect_ratio = Fraction(1)

    width = round(video_stream["width"] * pixel_aspect_ratio)
    height = video_stream["height"]

    # Some containers (e.g. Matroska/WebM) only record the duration at
    # the container level, so the stream has no `duration` key; fall
    # back to the container's value in that case.
    try:
        duration = video_stream["duration"]
    except KeyError:
        duration = ffprobe_resp["format"]["duration"]

    return width, height, duration
@pytest.mark.parametrize(
    "filename",
    [
        # A solid blue image with a hole cut out of the middle
        "blue_with_hole.png",
        #
        # An asteroid belt drawn in TikZ by TeX.SE user Qrrbrbirlbel,
        # which has `transparency` in its im.info.
        # Downloaded from http://tex.stackexchange.com/a/111974/9668
        "asteroid_belt.png",
    ],
)
def test_image_with_transparency(self, fixtures_dir: Path, filename: str) -> None:
    """
    An image containing transparent pixels produces an entity
    with `has_transparency=True`.
    """
    image_path = fixtures_dir / filename
    image_entity = create_image_entity(image_path)
    assert image_entity["has_transparency"]
def test_other_attrs_are_forwarded(self, fixtures_dir: Path) -> None:
    """
    Whatever is passed as `alt_text` and `source_url` ends up,
    unchanged, on the final entity.
    """
    alt_text = "This is the alt text"
    source_url = "https://example.com/blue.png"

    image_entity = create_image_entity(
        fixtures_dir / "blue.png", alt_text=alt_text, source_url=source_url
    )

    assert image_entity["alt_text"] == alt_text
    assert image_entity["source_url"] == source_url
def test_create_thumbnail_by_width(self, fixtures_dir: Path, tmp_path: Path) -> None:
    """
    Asking for a thumbnail of a given width creates a file on disk
    which is exactly that wide.
    """
    out_dir = tmp_path / "thumbnails"

    image_entity = create_image_entity(
        fixtures_dir / "blue.png",
        thumbnail_config={"out_dir": out_dir, "width": 10},
    )

    thumbnail_path = Path(image_entity["thumbnail_path"])
    assert thumbnail_path.exists()

    with Image.open(thumbnail_path) as thumbnail:
        assert thumbnail.width == 10
@pytest.mark.parametrize(
    "background, tint_colour",
    [("white", "#005493"), ("black", "#b3fdff"), ("#111111", "#b3fdff")],
)
def test_tint_colour_is_based_on_background(
    self, fixtures_dir: Path, background: str, tint_colour: str
) -> None:
    """
    The tint colour is chosen to suit the background.
    """
    # The checkerboard is made of two shades of turquoise, one light
    # and one dark, so there's a sensible choice for either background.
    checkerboard = fixtures_dir / "checkerboard.png"
    image_entity = create_image_entity(checkerboard, background=background)
    assert image_entity["tint_colour"] == tint_colour
def test_other_attrs_are_forwarded(self, fixtures_dir: Path) -> None:
    """
    Whatever is passed as `subtitles_path`, `source_url` and `autoplay`
    ends up on the final entity.
    """
    source_url = "https://test-videos.co.uk/sintel/mp4-h264"

    video_entity = create_video_entity(
        fixtures_dir / "Sintel_360_10s_1MB_H264.mp4",
        poster_path=fixtures_dir / "Sintel_360_10s_1MB_H264.png",
        subtitles_path=fixtures_dir / "Sintel_360_10s_1MB_H264.en.vtt",
        source_url=source_url,
        autoplay=True,
    )

    # The subtitles path is stored as a string, not a Path.
    expected_subtitles = "tests/fixtures/media/Sintel_360_10s_1MB_H264.en.vtt"
    assert video_entity["subtitles_path"] == expected_subtitles
    assert video_entity["source_url"] == source_url
    assert video_entity["autoplay"]
def test_video_without_sample_aspect_ratio(self, fixtures_dir: Path) -> None:
    """
    A video with no `sample_aspect_ratio` in its metadata still gets
    width/height dimensions.
    """
    # This is a short clip from Wings (1927).
    video_entity = create_video_entity(
        fixtures_dir / "wings_tracking_shot.mp4",
        poster_path=fixtures_dir / "wings_tracking_shot.jpg",
    )

    assert (video_entity["width"], video_entity["height"]) == (960, 720)
class TestGetMediaPaths:
    """
    Tests for `get_media_paths`.
    """

    # Filenames of the fixtures shared by the video tests below
    VIDEO = "Sintel_360_10s_1MB_H264.mp4"
    POSTER = "Sintel_360_10s_1MB_H264.png"
    SUBTITLES = "Sintel_360_10s_1MB_H264.en.vtt"

    def test_basic_image(self, fixtures_dir: Path) -> None:
        """
        An image with no thumbnail has a single path: the image itself.
        """
        image_path = fixtures_dir / "blue.png"
        assert get_media_paths(create_image_entity(image_path)) == {image_path}

    def test_image_with_thumbnail(self, fixtures_dir: Path, tmp_path: Path) -> None:
        """
        An image with a thumbnail has two paths: the image and the
        thumbnail.
        """
        image_path = fixtures_dir / "blue.png"
        image_entity = create_image_entity(
            image_path,
            thumbnail_config={"out_dir": tmp_path / "thumbnails", "width": 300},
        )

        expected = {image_path, tmp_path / "thumbnails/blue.png"}
        assert get_media_paths(image_entity) == expected

    def test_video(self, fixtures_dir: Path) -> None:
        """
        A video has two paths: the video and the poster image.
        """
        video_path = fixtures_dir / self.VIDEO
        poster_path = fixtures_dir / self.POSTER

        video_entity = create_video_entity(video_path, poster_path=poster_path)

        assert get_media_paths(video_entity) == {video_path, poster_path}

    def test_video_with_subtitles(self, fixtures_dir: Path) -> None:
        """
        A video with subtitles has three paths: the video, the subtitles,
        and the poster image.
        """
        video_path = fixtures_dir / self.VIDEO
        poster_path = fixtures_dir / self.POSTER
        subtitles_path = fixtures_dir / self.SUBTITLES

        video_entity = create_video_entity(
            video_path, poster_path=poster_path, subtitles_path=subtitles_path
        )

        expected = {video_path, poster_path, subtitles_path}
        assert get_media_paths(video_entity) == expected

    def test_video_with_thumbnail(self, fixtures_dir: Path, tmp_path: Path) -> None:
        """
        A video with a poster thumbnail has three paths: the video,
        the poster image, and the poster thumbnail.
        """
        video_path = fixtures_dir / self.VIDEO
        poster_path = fixtures_dir / self.POSTER

        video_entity = create_video_entity(
            video_path,
            poster_path=poster_path,
            thumbnail_config={"out_dir": tmp_path / "thumbnails", "width": 300},
        )

        expected = {
            video_path,
            poster_path,
            tmp_path / "thumbnails/Sintel_360_10s_1MB_H264.png",
        }
        assert get_media_paths(video_entity) == expected

    @pytest.mark.parametrize("bad_entity", [{}, {"type": "shape"}])
    def test_unrecognised_entity_is_error(self, bad_entity: Any) -> None:
        """
        An entity with a missing or unrecognised type is rejected
        with a TypeError.
        """
        with pytest.raises(TypeError):
            get_media_paths(bad_entity)