├── docs ├── imgs ├── examples ├── _static │ ├── favicon.png │ ├── README.md │ ├── mathjax.js │ └── custom_css.css ├── citation.md ├── further_details │ ├── benchmarks.md │ └── acknowledgements.md ├── _overrides │ └── partials │ │ └── source.html ├── requirements.txt └── index.md ├── test ├── __init__.py ├── test_question_answering │ ├── __init__.py │ ├── conftest.py │ ├── test_uasquad_core.py │ ├── test_uasquad.py │ └── test_uasquad_hardening.py ├── test_text_classification │ ├── __init__.py │ ├── conftest.py │ ├── test_classification.py │ └── test_news_dataset_hardening.py ├── test_token_classification │ ├── __init__.py │ ├── conftest.py │ ├── test_token.py │ └── test_pos_dataset_hardening.py └── conftest.py ├── imgs └── NaUKMA.png ├── ua_datasets ├── question_answering │ ├── __init__.py │ └── uasquad_question_answering.py ├── token_classification │ ├── __init__.py │ └── part_of_speech.py ├── text_classification │ ├── __init__.py │ └── news_classification.py ├── __init__.py └── utils.py ├── CITATION.cff ├── .github └── workflows │ ├── ci.yml │ ├── build_docs.yml │ └── release.yml ├── .pre-commit-config.yaml ├── LICENSE ├── pyproject.toml ├── examples ├── mova_pos.md ├── ua_news.md └── ua_squad.md ├── .gitignore ├── mkdocs.yml ├── README.md └── uv.lock /docs/imgs: -------------------------------------------------------------------------------- 1 | ../imgs/ -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/examples: -------------------------------------------------------------------------------- 1 | ../examples/ -------------------------------------------------------------------------------- /test/test_question_answering/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_text_classification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_token_classification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /imgs/NaUKMA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fido-ai/ua-datasets/HEAD/imgs/NaUKMA.png -------------------------------------------------------------------------------- /docs/_static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fido-ai/ua-datasets/HEAD/docs/_static/favicon.png -------------------------------------------------------------------------------- /ua_datasets/question_answering/__init__.py: -------------------------------------------------------------------------------- 1 | from ua_datasets.question_answering.uasquad_question_answering import UaSquadDataset 2 | 3 | __all__ = ["UaSquadDataset"] 4 | -------------------------------------------------------------------------------- /ua_datasets/token_classification/__init__.py: -------------------------------------------------------------------------------- 1 | from ua_datasets.token_classification.part_of_speech import MovaInstitutePOSDataset 2 
| 3 | __all__ = ["MovaInstitutePOSDataset"] 4 | -------------------------------------------------------------------------------- /ua_datasets/text_classification/__init__.py: -------------------------------------------------------------------------------- 1 | from ua_datasets.text_classification.news_classification import NewsClassificationDataset 2 | 3 | __all__ = ["NewsClassificationDataset"] 4 | -------------------------------------------------------------------------------- /docs/_static/README.md: -------------------------------------------------------------------------------- 1 | The favicon is adapted from `math-integral-box` from https://materialdesignicons.com, found by way of https://pictogrammers.com. Specifically it has been adapted by filling in the integral with black. (Originally it has 100% alpha.) 2 | -------------------------------------------------------------------------------- /ua_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from ua_datasets.question_answering import UaSquadDataset 2 | from ua_datasets.text_classification import NewsClassificationDataset 3 | from ua_datasets.token_classification import MovaInstitutePOSDataset 4 | 5 | __all__ = [ 6 | "MovaInstitutePOSDataset", 7 | "NewsClassificationDataset", 8 | "UaSquadDataset", 9 | ] 10 | -------------------------------------------------------------------------------- /test/test_token_classification/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from ua_datasets.token_classification.part_of_speech import MovaInstitutePOSDataset 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def dataset(dataset_root: Path) -> MovaInstitutePOSDataset: 10 | return MovaInstitutePOSDataset(root=dataset_root, download=True) 11 | -------------------------------------------------------------------------------- /docs/_static/mathjax.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [["\\(", "\\)"]], 4 | displayMath: [["\\[", "\\]"]], 5 | processEscapes: true, 6 | processEnvironments: true 7 | }, 8 | options: { 9 | ignoreHtmlClass: ".*|", 10 | processHtmlClass: "arithmatex" 11 | } 12 | }; 13 | 14 | document$.subscribe(() => { 15 | MathJax.typesetPromise() 16 | }) 17 | -------------------------------------------------------------------------------- /docs/citation.md: -------------------------------------------------------------------------------- 1 | If you found this library useful in academic research, please cite: 2 | 3 | ```bibtex 4 | @software{ua_datasets_2021, 5 | author = {Ivanyuk-Skulskiy, Bogdan and Zaliznyi, Anton and Reshetar, Oleksand and Protsyk, Oleksiy and Romanchuk, Bohdan and Shpihanovych, Vladyslav}, 6 | month = oct, 7 | title = {ua_datasets: a collection of Ukrainian language datasets}, 8 | url = {https://github.com/fido-ai/ua-datasets}, 9 | version = {1.0.0}, 10 | year = {2021} 11 | } 12 | ``` 13 | 14 | (Also consider starring the project [on GitHub](https://github.com/fido-ai/ua-datasets)!) 15 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Ivanyuk-Skulskiy" 5 | given-names: "Bogdan" 6 | - family-names: "Zaliznyi" 7 | given-names: "Anton" 8 | - family-names: "Reshetar" 9 | given-names: "Oleksand" 10 | - family-names: "Protsyk" 11 | given-names: "Oleksiy" 12 | - family-names: "Romanchuk" 13 | given-names: "Bohdan" 14 | - family-names: "Shpihanovych" 15 | given-names: "Vladyslav" 16 | title: "ua_datasets" 17 | version: 0.0.1 18 | date-released: 2021-10-09 19 | url: "https://github.com/fido-ai/ua-datasets" 20 | -------------------------------------------------------------------------------- /test/test_text_classification/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from ua_datasets import NewsClassificationDataset 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def train_dataset(dataset_root: Path) -> NewsClassificationDataset: 10 | # Pass Path directly to satisfy type checker (was str via as_posix()). 11 | return NewsClassificationDataset(root=dataset_root, split="train") 12 | 13 | 14 | @pytest.fixture(scope="module") 15 | def test_dataset(dataset_root: Path) -> NewsClassificationDataset: 16 | return NewsClassificationDataset(root=dataset_root, split="test") 17 | -------------------------------------------------------------------------------- /docs/further_details/benchmarks.md: -------------------------------------------------------------------------------- 1 | # UA-Bench 2 | 3 | The goal of __UA-Bench__ is to track real progress in Ukrainian language model development. 4 | 5 | ## UA-SQuAD 6 | 7 | | Method | Test results | Extra data | Architecture | Venue | 8 | | ------------- |:--------:|:-------:|:------:|:------:| 9 | 10 | - [robinhad/ukrainian-qa](https://github.com/robinhad/ukrainian-qa) 11 | 12 | ## UA-News 13 | 14 | | Method | Test results | Extra data | Architecture | Venue | 15 | | ------------- |:--------:|:-------:|:------:|:------:| 16 | 17 | 18 | ## Mova Institute POS 19 | 20 | | Method | Test results | Extra data | Architecture | Venue | 21 | | ------------- |:--------:|:-------:|:------:|:------:| 22 | -------------------------------------------------------------------------------- /docs/_overrides/partials/source.html: -------------------------------------------------------------------------------- 1 | {% import "partials/language.html" as lang with context %} 2 | 3 | <a href="{{ config.repo_url }}" title="{{ lang.t('source.link.title') }}" class="md-source" data-md-component="source">
4 | <div class="md-source__icon md-icon"> 5 | {% set icon = config.theme.icon.repo or "fontawesome/brands/git-alt" %} 6 | {% include ".icons/" ~ icon ~ ".svg" %} 7 | </div> 8 | <div class="md-source__repository"> 9 | {{ config.repo_name }} 10 | </div> 11 | </a> 12 | 13 | {% if config.theme.twitter_url %} 14 | <a href="{{ config.theme.twitter_url }}" class="md-source"> 15 | <div class="md-source__icon md-icon"> 16 | {% include ".icons/fontawesome/brands/twitter.svg" %} 17 | </div> 18 | <div class="md-source__repository"> 19 | {{ config.theme.twitter_name }} 20 | </div> 21 | </a>
22 | {% endif %} 23 | -------------------------------------------------------------------------------- /docs/further_details/acknowledgements.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## We thank our contributors for the UA-SQuAD dataset 4 | Kyrpa Mykyta, Ivan Makarov, Tepla Sofiia, Chudnovska Daria, Fedenko Anna, Zaremba Anna, Krainia Daria, Budenkova Marharyta, Butunaieva Diana, Stanislavska Kateryna, Samorodova Sofiia, Martynyshyn Yuliia, Matviienko Iryna, Bezruka Anastasiia, Mostova Mariia, Stepanenko Liubomyr, Bondarenko Vitaliia, Fedorenko Polina, Sydorka Bohdana, Okhrimenko Mykhailo, Hryha Ruslana, Ustynova Olha, Kondratenko Dmytro, Chornomorets Yelyzaveta, Heresh Yuliia, Hynku Anna-Mariia, Tarasiuk Kateryna, Demian Biliavskyi, Piatushko Ruslana, Pakholchak Kateryna, Barabukha Mariia, Poltorak Yuliia, Yuliia Fedor, Usenko Viktoriia, Balanchuk Yana, Kramchenkov Dmytro, Yatsiuk Mariia, Melnyk Tetiana, Biloverbenko Illia, Boiko Khrystyna, Steshenko Kateryna, Korcheva Anna, Syzonenko Anastasiia, Malysheva Alina, Yaroslava Kushcheva, Valeriia Denysenko 5 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Updated requirements to support plugins (autorefs needs MkDocs>=1.5 for event_priority) 2 | mkdocs>=1.5.3,<2.0 # Core; event_priority available from 1.5 3 | mkdocs-material>=9.5.0,<10.0 # Modern theme compatible with MkDocs 1.5+ 4 | pymdown-extensions>=10.8,<11.0 5 | mkdocstrings>=0.24.0,<0.25.0 # Current stable API; works with mkdocs>=1.5 6 | mkdocstrings-python>=1.10.0,<2.0 # Separate provider package for newer mkdocstrings 7 | mknotebooks>=0.8.0,<0.9 # Compatible with MkDocs 1.5 8 | mkdocs-autorefs>=1.0.1,<2.0 # Provides autorefs plugin using event_priority 9 | mkdocs-include-exclude-files>=0.0.1 10 | jinja2>=3.1.4,<4.0 # Newer Jinja2 fine with updated mkdocstrings 11 | nbconvert>=7.16.0,<8.0 # Modern nbconvert (Python 3.11 compatible) 12 | nbformat>=5.10.0,<6.0 13 | pygments>=2.18.0,<3.0 14 | 15 | # Legacy / project-specific utilities 16 | pytkdocs_tweaks==0.0.6 # Retain existing tweak package (validate compatibility periodically) 17 | -------------------------------------------------------------------------------- /test/test_question_answering/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import cast 3 | 4 | import pytest 5 | 6 | from ua_datasets import UaSquadDataset 7 | 8 | 9 | @pytest.fixture(scope="module", params=["train", "val"]) 10 | def dataset(request: pytest.FixtureRequest, dataset_root: Path) -> UaSquadDataset: 11 | """UaSquadDataset fixture parametrized over splits. 12 | 13 | Skips gracefully if the remote resource is unavailable or filenames differ 14 | from the assumed defaults (train.json / val.json) so that other test 15 | suites can still run.
16 | """ 17 | split: str = request.param 18 | try: 19 | return UaSquadDataset(root=dataset_root, split=split, download=True) 20 | except Exception as exc: # pragma: no cover - network/remote variability 21 | pytest.skip(f"Skipping UaSquadDataset {split!r} split: {exc}") 22 | # Help mypy understand this function always returns a UaSquadDataset (skip raises) 23 | return cast(UaSquadDataset, None) # unreachable 24 | -------------------------------------------------------------------------------- /test/test_text_classification/test_classification.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ua_datasets import NewsClassificationDataset 4 | 5 | 6 | @pytest.mark.parametrize("idx", [1, 10, 100]) 7 | def test_dataset_type( 8 | idx: int, train_dataset: NewsClassificationDataset, test_dataset: NewsClassificationDataset 9 | ) -> None: 10 | title, text, target, _ = train_dataset[idx] 11 | assert isinstance(title, str) 12 | assert isinstance(text, str) 13 | assert isinstance(target, str) 14 | 15 | title, text, target, _ = test_dataset[idx] 16 | assert isinstance(title, str) 17 | assert isinstance(text, str) 18 | assert isinstance(target, str) 19 | 20 | 21 | @pytest.mark.parametrize("dataset_size", [120_417]) 22 | def test_traindataset_size(dataset_size: int, train_dataset: NewsClassificationDataset) -> None: 23 | assert len(train_dataset) == dataset_size 24 | 25 | 26 | @pytest.mark.parametrize("dataset_size", [30_105]) 27 | def test_testdataset_size(dataset_size: int, test_dataset: NewsClassificationDataset) -> None: 28 | assert len(test_dataset) == dataset_size 29 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | strategy: 12 | matrix: 13 | python-version: [ '3.10', '3.11', '3.12' ] 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Install uv 25 | run: pip install --upgrade uv 26 | 27 | - name: Sync dependencies 28 | run: uv sync --dev 29 | 30 | - name: Lint (ruff) 31 | run: uv run ruff check . 32 | 33 | - name: Lint (ruff format check) 34 | run: uv run ruff format --check . 
35 | 36 | - name: Type check (mypy) 37 | run: uv run mypy || true # remove '|| true' later to enforce strict 38 | 39 | - name: Test (pytest) 40 | run: uv run pytest -q --maxfail=1 --disable-warnings 41 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: ruff-check 5 | name: ruff-check 6 | entry: ./.venv/bin/ruff 7 | args: 8 | - check 9 | - --exit-non-zero-on-fix 10 | language: system 11 | types: [python] 12 | pass_filenames: false 13 | always_run: true 14 | - id: ruff-format 15 | name: ruff-format 16 | entry: ./.venv/bin/ruff 17 | args: [ format ] 18 | language: system 19 | types: [ python ] 20 | pass_filenames: false 21 | always_run: true 22 | - id: mypy 23 | name: mypy 24 | entry: ./.venv/bin/mypy 25 | args: 26 | - ua_datasets 27 | language: system 28 | types: [ python ] 29 | pass_filenames: false 30 | always_run: true 31 | - id: uv-sort 32 | name: uv-sort 33 | entry: ./.venv/bin/uv-sort 34 | language: system 35 | pass_filenames: false 36 | - id: uv-lock 37 | name: uv-lock 38 | entry: uv 39 | args: 40 | - lock 41 | - --dry-run 42 | language: system 43 | pass_filenames: false 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Bogdan Ivanyuk-Skulskiy, Anton Zaliznyi, Oleksand Reshetar, Oleksiy Protsyk, Bohdan Romanchuk, Vladyslav Shpihanovych 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | """Global pytest configuration and shared fixtures.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import Generator 7 | 8 | import pytest 9 | 10 | 11 | def pytest_addoption(parser: pytest.Parser) -> None: 12 | parser.addoption( 13 | "--dataset-root", 14 | action="store", 15 | default=".data", 16 | help="Root directory where datasets will be cached/downloaded.", 17 | ) 18 | 19 | 20 | @pytest.fixture(scope="session") 21 | def dataset_root(request: pytest.FixtureRequest) -> Path: 22 | """Return the root path for dataset downloads/caches (session scoped).""" 23 | return Path(request.config.getoption("--dataset-root")).resolve() 24 | 25 | 26 | @pytest.fixture(scope="session", autouse=True) 27 | def _cleanup_dataset_root(dataset_root: Path) -> Generator[None, None, None]: 28 | """Remove the dataset root directory after the entire test session. 29 | 30 | Ensures no downloaded artifacts (e.g. `.data` directory) remain in the 31 | repository after tests complete, keeping the working tree clean. 32 | """ 33 | yield 34 | if dataset_root.exists(): 35 | import shutil 36 | 37 | shutil.rmtree(dataset_root, ignore_errors=True) 38 | -------------------------------------------------------------------------------- /.github/workflows/build_docs.yml: -------------------------------------------------------------------------------- 1 | name: Build docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deploy: 10 | strategy: 11 | matrix: 12 | # Quote version to avoid YAML float coercion (3.10 -> 3.1) 13 | python-version: ['3.10'] 14 | os: [ ubuntu-latest ] 15 | runs-on: ${{ matrix.os }} 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v3 19 | 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | 25 | - name: Cache build artifacts 26 | uses: actions/cache@v4 27 | with: 28 | path: .cache 29 | key: docs-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'docs/requirements.txt') }} 30 | restore-keys: | 31 | docs-${{ runner.os }}- 32 | 33 | - name: Install dependencies 34 | run: | 35 | python -m pip install --upgrade pip 36 | python -m pip install . 
37 | python -m pip install -r docs/requirements.txt 38 | 39 | - name: Build docs 40 | env: 41 | PYTHONWARNINGS: ignore::DeprecationWarning 42 | run: mkdocs gh-deploy --force --strict -------------------------------------------------------------------------------- /test/test_token_classification/test_token.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ua_datasets.token_classification.part_of_speech import MovaInstitutePOSDataset 4 | 5 | 6 | @pytest.mark.parametrize("dataset_size", [7100]) 7 | def test_dataset_size(dataset_size: int, dataset: MovaInstitutePOSDataset) -> None: 8 | assert len(dataset) == dataset_size 9 | 10 | 11 | def test_first_sample_non_empty(dataset: MovaInstitutePOSDataset) -> None: 12 | sample, labels = dataset[0] 13 | assert sample, "First token sequence should not be empty" 14 | assert labels, "First label sequence should not be empty" 15 | assert len(sample) == len(labels), "Sample and label length must match" 16 | 17 | 18 | def test_unique_labels(dataset: MovaInstitutePOSDataset) -> None: 19 | unique = dataset.unique_labels 20 | assert isinstance(unique, set) 21 | assert unique, "There should be at least one unique label" 22 | # Basic sanity: POS tags often include 'NOUN' or similar; do a soft check 23 | assert any(len(tag) > 1 for tag in unique) 24 | 25 | 26 | def test_iteration(dataset: MovaInstitutePOSDataset) -> None: 27 | first = next(iter(dataset)) 28 | assert isinstance(first, tuple) 29 | assert len(first) == 2 30 | tokens, tags = first 31 | assert len(tokens) == len(tags) 32 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ua-datasets" 3 | version = "1.0.1" 4 | description = "A collection of Ukrainian language datasets" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = {file = "LICENSE"} 8 | authors = [ 9 | {name = "FIdo AI", email = "ivanyuk.skulskiy@ukma.edu.ua"}, 10 | ] 11 | keywords = ["ua-datasets"] 12 | classifiers = [ 13 | "Intended Audience :: Developers", 14 | "Intended Audience :: Education", 15 | "Intended Audience :: Science/Research", 16 | "Natural Language :: Ukrainian", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: 3.12", 22 | ] 23 | urls = {repository = "https://github.com/fido-ai/ua-datasets"} 24 | dependencies = [] 25 | 26 | [tool.setuptools] 27 | include-package-data = true 28 | 29 | [tool.setuptools.packages.find] 30 | include = ["ua_datasets*"] 31 | 32 | [tool.setuptools.package-data] 33 | "*" = [ 34 | "README.md", 35 | "LICENSE", 36 | "CITATION.cff", 37 | "assets/*" 38 | ] 39 | 40 | [dependency-groups] 41 | dev = [ 42 | "mypy>=1.18.2", 43 | "pre-commit>=2.21.0", 44 | "pytest>=7.4.4", 45 | "ruff>=0.14.2", 46 | "uv-sort>=0.6.1", 47 | ] 48 | 49 | [tool.ruff] 50 | line-length = 100 51 | target-version = "py310" 52 | 53 | [tool.ruff.lint] 54 | extend-select = ["I", "B", "C4", "SIM", "PT", "RUF"] 55 | ignore = [ 56 | # Example: "E501" # managed by formatter if enabled 57 | ] 58 | 59 | [tool.ruff.lint.isort] 60 | known-first-party = ["ua_datasets"] 61 | combine-as-imports = true 62 | 63 | [tool.ruff.format] 64 | quote-style = "double" 65 | indent-style = "space" 66 | skip-magic-trailing-comma = false 67 | 68 |
[tool.mypy] 69 | python_version = "3.10" 70 | packages = ["ua_datasets"] 71 | warn_unused_configs = true 72 | warn_return_any = true 73 | warn_unused_ignores = true 74 | disallow_untyped_defs = true 75 | disallow_incomplete_defs = true 76 | no_implicit_optional = true 77 | show_error_codes = true 78 | pretty = true 79 | -------------------------------------------------------------------------------- /examples/mova_pos.md: -------------------------------------------------------------------------------- 1 | # Mova Institute Part of Speech Dataset 2 | 3 | A [Mova Institute](https://mova.institute) part-of-speech tagging dataset for training models on the Ukrainian language. 4 | 5 | !!! Info 6 | Total number of files: 647 7 | Tokens: 141 286 8 | Words: 111 739 9 | Sentences: 8016 10 | 11 | ## Example of usage 12 | 13 | ### Our API 14 | 15 | ```python 16 | from ua_datasets import MovaInstitutePOSDataset 17 | 18 | mova = MovaInstitutePOSDataset(root='data/', download=True) 19 | 20 | print(mova.data) 21 | print(mova.labels) 22 | ``` 23 | 24 | Sample output: 25 | 26 | ```python 27 | Sample: ['У', 'домі', 'римського', 'патриція', 'Руфіна', 'була', 'прегарна', 'фреска', ',', ...] 28 | Labels: ['ADP', 'NOUN', 'ADJ', 'NOUN', 'PROPN', 'VERB', 'ADJ', 'NOUN', 'PUNCT', ...] 29 | ``` 30 | 31 | ## Label descriptions 32 | 33 | |Primary parts of speech|Definition |Example | 34 | | ------------- |:--------------------------:|:---------------------------------:| 35 | |NOUN |Іменник (noun) |зображення,футбол,людина | 36 | |VERB |Дієслово (verb) |робити,грати,співати | 37 | |NUMR |Числівник (numeral) |один,два,сто | 38 | |ADV |Прислівник (adverb) |абсолютно,безумовно,точно,яскраво | 39 | |ADJ |Прикметник (adjective) |звичайна,веселий,грайливий,радісний| 40 | |PREP |Прийменник (preposition) |в,у,на,під,за | 41 | |CONJ |Сполучник (conjunction) |і,та,й,але,а | 42 | |PART |Частка (particle) |не,хай,нехай,де,аби | 43 | |__Additional parts of speech__ | 44 | |PRON |Займенник (pronoun) |ти,ми,вони,я | 45 | |ADJP |Дієприкметник (participle) |Кохана,написана,прочитана,заспівана| 46 | |NUMR |Порядковий числівник (ordinal numeral)|перший,сотий,другий | 47 | 48 | Samples and corresponding labels: 49 | 50 | ``` 51 | У[ADP] домі[NOUN] римського[ADJ] патриція[NOUN] Руфіна[PROPN] була[VERB] прегарна[ADJ] фреска[NOUN] ... 52 | 53 | Ходить[VERB] постійно[ADV] у[PREP] драній[ADJP]. 54 | 55 | Зробив[VERB] перший[NUMR] крок[NOUN] для[PREP] неї[PRON]. 56 | 57 | Якось[ADV] зібралися[VERB] у[PREP] нього[PRON],[PUNCT] ховаючися[VERB] від[PREP] переслідувань[NOUN] ...
58 | ``` 59 | 60 | More detailed information can be found [here](https://github.com/mova-institute/zoloto/blob/master/docs/tagset.md#%D1%80%D0%B8%D1%81%D0%B8-%D1%84%D0%BE%D1%80%D0%BC) 61 | -------------------------------------------------------------------------------- /test/test_question_answering/test_uasquad_core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from pathlib import Path 5 | 6 | from ua_datasets import UaSquadDataset 7 | 8 | TRAIN_JSON = { 9 | "data": [ 10 | {"question": "What is Python?", "context": "Python is a language.", "answer": "a language"}, 11 | {"question": "Who created Python?", "context": "Guido created it.", "answer": "Guido"}, 12 | ] 13 | } 14 | 15 | VAL_JSON = { 16 | "data": [ 17 | {"question": "Where?", "context": "In Europe.", "answer": "Europe"}, 18 | {"question": "When?", "context": "In 1991.", "answer": "1991"}, 19 | {"question": "Why?", "context": "For fun.", "answer": "For fun"}, 20 | ] 21 | } 22 | 23 | 24 | def write_json(root: Path, name: str, obj: dict) -> Path: 25 | p = root / name 26 | p.write_text(json.dumps(obj), encoding="utf8") 27 | return p 28 | 29 | 30 | def test_train_present_no_download(tmp_path: Path) -> None: 31 | write_json(tmp_path, "train.json", TRAIN_JSON) 32 | ds = UaSquadDataset(root=tmp_path, split="train", download=False) 33 | assert len(ds) == 2 34 | ex = ds[0] 35 | assert isinstance(ex, dict) 36 | assert all(isinstance(ex[k], str) and ex[k] for k in ("question", "context")) 37 | if not ex.get("is_impossible"): 38 | assert ex["answers"]["text"] 39 | assert isinstance(ex["answers"]["text"][0], str) 40 | 41 | 42 | def test_train_missing_no_download(tmp_path: Path) -> None: 43 | ds = UaSquadDataset(root=tmp_path, split="train", download=False) 44 | assert len(ds) == 0 45 | 46 | 47 | def test_val_present_no_download(tmp_path: Path) -> None: 48 | write_json(tmp_path, "val.json", VAL_JSON) 49 | ds = UaSquadDataset(root=tmp_path, split="val", download=False) 50 | assert len(ds) == 3 51 | ex = ds[len(ds) // 2] 52 | assert isinstance(ex, dict) 53 | assert all(isinstance(ex[k], str) and ex[k] for k in ("question", "context")) 54 | 55 | 56 | def test_val_missing_no_download(tmp_path: Path) -> None: 57 | ds = UaSquadDataset(root=tmp_path, split="val", download=False) 58 | assert len(ds) == 0 59 | 60 | 61 | def test_iter_matches_len(tmp_path: Path) -> None: 62 | write_json(tmp_path, "train.json", TRAIN_JSON) 63 | ds = UaSquadDataset(root=tmp_path, split="train", download=False) 64 | assert len(list(ds)) == len(ds) 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .idea/ 3 | .vscode/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | .data/ 136 | -------------------------------------------------------------------------------- /examples/ua_news.md: -------------------------------------------------------------------------------- 1 | # UA-News 2 | 3 | ## Dataset Summary 4 | 5 | Ukrainian News is a collection of more than 150 thousand news articles, gathered from more than 20 news sources. Dataset samples are divided into 5 categories: `політика`, `спорт`, `новини`, `бізнес`, `технології`. The dataset is provided by FIdo.ai (the machine learning research division of the non-profit student organization [FIdo](https://www.facebook.com/fido.naukma/) at the National University of Kyiv-Mohyla Academy) for research purposes in data mining (classification, clustering, keyword extraction, etc.).
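As a quick sanity check of the split sizes and class balance listed below, you can use the loader's frequency helpers. A minimal sketch follows; the `labels` property and `label_frequencies()` method are the ones exercised in this repository's test suite:

```python
from ua_datasets import NewsClassificationDataset

# Download (if needed) and load the train split, then inspect the class balance.
train_data = NewsClassificationDataset(root="data/", split="train", download=True)

print(len(train_data))                 # number of samples in the split
print(train_data.labels)               # set of target categories
print(train_data.label_frequencies())  # mapping of category -> sample count
```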
6 | 7 | Dataset development is still **in progress**. 8 | 9 | ## Dataset Structure 10 | 11 | __Parameters__: 12 | 13 | - `root` : Directory path 14 | 15 | - `download`: Whether to download data 16 | 17 | - `split`: Which split of the data to load (train or test) 18 | 19 | - `return_tags`: Whether to return text keywords 20 | 21 | __Splits__: 22 | 23 | - Train : 24 | - File size: 324 MB 25 | - Number of samples: 120417 26 | - Target distribution 27 | 28 | `політика` : 40364 (33.5%) 29 | 30 | `спорт` : 40364 (33.5%) 31 | 32 | `новини` : 40364 (33.5%) 33 | 34 | `бізнес` : 40364 (33.5%) 35 | 36 | `технології` : 40364 (33.5%) 37 | 38 | - Test: 39 | - File size: 81 MB 40 | - Number of samples: 30105 41 | - Target distribution 42 | 43 | `політика` : 40364 (33.5%) 44 | 45 | `спорт` : 40364 (33.5%) 46 | 47 | `новини` : 40364 (33.5%) 48 | 49 | `бізнес` : 40364 (33.5%) 50 | 51 | `технології` : 40364 (33.5%) 52 | 53 | 54 | __Data sample__ 55 | ``` 56 | { 57 | "title" : 'На Донеччині зафіксували сьомий випадок коронавірусу', 58 | "text" : 'Про це повідомив голова Донецької ОДА Павло Кириленко в Facebook ...', 59 | "tags" : ['Донецька область', 'COVID-19', 'Новини'], 60 | "target" : 'новини' 61 | } 62 | ``` 63 | 64 | ## Example of usage 65 | 66 | ### Our API 67 | 68 | ```python 69 | from ua_datasets import NewsClassificationDataset 70 | 71 | train_data = NewsClassificationDataset(root='data/', split='train', return_tags=True) 72 | 73 | for title, text, target, tags in train_data: 74 | print(title, text, target, tags) 75 | ``` 76 | 77 | ### Hugging Face 🤗 API 78 | 79 | ```python 80 | from datasets import load_dataset 81 | 82 | dataset = load_dataset("FIdo-AI/ua-news") 83 | 84 | for item in dataset["train"]: 85 | title, text, tags, target = item["title"], item["text"], item["tags"], item["target"] 86 | print("Title: " + title) 87 | print("Text: " + text) 88 | print("Tags:", tags) 89 | print("Target: " + target) 90 | ``` 91 | -------------------------------------------------------------------------------- /test/test_question_answering/test_uasquad.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ua_datasets import UaSquadDataset 4 | 5 | 6 | def test_basic_integrity(dataset: UaSquadDataset) -> None: 7 | """At least one QA example is present and its components are non-empty strings. 8 | 9 | If the dataset is empty (e.g. missing val split remotely) the fixture may still 10 | supply it; in that case we skip rather than fail so CI remains green for other splits. 11 | """ 12 | if len(dataset) == 0: 13 | pytest.skip("Empty split provided (no samples).
Skipping integrity checks.") 14 | ex = dataset[0] 15 | assert isinstance(ex, dict) 16 | assert isinstance(ex.get("question"), str) 17 | assert ex["question"].strip() 18 | assert isinstance(ex.get("context"), str) 19 | assert ex["context"].strip() 20 | if not ex.get("is_impossible"): 21 | assert ex["answers"]["text"] 22 | assert isinstance(ex["answers"]["text"][0], str) 23 | 24 | 25 | def test_multiple_samples_if_available(dataset: UaSquadDataset) -> None: 26 | """Check spaced samples (first, middle, last) when dataset is large enough.""" 27 | if len(dataset) == 0: 28 | return 29 | n = len(dataset) 30 | for idx in [0, n // 2, n - 1]: 31 | ex = dataset[idx] 32 | assert isinstance(ex.get("question"), str) 33 | assert ex["question"].strip() 34 | assert isinstance(ex.get("context"), str) 35 | assert ex["context"].strip() 36 | if not ex.get("is_impossible"): 37 | assert ex["answers"]["text"] 38 | 39 | 40 | def test_iter_first_three(dataset: UaSquadDataset) -> None: 41 | """Iterating yields triplets of strings; limit to first three to stay quick.""" 42 | count_checked = 0 43 | for ex in dataset: 44 | assert isinstance(ex.get("question"), str) 45 | assert ex["question"].strip() 46 | assert isinstance(ex.get("context"), str) 47 | assert ex["context"].strip() 48 | if not ex.get("is_impossible"): 49 | assert ex["answers"]["text"] 50 | count_checked += 1 51 | # If dataset non-empty ensure we actually validated at least one 52 | assert count_checked == len(dataset) 53 | 54 | 55 | def test_examples_length_and_schema(dataset: UaSquadDataset) -> None: 56 | if len(dataset) == 0: 57 | return 58 | examples = dataset.examples 59 | assert len(examples) == len(dataset) 60 | first = examples[0] 61 | for key in ["id", "context", "question", "answers", "is_impossible"]: 62 | assert key in first 63 | assert isinstance(first["answers"], dict) 64 | 65 | 66 | def test_repr_contains_split_and_count(dataset: UaSquadDataset) -> None: 67 | r = repr(dataset) 68 | # Should at least mention the split string and a count marker 69 | assert dataset.split in r 70 | assert "examples=" in r or str(len(dataset)) in r 71 | -------------------------------------------------------------------------------- /examples/ua_squad.md: -------------------------------------------------------------------------------- 1 | # UA-SQuAD 2 | 3 | ## Dataset Summary 4 | 5 | Ukrainian version of [Stanford Question Answering Dataset](https://rajpurkar.github.io/SQuAD-explorer/) that includes context, questions and corresponding answers. Current version of the datasets consists of 13 859 samples. Dataset development is still **in progress**. 6 | 7 | !!! 
Info 8 | Number of samples: 13 859 9 | Number of questions without answers: 2 927 10 | File size: 17.1 MB 11 | 12 | ### Data sample (HF-style) 13 | 14 | ```json 15 | { 16 | "id": "3d9f1c2e7a4b1f20", 17 | "title": "DONDA", 18 | "context": "5 січня 2012 року Вест оголосив про створення компанії ...", 19 | "question": "Якою була мета нової творчої компанії DONDA, створеної Каньє?", 20 | "answers": {"text": ["виготовлення продуктів та поширення досвіду, які люди хочуть отримати й можуть собі дозволити"], "answer_start": [123]}, 21 | "is_impossible": false 22 | } 23 | ``` 24 | 25 | ## Example of usage 26 | 27 | ### Python API (HF-style examples) 28 | 29 | ```python 30 | from ua_datasets import UaSquadDataset 31 | 32 | qa_dataset = UaSquadDataset("data/", split="train", download=True) 33 | 34 | for ex in qa_dataset: # each ex is a dict 35 | print("Question:", ex["question"]) 36 | print("Answers:", ex["answers"]["text"]) # list (may be empty if is_impossible) 37 | if ex.get("is_impossible"): 38 | print("(No answer — impossible question)") 39 | break 40 | ``` 41 | 42 | ### Optional: DatasetDict helper (no external Hub required) 43 | 44 | If you have the optional `datasets` library installed, you can build a local `DatasetDict` 45 | using the in-package helper (this does NOT call the Hugging Face Hub API if the JSON 46 | files are already cached locally): 47 | 48 | ```python 49 | from ua_datasets.question_answering.uasquad_question_answering import load_ua_squad_v2 50 | 51 | dd = load_ua_squad_v2(root="data/ua_squad", download=True) # returns a datasets.DatasetDict 52 | row = dd["train"][0] 53 | print(row["question"], row["answers"]["text"], row["is_impossible"]) 54 | ``` 55 | 56 | If `datasets` is not installed, this helper raises a `RuntimeError`; install it with: 57 | 58 | ```bash 59 | uv add datasets  # or: pip install datasets 60 | ``` 61 | 62 | If you don't need a Hugging Face `Dataset`, stick with the pure-Python iteration example above. 63 | 64 | ### Migration Note 65 | 66 | Legacy versions exposed `(question, context, answer)` tuples and keys `Question/Context/Answer` in raw JSON; these have been replaced by the standard SQuAD v2 schema shown above. Update loops to: `for ex in ds: ex['question'], ex['answers']['text']`.
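For a concrete before/after, here is a minimal migration sketch; the legacy tuple form appears only as a comment, and the field names follow the SQuAD v2 schema shown above:

```python
from ua_datasets import UaSquadDataset

ds = UaSquadDataset("data/", split="train", download=True)

# Legacy access pattern (no longer supported):
# for question, context, answer in ds:
#     ...

# Current access pattern (SQuAD v2 schema):
for ex in ds:
    question, context = ex["question"], ex["context"]
    # `answers["text"]` is a list and may be empty when `is_impossible` is True.
    answer = ex["answers"]["text"][0] if not ex["is_impossible"] else None
```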
67 | 68 | ## We thank our contributors 69 | 70 | Kyrpa Mykyta, Ivan Makarov, Tepla Sofiia, Chudnovska Daria, Fedenko Anna, Zaremba Anna, Krainia Daria, Budenkova Marharyta, Butunaieva Diana, Stanislavska Kateryna, Samorodova Sofiia, Martynyshyn Yuliia, Matviienko Iryna, Bezruka Anastasiia, Mostova Mariia, Stepanenko Liubomyr, Bondarenko Vitaliia, Fedorenko Polina, Sydorka Bohdana, Okhrimenko Mykhailo, Hryha Ruslana, Ustynova Olha, Kondratenko Dmytro, Chornomorets Yelyzaveta, Heresh Yuliia, Hynku Anna-Mariia, Tarasiuk Kateryna, Demian Biliavskyi, Piatushko Ruslana, Pakholchak Kateryna, Barabukha Mariia, Poltorak Yuliia, Yuliia Fedor, Usenko Viktoriia, Balanchuk Yana, Kramchenkov Dmytro, Yatsiuk Mariia, Melnyk Tetiana, Biloverbenko Illia, Boiko Khrystyna, Steshenko Kateryna, Korcheva Anna, Syzonenko Anastasiia, Malysheva Alina, Yaroslava Kushcheva, Valeriia Denysenko 71 | -------------------------------------------------------------------------------- /test/test_token_classification/test_pos_dataset_hardening.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from ua_datasets.token_classification.part_of_speech import ( 6 | MovaInstitutePOSDataset, 7 | ParseError, 8 | ) 9 | 10 | 11 | @pytest.fixture 12 | def tmp_dataset_root(tmp_path: Path) -> Path: 13 | return tmp_path 14 | 15 | 16 | def _write(root: Path, name: str, content: str) -> None: 17 | (root / name).write_text(content, encoding="utf8") 18 | 19 | 20 | def test_final_sentence_without_trailing_newline(tmp_dataset_root: Path) -> None: 21 | content = "1\tToken\t_\tNOUN\n" # no trailing blank line 22 | _write(tmp_dataset_root, "final.conllu.txt", content) 23 | ds: MovaInstitutePOSDataset = MovaInstitutePOSDataset( 24 | root=tmp_dataset_root, download=False, file_name="final.conllu.txt" 25 | ) 26 | assert len(ds) == 1 27 | assert ds[0][0] == ["Token"] 28 | 29 | 30 | def test_comments_and_blank_lines(tmp_dataset_root: Path) -> None: 31 | content = ( 32 | "# sent 1\n" 33 | "1\tA\t_\tDET\n" 34 | "2\tcat\t_\tNOUN\n" 35 | "\n" 36 | "# sent 2\n" 37 | "1\tSleeps\t_\tVERB\n" 38 | "2\tquietly\t_\tADV\n" 39 | ) 40 | _write(tmp_dataset_root, "comments.conllu.txt", content) 41 | ds: MovaInstitutePOSDataset = MovaInstitutePOSDataset( 42 | root=tmp_dataset_root, download=False, file_name="comments.conllu.txt" 43 | ) 44 | assert len(ds) == 2 45 | assert ds[0][0] == ["A", "cat"] 46 | 47 | 48 | def test_multiword_tokens_ignored(tmp_dataset_root: Path) -> None: 49 | content = ( 50 | "1\tI\t_\tPRON\n" 51 | "2-3\tgo+ing\t_\t_\n" # multiword range 52 | "2\tam\t_\tAUX\n" 53 | "3\tgoing\t_\tVERB\n" 54 | "4\thome\t_\tNOUN\n" 55 | "\n" 56 | ) 57 | _write(tmp_dataset_root, "mwt.conllu.txt", content) 58 | ds: MovaInstitutePOSDataset = MovaInstitutePOSDataset( 59 | root=tmp_dataset_root, download=False, file_name="mwt.conllu.txt" 60 | ) 61 | tokens, tags = ds[0] 62 | assert tokens == ["I", "am", "going", "home"] 63 | assert tags == ["PRON", "AUX", "VERB", "NOUN"] 64 | 65 | 66 | def test_malformed_lines_ignored(tmp_dataset_root: Path) -> None: 67 | content = "1\tOk\t_\tINTJ\nBADLINE WITHOUT TABS\n2\tthen\t_\tADV\n\n" 68 | _write(tmp_dataset_root, "bad.conllu.txt", content) 69 | ds: MovaInstitutePOSDataset = MovaInstitutePOSDataset( 70 | root=tmp_dataset_root, download=False, file_name="bad.conllu.txt" 71 | ) 72 | tokens, tags = ds[0] 73 | assert tokens == ["Ok", "then"] 74 | assert tags == ["INTJ", "ADV"] 75 | 76 | 77 | def test_empty_file_raises_parse_error(tmp_dataset_root: 
Path) -> None: 78 | _write(tmp_dataset_root, "empty.conllu.txt", "") 79 | with pytest.raises(ParseError): 80 | MovaInstitutePOSDataset(root=tmp_dataset_root, download=False, file_name="empty.conllu.txt") 81 | 82 | 83 | def test_label_frequencies(tmp_dataset_root: Path) -> None: 84 | content = "1\tHello\t_\tINTJ\n2\tworld\t_\tNOUN\n\n1\tworld\t_\tNOUN\n2\tagain\t_\tADV\n\n" 85 | _write(tmp_dataset_root, "freq.conllu.txt", content) 86 | ds: MovaInstitutePOSDataset = MovaInstitutePOSDataset( 87 | root=tmp_dataset_root, download=False, file_name="freq.conllu.txt" 88 | ) 89 | freqs = ds.label_frequencies() 90 | assert freqs["NOUN"] == 2 91 | assert freqs["INTJ"] == 1 92 | assert freqs["ADV"] == 1 93 | # unique_labels still consistent 94 | assert ds.unique_labels == {"INTJ", "NOUN", "ADV"} 95 | -------------------------------------------------------------------------------- /test/test_question_answering/test_uasquad_hardening.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | from ua_datasets.question_answering.uasquad_question_answering import ( 7 | ParseError, 8 | UaSquadDataset, 9 | ) 10 | 11 | 12 | def _write(root: Path, name: str, obj: object) -> Path: 13 | p = root / name 14 | if isinstance(obj, str): 15 | p.write_text(obj, encoding="utf8") 16 | else: 17 | p.write_text(json.dumps(obj), encoding="utf8") 18 | return p 19 | 20 | 21 | @pytest.fixture 22 | def qa_tmp_root(tmp_path: Path) -> Path: 23 | return tmp_path 24 | 25 | 26 | def test_malformed_json_raises(qa_tmp_root: Path) -> None: 27 | _write(qa_tmp_root, "train.json", "{not-json}") 28 | with pytest.raises(ParseError): 29 | UaSquadDataset(root=qa_tmp_root, split="train", download=False) 30 | 31 | 32 | def test_empty_data_list_raises(qa_tmp_root: Path) -> None: 33 | _write(qa_tmp_root, "train.json", {"data": []}) 34 | with pytest.raises(ParseError): 35 | UaSquadDataset(root=qa_tmp_root, split="train", download=False) 36 | 37 | 38 | def test_answer_frequencies_and_unique(qa_tmp_root: Path) -> None: 39 | obj = { 40 | "data": [ 41 | {"question": "Q1", "context": "C1", "answer": "A1"}, 42 | {"question": "Q2", "context": "C2", "answer": "A1"}, 43 | {"question": "Q3", "context": "C3", "answer": "A2"}, 44 | ] 45 | } 46 | _write(qa_tmp_root, "train.json", obj) 47 | ds = UaSquadDataset(root=qa_tmp_root, split="train", download=False) 48 | freqs = ds.answer_frequencies() 49 | assert freqs == {"A1": 2, "A2": 1} 50 | assert ds.unique_answers == {"A1", "A2"} 51 | 52 | 53 | def test_force_download_skip(monkeypatch: pytest.MonkeyPatch, qa_tmp_root: Path) -> None: 54 | # Existing file should bypass network when force_download False 55 | _write(qa_tmp_root, "train.json", {"data": [{"question": "Q", "context": "C", "answer": "A"}]}) 56 | called = {"count": 0} 57 | 58 | def fake_urlopen(url: str, timeout: int = 0) -> None: # pragma: no cover - should not be used 59 | called["count"] += 1 60 | raise AssertionError("Should not download when file exists and force_download=False") 61 | 62 | monkeypatch.setattr( 63 | "ua_datasets.question_answering.uasquad_question_answering.urlopen", fake_urlopen 64 | ) 65 | UaSquadDataset(root=qa_tmp_root, split="train", download=True, force_download=False) 66 | assert called["count"] == 0 67 | 68 | 69 | def test_force_download_replaces(monkeypatch: pytest.MonkeyPatch, qa_tmp_root: Path) -> None: 70 | # Initial file 71 | _write(qa_tmp_root, "train.json", {"data": [{"question": "Q", "context": "C", "answer": "A"}]}) 72 
| new_payload = {"data": [{"question": "QNEW", "context": "CNEW", "answer": "ANEW"}]} 73 | 74 | class FakeResp: 75 | def __init__(self, data: bytes) -> None: 76 | self._data = data 77 | 78 | def read(self) -> bytes: 79 | return self._data 80 | 81 | def __enter__(self) -> "FakeResp": 82 | return self 83 | 84 | def __exit__(self, exc_type: object, exc: object, tb: object) -> None: 85 | return None 86 | 87 | def fake_urlopen(url: str, timeout: int = 0) -> FakeResp: 88 | return FakeResp(json.dumps(new_payload).encode("utf8")) 89 | 90 | monkeypatch.setattr( 91 | "ua_datasets.question_answering.uasquad_question_answering.urlopen", fake_urlopen 92 | ) 93 | ds = UaSquadDataset(root=qa_tmp_root, split="train", download=True, force_download=True) 94 | assert len(ds) == 1 95 | ex = ds[0] 96 | assert ex["question"] == "QNEW" 97 | if not ex.get("is_impossible"): 98 | assert ex["answers"]["text"][0] == "ANEW" 99 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # UA-datasets 2 | 3 | > Unified, lightweight access to Ukrainian NLP benchmark datasets (QA, Text Classification, POS tagging) with automatic download, caching and consistent iteration. 4 | 5 | **UA-datasets** is maintained by FIdo.ai (machine learning research division of the non-profit student organization [FIdo](https://www.facebook.com/fido.naukma/) at the National University of Kyiv-Mohyla Academy) for research purposes. 6 | 7 | --- 8 | 9 | ## Features at a glance 10 | 11 | | Capability | Description | 12 | |------------|-------------| 13 | | Unified API | `len(ds)`, indexing, iteration across all datasets | 14 | | Resilient downloads | Retries, integrity / basic validation, fallback filenames (UA-SQuAD val) | 15 | | Minimal deps | Core loaders rely only on the standard library | 16 | | Consistent samples | QA: HF-style dict (`id`, `title`, `context`, `question`, `answers`, `is_impossible`); Classification `(title, text, label, tags?)`; POS `(tokens, tags)` | 17 | | Frequency helpers | Simple methods for label/answer frequency analysis | 18 | | Ready for tooling | Works seamlessly with `uv`, `ruff`, `mypy`, `pytest`, `pre-commit` | 19 | 20 | --- 21 | 22 | ## Available Datasets 23 | 24 | | Task | Dataset | Class | Splits | Notes | 25 | |------|---------|-------|--------|-------| 26 | | Question Answering | UA-SQuAD | `UaSquadDataset` | `train`, `val` | SQuAD-style JSON; legacy val filename fallbacks | 27 | | Text Classification | UA-News | `NewsClassificationDataset` | `train`, `test` | CSV (title,text,target[,tags]); optional tag parsing | 28 | | POS Tagging | Mova Institute POS | `MovaInstitutePOSDataset` | corpus | CoNLL-U like format; yields (tokens, tags) | 29 | 30 | --- 31 | 32 | ## Quick Start 33 | 34 | ```python 35 | from pathlib import Path 36 | from ua_datasets.question_answering import UaSquadDataset 37 | 38 | ds = UaSquadDataset(root=Path("./data/ua_squad"), split="train", download=True) 39 | print(f"Examples: {len(ds)}") 40 | ex = ds[0] 41 | print(ex["question"], "->", ex["answers"]["text"]) # list of answers (possibly empty if impossible) 42 | ``` 43 | 44 | Text classification: 45 | 46 | ```python 47 | from ua_datasets.text_classification import NewsClassificationDataset 48 | news = NewsClassificationDataset(root=Path("./data/ua_news"), split="train", download=True) 49 | title, text, label, tags = news[0] 50 | ``` 51 | 52 | POS tagging: 53 | 54 | ```python 55 | from 
ua_datasets.token_classification import MovaInstitutePOSDataset 56 | pos = MovaInstitutePOSDataset(root=Path("./data/mova_pos"), download=True) 57 | tokens, tags = pos[0] 58 | ``` 59 | 60 | --- 61 | 62 | ## Installation 63 | 64 | Choose one method: 65 | 66 | ### Using `uv` (recommended) 67 | 68 | ```bash 69 | uv add ua-datasets 70 | ``` 71 | 72 | ### Via pip 73 | 74 | ```bash 75 | pip install ua_datasets 76 | ``` 77 | 78 | ### From source (editable) 79 | 80 | ```bash 81 | git clone https://github.com/fido-ai/ua-datasets.git 82 | cd ua-datasets 83 | pip install -e . 84 | ``` 85 | 86 | --- 87 | 88 | ## Benchmarks & Acknowledgements 89 | 90 | - **Benchmarks:** See [Benchmarks](further_details/benchmarks.md) for leaderboard scaffolding. 91 | - **Acknowledgements:** See [Acknowledgements](further_details/acknowledgements.md) for dataset contributors. 92 | 93 | --- 94 | 95 | ## Citation 96 | 97 | If you found this library useful in academic research, please cite: 98 | 99 | ```bibtex 100 | @software{ua_datasets_2021, 101 | author = {Ivanyuk-Skulskiy, Bogdan and Zaliznyi, Anton and Reshetar, Oleksand and Protsyk, Oleksiy and Romanchuk, Bohdan and Shpihanovych, Vladyslav}, 102 | month = oct, 103 | title = {ua_datasets: a collection of Ukrainian language datasets}, 104 | url = {https://github.com/fido-ai/ua-datasets}, 105 | version = {1.0.0}, 106 | year = {2021} 107 | } 108 | ``` 109 | 110 | ⭐ Consider starring the project on [GitHub](https://github.com/fido-ai/ua-datasets) to support visibility. 111 | -------------------------------------------------------------------------------- /docs/_static/custom_css.css: -------------------------------------------------------------------------------- 1 | /* Fix /page#foo going to the top of the viewport and being hidden by the navbar */ 2 | html { 3 | scroll-padding-top: 50px; 4 | } 5 | 6 | /* Fit the Twitter handle alongside the GitHub one in the top right. 
*/ 7 | 8 | div.md-header__source { 9 | width: revert; 10 | max-width: revert; 11 | } 12 | 13 | a.md-source { 14 | display: inline-block; 15 | } 16 | 17 | .md-source__repository { 18 | max-width: 100%; 19 | } 20 | 21 | /* Emphasise sections of nav on left hand side */ 22 | 23 | nav.md-nav { 24 | padding-left: 5px; 25 | } 26 | 27 | nav.md-nav--secondary { 28 | border-left: revert !important; 29 | } 30 | 31 | .md-nav__title { 32 | font-size: 0.9rem; 33 | } 34 | 35 | .md-nav__item--section > .md-nav__link { 36 | font-size: 0.9rem; 37 | } 38 | 39 | /* Indent autogenerated documentation */ 40 | 41 | div.doc-contents { 42 | padding-left: 25px; 43 | border-left: 4px solid rgba(230, 230, 230); 44 | } 45 | 46 | /* Increase visibility of splitters "---" */ 47 | 48 | [data-md-color-scheme="default"] .md-typeset hr { 49 | border-bottom-color: rgb(0, 0, 0); 50 | border-bottom-width: 1pt; 51 | } 52 | 53 | [data-md-color-scheme="slate"] .md-typeset hr { 54 | border-bottom-color: rgb(230, 230, 230); 55 | } 56 | 57 | /* More space at the bottom of the page */ 58 | 59 | .md-main__inner { 60 | margin-bottom: 1.5rem; 61 | } 62 | 63 | /* Remove prev/next footer buttons */ 64 | 65 | .md-footer__inner { 66 | display: none; 67 | } 68 | 69 | /* Change font sizes */ 70 | 71 | html { 72 | /* Decrease font size for overall webpage 73 | Down from 137.5% which is the Material default */ 74 | font-size: 110%; 75 | } 76 | 77 | .md-typeset .admonition { 78 | /* Increase font size in admonitions */ 79 | font-size: 100% !important; 80 | } 81 | 82 | .md-typeset details { 83 | /* Increase font size in details */ 84 | font-size: 100% !important; 85 | } 86 | 87 | .md-typeset h1 { 88 | font-size: 1.6rem; 89 | } 90 | 91 | .md-typeset h2 { 92 | font-size: 1.5rem; 93 | } 94 | 95 | .md-typeset h3 { 96 | font-size: 1.3rem; 97 | } 98 | 99 | .md-typeset h4 { 100 | font-size: 1.1rem; 101 | } 102 | 103 | .md-typeset h5 { 104 | font-size: 0.9rem; 105 | } 106 | 107 | .md-typeset h6 { 108 | font-size: 0.8rem; 109 | } 110 | 111 | /* Bugfix: remove the superfluous parts generated when doing: 112 | 113 | ??? Blah 114 | 115 | ::: library.something 116 | */ 117 | 118 | .md-typeset details .mkdocstrings > h4 { 119 | display: none; 120 | } 121 | 122 | .md-typeset details .mkdocstrings > h5 { 123 | display: none; 124 | } 125 | 126 | /* Change default colours for tags */ 127 | 128 | [data-md-color-scheme="default"] { 129 | --md-typeset-a-color: rgb(0, 189, 164) !important; 130 | } 131 | [data-md-color-scheme="slate"] { 132 | --md-typeset-a-color: rgb(0, 189, 164) !important; 133 | } 134 | 135 | /* Highlight functions, classes etc. type signatures. Really helps to make clear where 136 | one item ends and another begins. 
*/ 137 | 138 | [data-md-color-scheme="default"] { 139 | --doc-heading-color: #DDD; 140 | --doc-heading-border-color: #CCC; 141 | --doc-heading-color-alt: #F0F0F0; 142 | } 143 | [data-md-color-scheme="slate"] { 144 | --doc-heading-color: rgb(25,25,33); 145 | --doc-heading-border-color: rgb(25,25,33); 146 | --doc-heading-color-alt: rgb(33,33,44); 147 | --md-code-bg-color: rgb(38,38,50); 148 | } 149 | 150 | h4.doc-heading { 151 | /* NOT var(--md-code-bg-color) as that's not visually distinct from other code blocks.*/ 152 | background-color: var(--doc-heading-color); 153 | border: solid var(--doc-heading-border-color); 154 | border-width: 1.5pt; 155 | border-radius: 2pt; 156 | padding: 0pt 5pt 2pt 5pt; 157 | } 158 | h5.doc-heading, h6.heading { 159 | background-color: var(--doc-heading-color-alt); 160 | border-radius: 2pt; 161 | padding: 0pt 5pt 2pt 5pt; 162 | } 163 | -------------------------------------------------------------------------------- /test/test_text_classification/test_news_dataset_hardening.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from ua_datasets.text_classification.news_classification import ( 6 | NewsClassificationDataset, 7 | ParseError, 8 | ) 9 | 10 | 11 | def _write(root: Path, name: str, text: str) -> Path: 12 | p = root / name 13 | p.write_text(text, encoding="utf8") 14 | return p 15 | 16 | 17 | @pytest.fixture 18 | def tmp_news_root(tmp_path: Path) -> Path: 19 | return tmp_path 20 | 21 | 22 | def test_empty_file_raises_parse_error(tmp_news_root: Path) -> None: 23 | _write(tmp_news_root, "train.csv", "") 24 | with pytest.raises(ParseError): 25 | NewsClassificationDataset(root=tmp_news_root, split="train", download=False) 26 | 27 | 28 | def test_missing_required_column(tmp_news_root: Path) -> None: 29 | # Missing target column 30 | content = "title,text,tags\nA,B,tag1|tag2\n" 31 | _write(tmp_news_root, "train.csv", content) 32 | with pytest.raises(ParseError): 33 | NewsClassificationDataset(root=tmp_news_root, split="train", download=False) 34 | 35 | 36 | def test_basic_loading_and_label_cache(tmp_news_root: Path) -> None: 37 | content = "title,text,tags,target\nT1,Body one,tag1|tag2,CLASS1\nT2,Body two,,CLASS2\n" 38 | _write(tmp_news_root, "train.csv", content) 39 | ds = NewsClassificationDataset(root=tmp_news_root, split="train", download=False) 40 | assert len(ds) == 2 41 | assert ds.labels == {"CLASS1", "CLASS2"} 42 | freqs = ds.label_frequencies() 43 | assert freqs == {"CLASS1": 1, "CLASS2": 1} 44 | 45 | 46 | def test_tag_parsing_return_tags(tmp_news_root: Path) -> None: 47 | content = "title,text,tags,target\nT1,Body one,tag1|tag2,CLASS1\nT2,Body two,tag3,CLASS1\n" 48 | _write(tmp_news_root, "train.csv", content) 49 | ds = NewsClassificationDataset( 50 | root=tmp_news_root, split="train", download=False, return_tags=True 51 | ) 52 | title, _text, target, tags = ds[0] 53 | assert title == "T1" 54 | assert target == "CLASS1" 55 | assert tags == ["tag1", "tag2"] 56 | # second sample 57 | _, _, _, tags2 = ds[1] 58 | assert tags2 == ["tag3"] 59 | 60 | 61 | def test_no_trailing_newline(tmp_news_root: Path) -> None: 62 | # File ends without newline, should still parse second row 63 | content = "title,text,tags,target\nT1,Body one,,A\nT2,Body two,,B" # no trailing newline 64 | _write(tmp_news_root, "train.csv", content) 65 | ds = NewsClassificationDataset(root=tmp_news_root, split="train", download=False) 66 | assert len(ds) == 2 67 | 68 | 69 | def 
test_force_download_skips_when_disabled( 70 | monkeypatch: pytest.MonkeyPatch, tmp_news_root: Path 71 | ) -> None: 72 | # Create existing file then ensure download is *not* called when force_download False 73 | content = "title,text,tags,target\nT1,Body one,,A\n" 74 | _write(tmp_news_root, "train.csv", content) 75 | called = {"count": 0} 76 | 77 | def fake_urlopen(url: str, timeout: int = 0) -> None: 78 | called["count"] += 1 79 | raise AssertionError("Should not be called when file exists and force_download=False") 80 | 81 | monkeypatch.setattr("ua_datasets.text_classification.news_classification.urlopen", fake_urlopen) 82 | NewsClassificationDataset(root=tmp_news_root, split="train", download=False) 83 | assert called["count"] == 0 84 | 85 | 86 | def test_force_download_triggers(monkeypatch: pytest.MonkeyPatch, tmp_news_root: Path) -> None: 87 | content = "title,text,tags,target\nT1,Body one,,A\n" 88 | _write(tmp_news_root, "train.csv", content) 89 | 90 | # Replace content via forced download 91 | new_csv = "title,text,tags,target\nN1,Body new,,B\n" 92 | 93 | class FakeResponse: 94 | def __init__(self, data: bytes) -> None: 95 | self._data = data 96 | 97 | def read(self) -> bytes: 98 | return self._data 99 | 100 | def __enter__(self) -> "FakeResponse": 101 | return self 102 | 103 | def __exit__(self, exc_type: object, exc: object, tb: object) -> None: 104 | return None 105 | 106 | def fake_urlopen(url: str, timeout: int = 0) -> FakeResponse: 107 | return FakeResponse(new_csv.encode("utf8")) 108 | 109 | monkeypatch.setattr("ua_datasets.text_classification.news_classification.urlopen", fake_urlopen) 110 | ds = NewsClassificationDataset( 111 | root=tmp_news_root, split="train", download=True, force_download=True 112 | ) 113 | assert len(ds) == 1 114 | title, *_ = ds[0] 115 | assert title == "N1" 116 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | theme: 2 | name: material 3 | features: 4 | - navigation.sections # Sections are included in the navigation on the left. 5 | - toc.integrate # Table of contents is integrated on the left; does not appear separately on the right. 6 | - header.autohide # header disappears as you scroll 7 | palette: 8 | # Light mode / dark mode 9 | # We deliberately don't automatically use `media` to check a user's preferences. We default to light mode as 10 | # (a) it looks more professional, and (b) is more obvious about the fact that it offers a (dark mode) toggle. 11 | - scheme: default 12 | primary: white 13 | accent: amber 14 | toggle: 15 | icon: material/weather-night 16 | name: Switch to dark mode 17 | - scheme: slate 18 | primary: black 19 | accent: amber 20 | toggle: 21 | icon: material/weather-sunny 22 | name: Switch to light mode 23 | icon: 24 | repo: fontawesome/brands/github # GitHub logo in top right 25 | logo: "material/math-integral-box" # Diffrax logo in top left 26 | favicon: "_static/favicon.png" 27 | custom_dir: "docs/_overrides" # Overriding part of the HTML 28 | 29 | # These additions are my own custom ones, having overridden a partial. 30 | twitter_name: "@bogdan_ivanyuk" 31 | twitter_url: "https://twitter.com/bogdan_ivanyuk" 32 | 33 | site_name: ua-datasets 34 | site_description: The documentation for the ua-datasets software library. 
35 | site_author: Bogdan Ivaniuk-Skulskyi 36 | 37 | repo_url: https://github.com/fido-ai/ua-datasets 38 | repo_name: fido-ai/ua-datasets 39 | edit_uri: "" # No edit button, as some of our pages are in /docs and some in /examples via symlink, so it's impossible for them all to be accurate 40 | 41 | strict: true # Don't allow warnings during the build process 42 | 43 | extra_javascript: 44 | # The below three make MathJax work, see https://squidfunk.github.io/mkdocs-material/reference/mathjax/ 45 | - _static/mathjax.js 46 | - https://polyfill.io/v3/polyfill.min.js?features=es6 47 | - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js 48 | 49 | extra_css: 50 | - _static/custom_css.css 51 | 52 | markdown_extensions: 53 | - pymdownx.arithmatex: # Render LaTeX via MathJax 54 | generic: true 55 | - pymdownx.superfences # Seems to enable syntax highlighting when used with the Material theme. 56 | - pymdownx.details # Allowing hidden expandable regions denoted by ??? 57 | - pymdownx.snippets: # Include one Markdown file into another 58 | base_path: docs 59 | - admonition 60 | - toc: 61 | permalink: "¤" # Adds a clickable permalink to each section heading 62 | toc_depth: 4 # Prevents h5, h6 (i.e. methods) from showing up in the TOC. 63 | 64 | plugins: 65 | - search # default search plugin; needs manually re-enabling when using any other plugins 66 | - autorefs # Cross-links to headings 67 | - include_exclude_files: 68 | include: 69 | exclude: 70 | - "_overrides" 71 | - "_static/README.md" 72 | - mknotebooks # Jupyter notebooks 73 | - mkdocstrings: 74 | handlers: 75 | python: 76 | setup_commands: 77 | - import pytkdocs_tweaks 78 | - pytkdocs_tweaks.main() 79 | 80 | selection: 81 | inherited_members: true # Allow looking up inherited methods 82 | rendering: 83 | show_root_heading: true # actually display anything at all... 84 | show_root_full_path: true # display "diffrax.asdf" not just "asdf" 85 | show_if_no_docstring: true 86 | show_signature_annotations: true 87 | show_source: false # don't include source code 88 | members_order: source # order methods according to their order of definition in the source code, not alphabetical order 89 | heading_level: 4 # Makes everything top-level be
<h4>. Child entries will be <h5>, <h6>
etc., but because of toc_depth, above, (deliberately) won't appear in the TOC. 90 | 91 | nav: 92 | - 'index.md' 93 | - 'citation.md' 94 | - Examples: 95 | - UA-SQuAD: 'examples/ua_squad.md' 96 | - Mova Institute POS: 'examples/mova_pos.md' 97 | - UA News classification: 'examples/ua_news.md' 98 | - Further details: 99 | - 'further_details/acknowledgements.md' 100 | - 'further_details/benchmarks.md' 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
<p align="center"> 3 |   <img src="imgs/NaUKMA.png" alt="NaUKMA FIdo Logo"> 4 | </p> 5 | 6 | <h1 align="center"> 7 | ua_datasets 8 | </h1>
9 | 10 | [![PyPI version](https://img.shields.io/pypi/v/ua-datasets.svg)](https://pypi.org/project/ua-datasets/) 11 | [![Python versions](https://img.shields.io/pypi/pyversions/ua-datasets.svg)](https://pypi.org/project/ua-datasets/) 12 | [![License](https://img.shields.io/pypi/l/ua-datasets.svg)](https://github.com/fido-ai/ua-datasets/blob/main/LICENSE) 13 | [![Downloads](https://static.pepy.tech/badge/ua-datasets)](https://pepy.tech/project/ua-datasets) 14 | 15 | [![Build CI](https://github.com/fido-ai/ua-datasets/actions/workflows/ci.yml/badge.svg)](https://github.com/fido-ai/ua-datasets/actions/workflows/ci.yml) 16 | [![Code size](https://img.shields.io/github/languages/code-size/fido-ai/ua-datasets)](https://github.com/fido-ai/ua-datasets) 17 | [![Code style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff) 18 | [![Type checking: mypy](https://img.shields.io/badge/type%20checking-mypy-blue.svg)](http://mypy-lang.org/) 19 | 20 | [**UA-datasets**](https://fido-ai.github.io/ua-datasets/) provides ready-to-use Ukrainian NLP benchmark datasets with a **single, lightweight Python API**. 21 | 22 | > Fast access to Question Answering, News Classification, and POS Tagging corpora — with automatic download, caching, and consistent iteration. 23 | 24 | ### Why use this library? 25 | 26 | - **Unified API**: All datasets expose `len(ds)`, indexing, iteration, and simple frequency helpers. 27 | - **Robust downloads**: Automatic retries, integrity guards, and filename fallbacks for legacy splits. 28 | - **Zero heavy deps**: Pure Python + standard library (core loaders) for quick startup. 29 | - **Repro friendly**: Validation split for UA-SQuAD; classification CSV parsing with resilience to minor format drift. 30 | - **Tooling ready**: Works seamlessly with ruff, mypy, pytest, and uv-based workflows. 31 | 32 | 33 | _Maintained by the FIdo.ai research group (National University of Kyiv-Mohyla Academy)._ 34 | 35 | ## Minimal Example 36 | 37 | ```python 38 | # Assumes `uv` workspace already synced with `uv sync` and project installed. 39 | 40 | from pathlib import Path 41 | from ua_datasets.question_answering import UaSquadDataset 42 | from ua_datasets.text_classification import NewsClassificationDataset 43 | from ua_datasets.token_classification import MovaInstitutePOSDataset 44 | 45 | # Question Answering (first HF-style example dict) 46 | qa = UaSquadDataset(root=Path("./data/ua_squad"), split="train", download=True) 47 | print("QA examples:", len(qa)) 48 | example = qa[0] 49 | print(example.keys()) # id, title, context, question, answers, is_impossible 50 | print(example["question"], "->", example["answers"]["text"]) # list of accepted answers 51 | 52 | # News Classification 53 | news = NewsClassificationDataset(root=Path("./data/ua_news"), split="train", download=True) 54 | title, text, target, tags = news[0] 55 | print("Label count:", len(news.labels), "First label:", target) 56 | 57 | # Part-of-Speech Tagging 58 | pos = MovaInstitutePOSDataset(root=Path("./data/mova_pos"), download=True) 59 | tokens, tags = pos[0] 60 | print(tokens[:8], tags[:8]) 61 | ``` 62 | 63 | For development commands see the Installation section below. 64 | 65 | ## Installation 66 | 67 | Choose one of the following methods. 68 | 69 | ### 1. Using uv (recommended) 70 | 71 | Add to an existing project: 72 | 73 | ```bash 74 | uv add ua-datasets 75 | ``` 76 | 77 | 78 | 79 |
80 | ### 2. Using pip (PyPI) 81 | 82 | ```bash 83 | # install 84 | pip install ua_datasets 85 | # upgrade 86 | pip install -U ua_datasets 87 | ``` 88 | 89 |
90 | 91 |
92 | ### 3. From source (editable install) 93 | 94 | ```bash 95 | git clone https://github.com/fido-ai/ua-datasets.git 96 | cd ua-datasets 97 | pip install -e .[dev] # if you later define optional dev extras 98 | ``` 99 | 100 | Or with uv (editable semantics via local path): 101 | 102 | ```bash 103 | git clone https://github.com/fido-ai/ua-datasets.git 104 | cd ua-datasets 105 | uv sync --dev 106 | ``` 107 | 108 |
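Whichever method you pick, a quick smoke test (illustrative; the first run downloads the corpus into the given root directory):

```python
from ua_datasets.token_classification import MovaInstitutePOSDataset

# root accepts a str or Path; the loader coerces it internally
ds = MovaInstitutePOSDataset(root="./data/mova_pos", download=True)
tokens, tags = ds[0]
print(len(ds), "sentences; first tokens:", list(zip(tokens, tags))[:3])
```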
109 | 110 | 111 | ## Latest Updates 112 | 113 | | Date | Highlights | 114 | |------|------------| 115 | | 25-10-2025 | Added validation split for UA-SQuAD and updated package code. | 116 | | 05-07-2022 | Added HuggingFace API for UA-SQuAD (Q&A) and UA-News (Text Classification). | 117 | 118 | 119 | ## Available Datasets 120 | 121 | | Task | Dataset | Import Class | Splits | Notes | 122 | |------|---------|--------------|--------|-------| 123 | | Question Answering | UA-SQuAD | `UaSquadDataset` | `train`, `val` | SQuAD v2-style examples (`is_impossible`, multi answers); iteration yields dicts | 124 | | Text Classification | UA-News | `NewsClassificationDataset` | `train`, `test` | CSV (title, text, target[, tags]); optional tag parsing | 125 | | Token Classification | Mova Institute POS | `MovaInstitutePOSDataset` | (single corpus) | CoNLL-U like POS tagging; yields (tokens, tags) per sentence | 126 | 127 | ## Contribution 128 | 129 | In case you are willing to contribute (update any part of the library, add your dataset) do not hesitate to connect through [GitHub Issue](https://github.com/fido-ai/ua-datasets/issues/new/choose). Thanks in advance for your contribution! 130 | 131 | ## Citation 132 | 133 | ```bibtex 134 | @software{ua_datasets_2021, 135 | author = {Ivanyuk-Skulskiy, Bogdan and Zaliznyi, Anton and Reshetar, Oleksand and Protsyk, Oleksiy and Romanchuk, Bohdan and Shpihanovych, Vladyslav}, 136 | month = oct, 137 | title = {ua_datasets: a collection of Ukrainian language datasets}, 138 | url = {https://github.com/fido-ai/ua-datasets}, 139 | version = {1.0.0}, 140 | year = {2021} 141 | } 142 | ``` 143 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | strategy: 11 | matrix: 12 | python-version: [3.11] 13 | os: ['ubuntu-latest'] 14 | runs-on: ${{ matrix.os }} 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Setup 25 | shell: bash 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install build 29 | python -m build 30 | - name: Get versions 31 | id: get-versions 32 | shell: bash 33 | run: | 34 | python -c " 35 | import subprocess 36 | import tomllib 37 | vparse = lambda x: tuple(map(int, x.split('.'))) 38 | with open('pyproject.toml', 'rb') as f: 39 | data = tomllib.load(f) 40 | name = data['project']['name'] 41 | checkout_version = data['project']['version'] 42 | pypi_version = subprocess.run(f'python -m pip index versions {name}', 43 | shell=True, capture_output=True).stdout 44 | pypi_version = pypi_version.split(b'\n', 1)[0].split(b' ')[1][1:-1].decode('utf-8') 45 | new_version = str(vparse(checkout_version) > vparse(pypi_version)).lower() 46 | subprocess.run(f'echo name={name} >> $GITHUB_OUTPUT', shell=True) 47 | subprocess.run(f'echo tag=v{checkout_version} >> $GITHUB_OUTPUT', shell=True) 48 | subprocess.run(f'echo new-version={new_version} >> $GITHUB_OUTPUT', shell=True) 49 | print(f'Got checkout_version={vparse(checkout_version)!r}') 50 | print(f'Got pypi_version={vparse(pypi_version)!r}') 51 | print(f'Setting name={name}') 52 | print(f'Setting tag=v{checkout_version}') 53 | print(f'Setting new-version={new_version}') 54 | " 55 
| - name: Test sdist 56 | id: test-sdist 57 | if: steps.get-versions.outputs.new-version == 'true' 58 | shell: bash 59 | run: | 60 | python -m pip install dist/*.tar.gz 61 | cd $(mktemp -d) 62 | set +e 63 | bash -c " 64 | python -m pip install pytest 65 | cp -r ${{ github.workspace }}/test ./test 66 | pytest --disable-warnings 67 | " 68 | if [ "$?" -eq 0 ] 69 | then 70 | echo result=true >> $GITHUB_OUTPUT 71 | else 72 | echo result=false >> $GITHUB_OUTPUT 73 | fi 74 | set -e 75 | python -m pip uninstall -y -r <(pip freeze) 76 | cd ${{ github.workspace }} 77 | - name: Test bdist_wheel 78 | id: test-bdist-wheel 79 | if: steps.get-versions.outputs.new-version == 'true' 80 | shell: bash 81 | run: | 82 | python -m pip install dist/*.whl 83 | cd $(mktemp -d) 84 | set +e 85 | bash -c " 86 | python -m pip install pytest 87 | cp -r ${{ github.workspace }}/test ./test 88 | pytest --disable-warnings 89 | " 90 | if [ "$?" -eq 0 ] 91 | then 92 | echo result=true >> $GITHUB_OUTPUT 93 | else 94 | echo result=false >> $GITHUB_OUTPUT 95 | fi 96 | set -e 97 | python -m pip uninstall -y -r <(pip freeze) 98 | cd ${{ github.workspace }} 99 | - name: Logging 100 | shell: bash 101 | run: | 102 | echo new-version=${{ steps.get-versions.outputs.new-version }} 103 | echo sdist-result=${{ steps.test-sdist.outputs.result }} 104 | echo bdist-result=${{ steps.test-bdist-wheel.outputs.result }} 105 | - name: Tag 106 | env: 107 | github_user: KyloRen1 108 | github_token: ${{ github.token }} 109 | if: (steps.get-versions.outputs.new-version == 'true') && (steps.test-sdist.outputs.result == 'true') && (steps.test-bdist-wheel.outputs.result == 'true') 110 | shell: bash 111 | run: | 112 | git config --global user.email "noreply@example.com" 113 | git config --global user.name "Action: Update Python project" 114 | git tag "${{ steps.get-versions.outputs.tag }}" -m "" 115 | git push https://$github_user:$github_token@github.com/${{ github.repository }} "${{ steps.get-versions.outputs.tag }}" 116 | - name: GitHub release 117 | if: (steps.get-versions.outputs.new-version == 'true') && (steps.test-sdist.outputs.result == 'true') && (steps.test-bdist-wheel.outputs.result == 'true') 118 | uses: softprops/action-gh-release@v1 119 | with: 120 | name: "${{ steps.get-versions.outputs.name }} ${{ steps.get-versions.outputs.tag }}" 121 | body: "Autogenerated release notes as follows:" 122 | tag_name: "${{ steps.get-versions.outputs.tag }}" 123 | token: ${{ github.token }} 124 | generate_release_notes: true 125 | 126 | - name: Push to PyPI 127 | if: (steps.get-versions.outputs.new-version == 'true') && (steps.test-sdist.outputs.result == 'true') && (steps.test-bdist-wheel.outputs.result == 'true') 128 | uses: pypa/gh-action-pypi-publish@release/v1 129 | with: 130 | password: ${{ secrets.pypi_token }} 131 | 132 | - name: Fail 133 | if: (steps.get-versions.outputs.new-version == 'true') && ((steps.test-sdist.outputs.result != 'true') || (steps.test-bdist-wheel.outputs.result != 'true')) 134 | shell: bash 135 | run: exit 1 136 | -------------------------------------------------------------------------------- /ua_datasets/utils.py: -------------------------------------------------------------------------------- 1 | """Shared internal utilities (network + atomic file helpers). 2 | 3 | This consolidates retrying download logic and atomic write operations used by 4 | multiple dataset loaders. 
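A typical call pattern (illustrative sketch; the URL is a placeholder):

    from pathlib import Path
    text = download_text_with_retries("https://example.org/data.csv", max_retries=3)
    atomic_write_text(Path("data.csv"), text)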
5 | 6 | """ 7 | 8 | from __future__ import annotations 9 | 10 | from hashlib import sha256 11 | from pathlib import Path 12 | from time import sleep 13 | from typing import Any, Callable, Optional 14 | from urllib.error import HTTPError, URLError 15 | from urllib.request import urlopen 16 | 17 | __all__ = [ 18 | "DownloadFailure", 19 | "atomic_write_text", 20 | "download_text_with_retries", 21 | ] 22 | 23 | 24 | class DownloadFailure(RuntimeError): 25 | """Raised when a download ultimately fails after retries.""" 26 | 27 | 28 | def download_text_with_retries( 29 | url: str, 30 | *, 31 | timeout: int = 15, 32 | max_retries: int = 3, 33 | expected_sha256: str | None = None, 34 | backoff_factor: float = 0.5, 35 | validate: Optional[Callable[[str], bool]] = None, 36 | opener: Callable[..., Any] = urlopen, 37 | show_progress: bool = False, 38 | chunk_size: int = 8192, 39 | ) -> str: 40 | """Download URL returning decoded UTF-8 text with retries & optional integrity. 41 | 42 | Enhanced with an optional streaming progress bar (stdout) using only the 43 | standard library to preserve the project's minimal dependency footprint. 44 | 45 | Parameters 46 | ---------- 47 | url : str 48 | Resource to fetch (HTTP/HTTPS). 49 | timeout : int 50 | Per-attempt timeout (seconds). 51 | max_retries : int 52 | Maximum number of attempts before failing. 53 | expected_sha256 : str | None 54 | If provided, the hex digest must match the downloaded bytes. 55 | backoff_factor : float 56 | Linear backoff factor (sleep = factor * attempt_number). 57 | validate : Callable[[str], bool] | None 58 | Optional predicate applied to decoded text; must return True for success. 59 | opener : Callable[..., Any] 60 | Function used to open the URL (injected for test monkeypatching). 61 | show_progress : bool 62 | If True, prints a simple ASCII progress indicator while streaming. 63 | chunk_size : int 64 | Byte size for streaming chunks when show_progress is enabled. 65 | """ 66 | attempt = 0 67 | last_exc: Exception | None = None 68 | while attempt < max_retries: 69 | attempt += 1 70 | try: 71 | # Use provided opener (enables test monkeypatching at call sites) 72 | with opener(url, timeout=timeout) as resp: # nosec - caller controls domain 73 | if show_progress: 74 | # Attempt to read content length for percentage; fallback to 0 (unknown) 75 | try: 76 | total_size = int(getattr(resp, "headers", {}).get("Content-Length", "0")) 77 | except Exception: 78 | total_size = 0 79 | downloaded = 0 80 | buf = bytearray() 81 | while True: 82 | # Some mocked/monkeypatched responses (in tests) provide a 83 | # read() method that does NOT accept a size argument OR return 84 | # the full payload on every call (no internal cursor). We: 85 | # 1. Attempt sized reads 86 | # 2. Fallback to a single full read if TypeError is raised 87 | # 3. Break immediately after a fallback full read to avoid 88 | # an infinite loop continually re-appending identical bytes. 
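# Concretely (hypothetical test double): a response whose read() is defined as
# "def read(self): return b'payload'" hands back the same bytes on every call,
# so after one such fallback full read we must break out of the loop below.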
89 | try: 90 | chunk = resp.read(chunk_size) 91 | fallback_full_read = False 92 | except TypeError: # signature read() -> bytes (no size param) 93 | chunk = resp.read() 94 | fallback_full_read = True 95 | if not chunk: 96 | break 97 | buf.extend(chunk) 98 | downloaded += len(chunk) 99 | if total_size > 0: 100 | pct = downloaded / total_size * 100 101 | bar_width = 30 102 | filled = int(bar_width * downloaded / total_size) 103 | bar = "#" * filled + "-" * (bar_width - filled) 104 | print( 105 | f"\rDownloading {url} [{bar}] {pct:5.1f}% ({downloaded}/{total_size} bytes)", 106 | end="", 107 | flush=True, 108 | ) 109 | else: 110 | print(f"\rDownloading {url} {downloaded} bytes", end="", flush=True) 111 | if fallback_full_read: 112 | # Prevent infinite loop when mock returns whole content each call 113 | break 114 | data = bytes(buf) 115 | # Ensure newline after completion for clean subsequent output 116 | print() 117 | else: 118 | data = resp.read() 119 | if expected_sha256 is not None: 120 | digest = sha256(data).hexdigest() 121 | if digest.lower() != expected_sha256.lower(): 122 | raise DownloadFailure( 123 | f"SHA256 mismatch for {url}: expected {expected_sha256} got {digest}" 124 | ) 125 | text = data.decode("utf8") 126 | if not text.strip(): 127 | raise DownloadFailure("Downloaded content empty/whitespace.") 128 | if validate and not validate(text): 129 | raise DownloadFailure("Validation predicate rejected content.") 130 | return text 131 | except (HTTPError, URLError, TimeoutError, DownloadFailure) as exc: 132 | last_exc = exc 133 | if attempt < max_retries: 134 | sleep(backoff_factor * attempt) 135 | else: 136 | break 137 | except UnicodeDecodeError as exc: 138 | last_exc = exc 139 | break 140 | except Exception as exc: # unknown fatal 141 | last_exc = exc 142 | break 143 | raise DownloadFailure(f"Failed to download {url} after {max_retries} attempts: {last_exc}") 144 | 145 | 146 | def atomic_write_text(path: Path, text: str, *, encoding: str = "utf8") -> None: 147 | """Write text atomically by first writing to a temporary sibling file. 148 | 149 | Ensures readers do not observe a partially written file. 150 | """ 151 | tmp = path.with_suffix(path.suffix + ".tmp") 152 | tmp.write_text(text, encoding=encoding) 153 | tmp.replace(path) 154 | -------------------------------------------------------------------------------- /ua_datasets/token_classification/part_of_speech.py: -------------------------------------------------------------------------------- 1 | """Part-of-speech tagging dataset loader for the Mova Institute corpus. 2 | 3 | This module provides a light-weight, dependency-free interface to download and 4 | parse a (CoNLL-U like) POS tagging dataset with a focus on robustness and 5 | clarity. 
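The expected layout is CoNLL-U like: one token per tab-separated line
(ID, FORM, LEMMA, UPOS, ...), '#' comment lines, and a blank line between
sentences. An illustrative two-token sentence:

    # sent_id = 1
    1	Це	це	PRON	_
    2	приклад	приклад	NOUN	_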
6 | 7 | Example 8 | ------- 9 | >>> ds = MovaInstitutePOSDataset(root=Path('./data'), download=True) 10 | >>> tokens, tags = ds[0] 11 | >>> len(ds), len(tokens) == len(tags) 12 | """ 13 | 14 | from collections.abc import Sequence as ABCSequence 15 | from dataclasses import dataclass, field 16 | from pathlib import Path 17 | from typing import Dict, Generic, Iterator, List, Set, Tuple, TypeVar 18 | 19 | from ua_datasets.utils import DownloadFailure, atomic_write_text, download_text_with_retries 20 | 21 | __all__ = [ 22 | "DownloadError", 23 | "MovaInstitutePOSDataset", 24 | "ParseError", 25 | ] 26 | 27 | Sentence = List[str] 28 | TagSequence = List[str] 29 | 30 | 31 | S = TypeVar("S", bound=Sentence) 32 | T = TypeVar("T", bound=TagSequence) 33 | 34 | 35 | class DownloadError(RuntimeError): 36 | """Raised when the dataset cannot be downloaded after retries.""" 37 | 38 | 39 | class ParseError(RuntimeError): 40 | """Raised when the dataset file cannot be parsed into any sentences.""" 41 | 42 | 43 | @dataclass(slots=True) 44 | class MovaInstitutePOSDataset(ABCSequence, Generic[S, T]): 45 | """Dataset wrapper for the Mova Institute POS tagging corpus. 46 | 47 | Parameters 48 | ---------- 49 | root: 50 | Directory where the dataset file will be stored / read from. 51 | download: 52 | If True (default) the dataset will be downloaded if missing. 53 | file_name: 54 | Local filename for the cached dataset (text format). 55 | data_file: 56 | Remote URL containing the dataset contents. 57 | """ 58 | 59 | root: Path 60 | download: bool = True 61 | file_name: str = "mova_institute_pos_dataset.txt" 62 | data_file: str = "https://lab.mova.institute/files/robochyi_tb.conllu.txt" 63 | force_download: bool = False 64 | max_retries: int = 3 65 | timeout: int = 15 # seconds for individual HTTP attempt 66 | expected_sha256: str | None = None 67 | show_progress: bool = True 68 | 69 | dataset_path: Path = field(init=False) 70 | _samples: List[Sentence] = field(init=False, default_factory=list) 71 | _labels: List[TagSequence] = field(init=False, default_factory=list) 72 | _unique_labels_cache: Set[str] = field(init=False, default_factory=set) 73 | 74 | def __post_init__(self) -> None: 75 | self.root = Path(self.root) 76 | self.dataset_path = self.root / self.file_name 77 | if self.download: 78 | self.download_dataset() 79 | if not self._check_exists(): # Fail early with a clear message. 80 | raise FileNotFoundError( 81 | "Dataset not found. Use download=True to fetch it or ensure the file exists." 82 | ) 83 | self._samples, self._labels = self._load_data() 84 | if not self._samples: 85 | raise ParseError( 86 | f"Parsed zero sentences from dataset file '{self.dataset_path}'. File may be empty or malformed." 87 | ) 88 | # Cache unique labels (frozenset semantics but returning a set copy in property) 89 | self._unique_labels_cache = {lab for seq in self._labels for lab in seq} 90 | 91 | @property 92 | def labels(self) -> List[TagSequence]: 93 | """Raw label sequences (parallel to `data`).""" 94 | return self._labels 95 | 96 | @property 97 | def data(self) -> List[Sentence]: 98 | """Raw token sequences.""" 99 | return self._samples 100 | 101 | @property 102 | def unique_labels(self) -> Set[str]: 103 | """Unique set of tag labels present in the corpus (cached).""" 104 | return self._unique_labels_cache 105 | 106 | def label_frequencies(self) -> Dict[str, int]: 107 | """Return a mapping of label -> occurrence count. 108 | 109 | Useful for quick exploratory statistics. 
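Illustrative (counts depend on the corpus snapshot)::

    >>> ds.label_frequencies()  # doctest: +SKIP
    {'NOUN': 1234, 'VERB': 987, ...}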
110 | """ 111 | freqs: Dict[str, int] = {} 112 | for seq in self._labels: 113 | for lab in seq: 114 | freqs[lab] = freqs.get(lab, 0) + 1 115 | return freqs 116 | 117 | def _iter_conllu_sentences(self) -> Iterator[Tuple[Sentence, TagSequence]]: 118 | """Yield (tokens, tags) for each sentence in the dataset file.""" 119 | tokens: Sentence = [] 120 | tags: TagSequence = [] 121 | with self.dataset_path.open("r", encoding="utf8") as fh: 122 | for raw in fh: 123 | line = raw.rstrip("\n") 124 | stripped = line.strip() 125 | if not stripped: # sentence boundary 126 | if tokens: 127 | yield tokens, tags 128 | tokens, tags = [], [] 129 | continue 130 | if stripped.startswith("#"): 131 | continue 132 | parts = stripped.split("\t") 133 | if len(parts) < 4: 134 | continue 135 | id_field = parts[0] 136 | # Skip multiword tokens like '3-4' 137 | if "-" in id_field: 138 | continue 139 | if not id_field.isdigit(): 140 | continue 141 | token = parts[1] 142 | tag = parts[3] 143 | tokens.append(token) 144 | tags.append(tag) 145 | # Flush final sentence if file lacks trailing newline/blank line 146 | if tokens: 147 | yield tokens, tags 148 | 149 | def _load_data(self) -> Tuple[List[Sentence], List[TagSequence]]: 150 | samples: List[Sentence] = [] 151 | labels: List[TagSequence] = [] 152 | for sent, tag_seq in self._iter_conllu_sentences(): 153 | samples.append(sent) 154 | labels.append(tag_seq) 155 | return samples, labels 156 | 157 | def __getitem__(self, idx: int) -> Tuple[Sentence, TagSequence]: # type: ignore[override] 158 | return self._samples[idx], self._labels[idx] 159 | 160 | def __len__(self) -> int: 161 | return len(self._samples) 162 | 163 | def __iter__(self) -> Iterator[Tuple[Sentence, TagSequence]]: 164 | for sample, label in zip(self._samples, self._labels, strict=True): 165 | yield sample, label 166 | 167 | def __repr__(self) -> str: 168 | return f"{self.__class__.__name__}(n_sentences={len(self)}, unique_labels={len(self.unique_labels)})" 169 | 170 | def _check_exists(self) -> bool: 171 | return self.dataset_path.exists() 172 | 173 | def download_dataset(self) -> None: 174 | """Download the raw dataset file if needed using shared retry helper.""" 175 | if self._check_exists() and not self.force_download: 176 | return 177 | self.root.mkdir(parents=True, exist_ok=True) 178 | try: 179 | text = download_text_with_retries( 180 | self.data_file, 181 | timeout=self.timeout, 182 | max_retries=self.max_retries, 183 | expected_sha256=self.expected_sha256, 184 | show_progress=self.show_progress, 185 | ) 186 | except DownloadFailure as exc: 187 | raise DownloadError(str(exc)) from exc 188 | atomic_write_text(self.dataset_path, text) 189 | -------------------------------------------------------------------------------- /ua_datasets/text_classification/news_classification.py: -------------------------------------------------------------------------------- 1 | """News classification dataset loader. 2 | 3 | Expected CSV Columns 4 | -------------------- 5 | Required minimal columns: ``title``, ``text``, ``tags``, ``target`` in that 6 | order. (Historically this dataset has used that order.) If columns are missing 7 | or re-ordered the loader attempts to locate required names; if any mandatory 8 | column is absent a :class:`ParseError` is raised. 
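A minimal well-formed split file looks like (illustrative values):

    title,text,tags,target
    Headline,Body text,tag1|tag2,POLITICS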
9 | 10 | Example 11 | ------- 12 | >>> ds = NewsClassificationDataset(root=Path('./news'), split='train', download=True) 13 | >>> title, text, target, tags = ds[0] 14 | >>> len(ds), target in ds.labels 15 | """ 16 | 17 | from __future__ import annotations 18 | 19 | import csv 20 | from dataclasses import dataclass, field 21 | from pathlib import Path 22 | from typing import Dict, Iterator, List, Optional, Set, Tuple 23 | from urllib.request import urlopen 24 | 25 | from ua_datasets.utils import DownloadFailure, atomic_write_text, download_text_with_retries 26 | 27 | __all__ = [ 28 | "DownloadError", 29 | "NewsClassificationDataset", 30 | "ParseError", 31 | ] 32 | 33 | 34 | class DownloadError(RuntimeError): 35 | """Raised when the dataset cannot be downloaded after retries or integrity check fails.""" 36 | 37 | 38 | class ParseError(RuntimeError): 39 | """Raised when CSV file is empty, malformed, or missing mandatory columns.""" 40 | 41 | 42 | Row = List[str] 43 | Sample = Tuple[str, str, str, Optional[List[str]]] 44 | 45 | 46 | @dataclass(slots=True) 47 | class NewsClassificationDataset: 48 | """Ukrainian news classification dataset. 49 | 50 | Parameters 51 | ---------- 52 | root: 53 | Directory where the dataset split CSV will be stored or read from. 54 | download: 55 | If ``True`` (default), download the split file if it is missing. 56 | split: 57 | One of ``"train"`` or ``"test"``. 58 | return_tags: 59 | If ``True`` parsed list of tags is returned instead of ``None`` in the 60 | 4th element of each sample tuple. 61 | """ 62 | 63 | root: Path 64 | download: bool = True 65 | split: str = "train" 66 | return_tags: bool = False 67 | 68 | base_url: str = "https://github.com/fido-ai/ua-datasets/releases/download/v0.0.1/" 69 | force_download: bool = False 70 | max_retries: int = 3 71 | timeout: int = 20 # seconds 72 | expected_sha256: str | None = None 73 | show_progress: bool = True 74 | 75 | dataset_path: Path = field(init=False) 76 | _columns: List[str] = field(init=False, default_factory=list) 77 | _rows: List[Row] = field(init=False, default_factory=list) 78 | _parsed_tags: Optional[List[List[str]]] = field(init=False, default=None) 79 | _label_cache: Set[str] = field(init=False, default_factory=set) 80 | 81 | def __post_init__(self) -> None: 82 | self.root = Path(self.root) 83 | self.dataset_path = self.root / f"{self.split}.csv" 84 | if self.download: 85 | self.download_dataset() 86 | if not self.dataset_path.exists(): 87 | raise FileNotFoundError( 88 | "Dataset not found. Use download=True to fetch it or ensure the file exists." 
89 | ) 90 | self._rows = self._load_rows() 91 | if not self._rows: 92 | raise ParseError("Loaded zero rows; file may be empty or malformed.") 93 | # Cache labels for fast repeated access 94 | self._label_cache = {row[self._columns.index("target")] for row in self._rows} 95 | 96 | def download_dataset(self) -> None: 97 | """Download the dataset split file if needed using shared helper.""" 98 | if self.dataset_path.exists() and not self.force_download: 99 | return 100 | self.root.mkdir(parents=True, exist_ok=True) 101 | url = f"{self.base_url}{self.split}.csv" 102 | try: 103 | text = download_text_with_retries( 104 | url, 105 | timeout=self.timeout, 106 | max_retries=self.max_retries, 107 | expected_sha256=self.expected_sha256, 108 | opener=urlopen, 109 | show_progress=self.show_progress, 110 | ) 111 | except DownloadFailure as exc: 112 | raise DownloadError(str(exc)) from exc 113 | atomic_write_text(self.dataset_path, text) 114 | 115 | def _load_rows(self) -> List[Row]: 116 | """Load raw rows from CSV, capturing header separately and validating columns.""" 117 | with self.dataset_path.open("r", encoding="utf8", newline="") as f: 118 | reader = csv.reader(f) 119 | try: 120 | self._columns = next(reader) 121 | except StopIteration as exc: 122 | raise ParseError("CSV file is empty") from exc 123 | required = {"title", "text", "target"} 124 | missing = required - set(self._columns) 125 | if missing: 126 | raise ParseError(f"Missing required column(s): {', '.join(sorted(missing))}") 127 | rows: List[Row] = [] 128 | for row in reader: 129 | if not row or all(cell == "" for cell in row): 130 | continue 131 | # Basic row length guard 132 | if len(row) < len(self._columns): 133 | # Allow shorter if trailing columns empty, pad to columns length 134 | row = row + [""] * (len(self._columns) - len(row)) 135 | rows.append(row) 136 | return rows 137 | 138 | @property 139 | def column_names(self) -> List[str]: 140 | return self._columns 141 | 142 | @property 143 | def labels(self) -> Set[str]: 144 | return set(self._label_cache) 145 | 146 | @property 147 | def data(self) -> List[Row]: 148 | return self._rows 149 | 150 | @staticmethod 151 | def _preprocess_tags(tags: str) -> List[str]: 152 | return [el for el in tags.split("|") if el] 153 | 154 | def _ensure_parsed_tags(self) -> None: 155 | if not self.return_tags or self._parsed_tags is not None: 156 | return 157 | tags_idx = self._columns.index("tags") if "tags" in self._columns else None 158 | parsed: List[List[str]] = [] 159 | for row in self._rows: 160 | raw = row[tags_idx] if tags_idx is not None and tags_idx < len(row) else "" 161 | parsed.append(self._preprocess_tags(raw)) 162 | self._parsed_tags = parsed 163 | 164 | def label_frequencies(self) -> Dict[str, int]: 165 | freqs: Dict[str, int] = {} 166 | tgt_idx = self._columns.index("target") 167 | for row in self._rows: 168 | tgt = row[tgt_idx] 169 | freqs[tgt] = freqs.get(tgt, 0) + 1 170 | return freqs 171 | 172 | def __len__(self) -> int: 173 | return len(self._rows) 174 | 175 | def __getitem__(self, idx: int) -> Sample: 176 | title, text, _tags_raw, target = self._rows[idx] 177 | if self.return_tags: 178 | self._ensure_parsed_tags() 179 | assert self._parsed_tags is not None 180 | return title, text, target, self._parsed_tags[idx] 181 | return title, text, target, None 182 | 183 | def __iter__(self) -> Iterator[Sample]: 184 | for i in range(len(self)): 185 | yield self[i] 186 | 187 | def __repr__(self) -> str: 188 | return f"{self.__class__.__name__}(split={self.split!r}, n_rows={len(self)}, 
n_labels={len(self.labels)}, return_tags={self.return_tags})" 189 | -------------------------------------------------------------------------------- /ua_datasets/question_answering/uasquad_question_answering.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from dataclasses import dataclass, field 5 | from pathlib import Path 6 | from typing import Any, Dict, Iterator, List, Optional, Set 7 | from urllib.request import urlopen 8 | 9 | from ua_datasets.utils import DownloadFailure, atomic_write_text, download_text_with_retries 10 | 11 | __all__ = [ 12 | "DownloadError", 13 | "ParseError", 14 | "UaSquadDataset", 15 | "load_ua_squad_v2", 16 | ] 17 | 18 | # Public (lightweight) representation of a SQuAD v2 style example. 19 | # We intentionally keep this a plain dict-compatible shape instead of introducing 20 | # pydantic/dataclasses for each row to avoid overhead and preserve zero heavy deps. 21 | HFStyleExample = Dict[str, Any] 22 | 23 | 24 | class DownloadError(RuntimeError): 25 | """Raised when a split cannot be downloaded after retries or integrity check fails.""" 26 | 27 | 28 | class ParseError(RuntimeError): 29 | """Raised when the JSON file is malformed or yields zero valid QA triplets.""" 30 | 31 | 32 | @dataclass(slots=True) 33 | class UaSquadDataset: 34 | """Ukrainian SQuAD-style Question Answering dataset. 35 | 36 | Parameters 37 | ---------- 38 | root: 39 | Directory where splits will be cached. 40 | split: 41 | One of ``"train"`` or ``"val"``. 42 | download: 43 | If ``True`` (default) downloads the split file if it is missing. 44 | file_map: 45 | Optional mapping from split name to filename. Defaults to 46 | ``{"train": "train.json", "val": "val.json"}``. 47 | base_url: 48 | Base URL path ending with a slash from which filenames are resolved. 49 | """ 50 | 51 | root: Path 52 | split: str = "train" 53 | download: bool = True 54 | file_map: dict[str, List[str]] = field( 55 | default_factory=lambda: { 56 | "train": ["train.json"], 57 | "val": ["val.json", "validation.json", "dev.json", "val.jspon"], 58 | } 59 | ) 60 | base_url: str = "https://huggingface.co/datasets/FIdo-AI/ua-squad/resolve/main/" 61 | force_download: bool = False 62 | max_retries: int = 3 63 | timeout: int = 20 # seconds 64 | expected_sha256: str | None = None 65 | show_progress: bool = True 66 | # If True (default) skip flat-format training examples whose 'answer' value is an empty string. 67 | # This avoids polluting the training set with ambiguous empty-answer placeholders while still 68 | # retaining explicit impossible examples represented by a missing 'answer' key (answer=None). 69 | ignore_empty_answer: bool = True 70 | 71 | dataset_path: Optional[Path] = field(init=False, default=None) 72 | # SQuAD v2 style expanded storage 73 | _examples: List[HFStyleExample] = field(init=False, default_factory=list) 74 | _unique_answers_cache: Set[str] = field(init=False, default_factory=set) 75 | 76 | def __post_init__(self) -> None: 77 | self.root = Path(self.root) 78 | if self.split not in self.file_map: 79 | raise ValueError( 80 | f"Unsupported split '{self.split}'. 
Expected one of: {list(self.file_map)}" 81 | ) 82 | self.dataset_path = self._resolve_or_download_split() 83 | if self.dataset_path is None: 84 | # Graceful empty dataset (tests expect len==0 allowed) 85 | self._examples = [] 86 | return 87 | self._examples = self._parse( 88 | self.dataset_path, 89 | ignore_empty_answer=self.ignore_empty_answer, 90 | split=self.split, 91 | ) 92 | if not self._examples: 93 | raise ParseError( 94 | f"Parsed zero QA examples from '{self.dataset_path}'. File may be malformed." 95 | ) 96 | # Build unique answer cache ignoring empties and impossible examples. 97 | self._unique_answers_cache = { 98 | t 99 | for ex in self._examples 100 | if not ex.get("is_impossible") 101 | for t in ex.get("answers", {}).get("text", []) 102 | if t 103 | } 104 | 105 | @property 106 | def unique_answers(self) -> Set[str]: 107 | return set(self._unique_answers_cache) 108 | 109 | def answer_frequencies(self) -> Dict[str, int]: 110 | freqs: Dict[str, int] = {} 111 | for ex in self._examples: 112 | if ex.get("is_impossible"): 113 | continue 114 | for t in ex.get("answers", {}).get("text", []): 115 | if not t: 116 | continue 117 | freqs[t] = freqs.get(t, 0) + 1 118 | return freqs 119 | 120 | def _resolve_or_download_split(self) -> Path | None: 121 | """Locate or download split file with retries & optional integrity.""" 122 | candidates = self.file_map[self.split] 123 | self.root.mkdir(parents=True, exist_ok=True) 124 | 125 | # Existing file short-circuit 126 | for name in candidates: 127 | path = self.root / name 128 | if path.exists() and not self.force_download: 129 | return path 130 | if not self.download: 131 | return None 132 | 133 | for name in candidates: 134 | path = self.root / name 135 | url = f"{self.base_url}{name}" 136 | try: 137 | text = download_text_with_retries( 138 | url, 139 | timeout=self.timeout, 140 | max_retries=self.max_retries, 141 | expected_sha256=self.expected_sha256, 142 | validate=lambda t: t.lstrip().startswith("{") or t.lstrip().startswith("["), 143 | opener=urlopen, 144 | show_progress=self.show_progress, 145 | ) 146 | atomic_write_text(path, text) 147 | return path 148 | except DownloadFailure: 149 | continue 150 | return None 151 | 152 | @staticmethod 153 | def _parse( 154 | path: Path, 155 | *, 156 | ignore_empty_answer: bool = True, 157 | split: str | None = None, 158 | ) -> List[HFStyleExample]: 159 | """Parse flat (train-like) or nested SQuAD / SQuAD v2 style JSON into HF style examples only.""" 160 | with path.open("r", encoding="utf8") as f: 161 | try: 162 | obj = json.load(f) 163 | except json.JSONDecodeError as exc: 164 | raise ParseError(f"Failed to decode JSON file '{path}': {exc}") from exc 165 | 166 | data = obj.get("data", []) 167 | examples: List[HFStyleExample] = [] 168 | 169 | def _gen_id(question: str, context: str) -> str: 170 | # Lightweight deterministic id (not cryptographic, good enough for local uniqueness) 171 | import hashlib 172 | 173 | h = hashlib.sha1() 174 | h.update((question + "\n" + context).encode("utf-8")) 175 | return h.hexdigest()[:16] 176 | 177 | def _compute_answer_start(context: str, answer_text: str) -> int: 178 | return context.find(answer_text) if answer_text else -1 179 | 180 | nested_format = ( 181 | data 182 | and isinstance(data, list) 183 | and isinstance(data[0], dict) 184 | and "paragraphs" in data[0] 185 | ) 186 | 187 | if nested_format: 188 | # SQuAD / SQuAD v2 style validation (or full) format 189 | for article in data: 190 | title = article.get("title") 191 | for para in article.get("paragraphs", 
[]): 192 | raw_context = para.get("context") 193 | if raw_context is None: 194 | continue 195 | context = str(raw_context).strip() 196 | if not context: 197 | continue 198 | for qa in para.get("qas", []): 199 | raw_question = qa.get("question") 200 | if raw_question is None: 201 | continue 202 | question = str(raw_question).strip() 203 | if not question: 204 | continue 205 | # answers may be empty in SQuAD v2 206 | ans_objs = qa.get("answers") or [] 207 | texts: List[str] = [] 208 | starts: List[int] = [] 209 | for cand in ans_objs: 210 | t = str(cand.get("text", "")).strip() 211 | if not t: 212 | continue 213 | start = cand.get("answer_start") 214 | if isinstance(start, int) and start >= 0: 215 | # validate substring alignment quickly (best effort) 216 | if context[start : start + len(t)] != t: 217 | # fallback to search 218 | start = _compute_answer_start(context, t) 219 | else: 220 | start = _compute_answer_start(context, t) 221 | if start >= 0: 222 | texts.append(t) 223 | starts.append(start) 224 | is_impossible = bool(qa.get("is_impossible", len(texts) == 0)) 225 | examples.append( 226 | { 227 | "id": qa.get("id") or _gen_id(question, context), 228 | "title": title, 229 | "context": context, 230 | "question": question, 231 | "answers": {"text": texts, "answer_start": starts}, 232 | "is_impossible": is_impossible, 233 | } 234 | ) 235 | else: 236 | # Flat simplified train-like structure with singular 'answer' 237 | for item in data: 238 | if not isinstance(item, dict): 239 | continue 240 | question = str(item.get("question", "")).strip() 241 | context = str(item.get("context", "")).strip() 242 | answer = item.get("answer") 243 | if not question or not context: 244 | continue 245 | if answer is None: 246 | # impossible (no answer provided) 247 | texts = [] 248 | starts = [] 249 | is_impossible = True 250 | else: 251 | ans_text = str(answer).strip() 252 | if not ans_text: 253 | # Empty string answer 254 | if ignore_empty_answer and split == "train": 255 | # Skip this example entirely when training to avoid noisy empties. 256 | continue 257 | # Keep as impossible example for non-train splits (evaluation) or when flag disabled. 258 | texts = [] 259 | starts = [] 260 | is_impossible = True 261 | else: 262 | start_pos = _compute_answer_start(context, ans_text) 263 | if start_pos == -1: 264 | # Accept provided answer text even if not found in context for synthetic tests; 265 | # record start as -1 to indicate unknown alignment. 
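# Downstream consumers should therefore treat answer_start == -1 as
# "alignment unknown" rather than as a real character offset into the context.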
266 | texts = [ans_text] 267 | starts = [-1] 268 | is_impossible = False 269 | else: 270 | texts = [ans_text] 271 | starts = [start_pos] 272 | is_impossible = False 273 | examples.append( 274 | { 275 | "id": _gen_id(question, context), 276 | "title": None, 277 | "context": context, 278 | "question": question, 279 | "answers": {"text": texts, "answer_start": starts}, 280 | "is_impossible": is_impossible, 281 | } 282 | ) 283 | 284 | return examples 285 | 286 | def __getitem__(self, idx: int) -> HFStyleExample: 287 | return self._examples[idx] 288 | 289 | def __len__(self) -> int: 290 | return len(self._examples) 291 | 292 | def __iter__(self) -> Iterator[HFStyleExample]: 293 | for ex in self._examples: 294 | yield ex 295 | 296 | def __repr__(self) -> str: 297 | return f"{self.__class__.__name__}(split={self.split!r}, examples={len(self._examples)}, unique_answers={len(self._unique_answers_cache)})" 298 | 299 | def _check_exists(self) -> bool: 300 | return bool(self.dataset_path and self.dataset_path.exists()) 301 | 302 | # ---- SQuAD v2 style accessors ------------------------------------------------- 303 | @property 304 | def examples(self) -> List[HFStyleExample]: 305 | """Full list of SQuAD v2 style examples. 306 | 307 | Each example dict has keys: id, title, context, question, answers, is_impossible. 308 | Answers is a dict {'text': List[str], 'answer_start': List[int]} as expected by 309 | Hugging Face's squad_v2 format. No heavy HF dependency is required here. 310 | """ 311 | return list(self._examples) 312 | 313 | def to_hf_dict(self) -> List[Dict[str, Any]]: # lightweight alias 314 | """Alias returning examples (intended for quick serialization).""" 315 | return self.examples 316 | 317 | def to_hf_dataset(self) -> Any: # pragma: no cover - optional convenience 318 | """Return a Hugging Face Dataset (requires 'datasets' installed). 319 | 320 | This keeps the core library free from the dependency; import is local. 321 | """ 322 | try: # local import to avoid hard dependency 323 | import importlib 324 | 325 | ds_mod = importlib.import_module("datasets") 326 | Dataset = ds_mod.Dataset 327 | except Exception as exc: 328 | raise RuntimeError( 329 | "The 'datasets' package is required for to_hf_dataset(); install with 'pip install datasets'." 330 | ) from exc 331 | return Dataset.from_list(self._examples) 332 | 333 | 334 | # ---------------------------------------------------------------------------- 335 | # Convenience loader mimicking Hugging Face squad_v2 DatasetDict structure. 336 | # ---------------------------------------------------------------------------- 337 | def load_ua_squad_v2( 338 | root: Path | str = Path("./data/ua_squad"), 339 | *, 340 | download: bool = True, 341 | force_download: bool = False, 342 | features: Any | None = None, 343 | ) -> Any: 344 | """Load UA-SQuAD splits and return a ``datasets.DatasetDict`` matching squad_v2 shape. 345 | 346 | Parameters 347 | ---------- 348 | root : Path | str 349 | Root directory where ``train.json`` / ``val.json`` (or fallbacks) reside / will be downloaded. 350 | download : bool 351 | Whether to download missing splits. 352 | force_download : bool 353 | Re-download even if local files exist. 354 | features : Optional[datasets.Features] 355 | Custom features to cast onto the resulting datasets. If omitted a default 356 | SQuAD v2 style schema is applied. 
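Illustrative usage (requires the optional 'datasets' package)::

    >>> dd = load_ua_squad_v2("./data/ua_squad")  # doctest: +SKIP
    >>> dd["validation"][0]["answers"]  # doctest: +SKIP
    {'text': [...], 'answer_start': [...]}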
357 | 358 | Returns 359 | ------- 360 | datasets.DatasetDict 361 | With keys ``train`` and ``validation`` each exposing columns: 362 | id, title, context, question, answers{"text": list[str], "answer_start": list[int]}, is_impossible. 363 | """ 364 | try: # local import to avoid hard dependency 365 | import importlib 366 | 367 | ds_mod = importlib.import_module("datasets") 368 | DatasetDict = ds_mod.DatasetDict 369 | Features = ds_mod.Features 370 | Sequence = ds_mod.Sequence 371 | Value = ds_mod.Value 372 | except ModuleNotFoundError as exc: # pragma: no cover 373 | raise RuntimeError( 374 | "The 'datasets' package is required for load_ua_squad_v2(); install with 'uv add datasets'." 375 | ) from exc 376 | 377 | root = Path(root) 378 | train_ds = UaSquadDataset( 379 | root=root, split="train", download=download, force_download=force_download 380 | ).to_hf_dataset() 381 | val_ds = UaSquadDataset( 382 | root=root, split="val", download=download, force_download=force_download 383 | ).to_hf_dataset() 384 | 385 | if features is None: 386 | features = Features( 387 | { 388 | "id": Value("string"), 389 | "title": Value("string"), 390 | "context": Value("string"), 391 | "question": Value("string"), 392 | "answers": { 393 | "text": Sequence(Value("string")), 394 | "answer_start": Sequence(Value("int32")), 395 | }, 396 | "is_impossible": Value("bool"), 397 | } 398 | ) 399 | 400 | train_ds = train_ds.cast(features) 401 | val_ds = val_ds.cast(features) 402 | return DatasetDict({"train": train_ds, "validation": val_ds}) 403 | -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- 1 | version = 1 2 | revision = 2 3 | requires-python = ">=3.10" 4 | 5 | [[package]] 6 | name = "cfgv" 7 | version = "3.4.0" 8 | source = { registry = "https://pypi.org/simple" } 9 | sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } 10 | wheels = [ 11 | { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, 12 | ] 13 | 14 | [[package]] 15 | name = "click" 16 | version = "8.3.0" 17 | source = { registry = "https://pypi.org/simple" } 18 | dependencies = [ 19 | { name = "colorama", marker = "sys_platform == 'win32'" }, 20 | ] 21 | sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943, upload-time = "2025-09-18T17:32:23.696Z" } 22 | wheels = [ 23 | { url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295, upload-time = "2025-09-18T17:32:22.42Z" }, 24 | ] 25 | 26 | [[package]] 27 | name = "colorama" 28 | version = "0.4.6" 29 | source = { registry = "https://pypi.org/simple" } 30 | sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } 31 | wheels = [ 32 | { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, 33 | ] 34 | 35 | [[package]] 36 | name = "distlib" 37 | version = "0.4.0" 38 | source = { registry = "https://pypi.org/simple" } 39 | sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } 40 | wheels = [ 41 | { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, 42 | ] 43 | 44 | [[package]] 45 | name = "exceptiongroup" 46 | version = "1.3.0" 47 | source = { registry = "https://pypi.org/simple" } 48 | dependencies = [ 49 | { name = "typing-extensions", marker = "python_full_version < '3.13'" }, 50 | ] 51 | sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } 52 | wheels = [ 53 | { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" }, 54 | ] 55 | 56 | [[package]] 57 | name = "filelock" 58 | version = "3.20.0" 59 | source = { registry = "https://pypi.org/simple" } 60 | sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" } 61 | wheels = [ 62 | { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" }, 63 | ] 64 | 65 | [[package]] 66 | name = "identify" 67 | version = "2.6.15" 68 | source = { registry = "https://pypi.org/simple" } 69 | sdist = { url = "https://files.pythonhosted.org/packages/ff/e7/685de97986c916a6d93b3876139e00eef26ad5bbbd61925d670ae8013449/identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf", size = 99311, upload-time = "2025-10-02T17:43:40.631Z" } 70 | wheels = [ 71 | { url = "https://files.pythonhosted.org/packages/0f/1c/e5fd8f973d4f375adb21565739498e2e9a1e54c858a97b9a8ccfdc81da9b/identify-2.6.15-py2.py3-none-any.whl", hash = 
"sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", size = 99183, upload-time = "2025-10-02T17:43:39.137Z" }, 72 | ] 73 | 74 | [[package]] 75 | name = "iniconfig" 76 | version = "2.3.0" 77 | source = { registry = "https://pypi.org/simple" } 78 | sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } 79 | wheels = [ 80 | { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, 81 | ] 82 | 83 | [[package]] 84 | name = "markdown-it-py" 85 | version = "4.0.0" 86 | source = { registry = "https://pypi.org/simple" } 87 | dependencies = [ 88 | { name = "mdurl" }, 89 | ] 90 | sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } 91 | wheels = [ 92 | { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, 93 | ] 94 | 95 | [[package]] 96 | name = "mdurl" 97 | version = "0.1.2" 98 | source = { registry = "https://pypi.org/simple" } 99 | sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } 100 | wheels = [ 101 | { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, 102 | ] 103 | 104 | [[package]] 105 | name = "mypy" 106 | version = "1.18.2" 107 | source = { registry = "https://pypi.org/simple" } 108 | dependencies = [ 109 | { name = "mypy-extensions" }, 110 | { name = "pathspec" }, 111 | { name = "tomli", marker = "python_full_version < '3.11'" }, 112 | { name = "typing-extensions" }, 113 | ] 114 | sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, upload-time = "2025-09-19T00:11:10.519Z" } 115 | wheels = [ 116 | { url = "https://files.pythonhosted.org/packages/03/6f/657961a0743cff32e6c0611b63ff1c1970a0b482ace35b069203bf705187/mypy-1.18.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eab0cf6294dafe397c261a75f96dc2c31bffe3b944faa24db5def4e2b0f77c", size = 12807973, upload-time = "2025-09-19T00:10:35.282Z" }, 117 | { url = "https://files.pythonhosted.org/packages/10/e9/420822d4f661f13ca8900f5fa239b40ee3be8b62b32f3357df9a3045a08b/mypy-1.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:7a780ca61fc239e4865968ebc5240bb3bf610ef59ac398de9a7421b54e4a207e", size = 11896527, upload-time = "2025-09-19T00:10:55.791Z" }, 118 | { url = "https://files.pythonhosted.org/packages/aa/73/a05b2bbaa7005f4642fcfe40fb73f2b4fb6bb44229bd585b5878e9a87ef8/mypy-1.18.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448acd386266989ef11662ce3c8011fd2a7b632e0ec7d61a98edd8e27472225b", size = 12507004, upload-time = "2025-09-19T00:11:05.411Z" }, 119 | { url = "https://files.pythonhosted.org/packages/4f/01/f6e4b9f0d031c11ccbd6f17da26564f3a0f3c4155af344006434b0a05a9d/mypy-1.18.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f9e171c465ad3901dc652643ee4bffa8e9fef4d7d0eece23b428908c77a76a66", size = 13245947, upload-time = "2025-09-19T00:10:46.923Z" }, 120 | { url = "https://files.pythonhosted.org/packages/d7/97/19727e7499bfa1ae0773d06afd30ac66a58ed7437d940c70548634b24185/mypy-1.18.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:592ec214750bc00741af1f80cbf96b5013d81486b7bb24cb052382c19e40b428", size = 13499217, upload-time = "2025-09-19T00:09:39.472Z" }, 121 | { url = "https://files.pythonhosted.org/packages/9f/4f/90dc8c15c1441bf31cf0f9918bb077e452618708199e530f4cbd5cede6ff/mypy-1.18.2-cp310-cp310-win_amd64.whl", hash = "sha256:7fb95f97199ea11769ebe3638c29b550b5221e997c63b14ef93d2e971606ebed", size = 9766753, upload-time = "2025-09-19T00:10:49.161Z" }, 122 | { url = "https://files.pythonhosted.org/packages/88/87/cafd3ae563f88f94eec33f35ff722d043e09832ea8530ef149ec1efbaf08/mypy-1.18.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:807d9315ab9d464125aa9fcf6d84fde6e1dc67da0b6f80e7405506b8ac72bc7f", size = 12731198, upload-time = "2025-09-19T00:09:44.857Z" }, 123 | { url = "https://files.pythonhosted.org/packages/0f/e0/1e96c3d4266a06d4b0197ace5356d67d937d8358e2ee3ffac71faa843724/mypy-1.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:776bb00de1778caf4db739c6e83919c1d85a448f71979b6a0edd774ea8399341", size = 11817879, upload-time = "2025-09-19T00:09:47.131Z" }, 124 | { url = "https://files.pythonhosted.org/packages/72/ef/0c9ba89eb03453e76bdac5a78b08260a848c7bfc5d6603634774d9cd9525/mypy-1.18.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1379451880512ffce14505493bd9fe469e0697543717298242574882cf8cdb8d", size = 12427292, upload-time = "2025-09-19T00:10:22.472Z" }, 125 | { url = "https://files.pythonhosted.org/packages/1a/52/ec4a061dd599eb8179d5411d99775bec2a20542505988f40fc2fee781068/mypy-1.18.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1331eb7fd110d60c24999893320967594ff84c38ac6d19e0a76c5fd809a84c86", size = 13163750, upload-time = "2025-09-19T00:09:51.472Z" }, 126 | { url = "https://files.pythonhosted.org/packages/c4/5f/2cf2ceb3b36372d51568f2208c021870fe7834cf3186b653ac6446511839/mypy-1.18.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ca30b50a51e7ba93b00422e486cbb124f1c56a535e20eff7b2d6ab72b3b2e37", size = 13351827, upload-time = "2025-09-19T00:09:58.311Z" }, 127 | { url = "https://files.pythonhosted.org/packages/c8/7d/2697b930179e7277529eaaec1513f8de622818696857f689e4a5432e5e27/mypy-1.18.2-cp311-cp311-win_amd64.whl", hash = "sha256:664dc726e67fa54e14536f6e1224bcfce1d9e5ac02426d2326e2bb4e081d1ce8", size = 9757983, upload-time = "2025-09-19T00:10:09.071Z" }, 128 | { url = 
"https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = "2025-09-19T00:10:58.321Z" }, 129 | { url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" }, 130 | { url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" }, 131 | { url = "https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" }, 132 | { url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" }, 133 | { url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" }, 134 | { url = "https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" }, 135 | { url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" }, 136 | { url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" }, 137 | { url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" }, 138 | { url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" }, 139 | { url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" }, 140 | { url = "https://files.pythonhosted.org/packages/5a/0c/7d5300883da16f0063ae53996358758b2a2df2a09c72a5061fa79a1f5006/mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce", size = 12893775, upload-time = "2025-09-19T00:10:03.814Z" }, 141 | { url = "https://files.pythonhosted.org/packages/50/df/2cffbf25737bdb236f60c973edf62e3e7b4ee1c25b6878629e88e2cde967/mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d", size = 11936852, upload-time = "2025-09-19T00:10:51.631Z" }, 142 | { url = "https://files.pythonhosted.org/packages/be/50/34059de13dd269227fb4a03be1faee6e2a4b04a2051c82ac0a0b5a773c9a/mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c", size = 12480242, upload-time = "2025-09-19T00:11:07.955Z" }, 143 | { url = "https://files.pythonhosted.org/packages/5b/11/040983fad5132d85914c874a2836252bbc57832065548885b5bb5b0d4359/mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb", size = 13326683, upload-time = "2025-09-19T00:09:55.572Z" }, 144 | { url = "https://files.pythonhosted.org/packages/e9/ba/89b2901dd77414dd7a8c8729985832a5735053be15b744c18e4586e506ef/mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075", size = 13514749, upload-time = "2025-09-19T00:10:44.827Z" }, 145 | { url = "https://files.pythonhosted.org/packages/25/bc/cc98767cffd6b2928ba680f3e5bc969c4152bf7c2d83f92f5a504b92b0eb/mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf", size = 9982959, upload-time = "2025-09-19T00:10:37.344Z" }, 146 | { url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" }, 147 | ] 148 | 149 | [[package]] 150 | name = "mypy-extensions" 151 | version = "1.1.0" 152 | source = { registry = "https://pypi.org/simple" } 153 | sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } 154 | wheels = [ 155 | { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, 156 | ] 157 | 158 | [[package]] 159 | name = "nodeenv" 160 | version = "1.9.1" 161 | 
source = { registry = "https://pypi.org/simple" } 162 | sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } 163 | wheels = [ 164 | { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, 165 | ] 166 | 167 | [[package]] 168 | name = "packaging" 169 | version = "24.2" 170 | source = { registry = "https://pypi.org/simple" } 171 | sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950, upload-time = "2024-11-08T09:47:47.202Z" } 172 | wheels = [ 173 | { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451, upload-time = "2024-11-08T09:47:44.722Z" }, 174 | ] 175 | 176 | [[package]] 177 | name = "pathspec" 178 | version = "0.12.1" 179 | source = { registry = "https://pypi.org/simple" } 180 | sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } 181 | wheels = [ 182 | { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, 183 | ] 184 | 185 | [[package]] 186 | name = "platformdirs" 187 | version = "4.5.0" 188 | source = { registry = "https://pypi.org/simple" } 189 | sdist = { url = "https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632, upload-time = "2025-10-08T17:44:48.791Z" } 190 | wheels = [ 191 | { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" }, 192 | ] 193 | 194 | [[package]] 195 | name = "pluggy" 196 | version = "1.6.0" 197 | source = { registry = "https://pypi.org/simple" } 198 | sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } 199 | wheels = [ 200 | { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = 
"sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, 201 | ] 202 | 203 | [[package]] 204 | name = "pre-commit" 205 | version = "4.3.0" 206 | source = { registry = "https://pypi.org/simple" } 207 | dependencies = [ 208 | { name = "cfgv" }, 209 | { name = "identify" }, 210 | { name = "nodeenv" }, 211 | { name = "pyyaml" }, 212 | { name = "virtualenv" }, 213 | ] 214 | sdist = { url = "https://files.pythonhosted.org/packages/ff/29/7cf5bbc236333876e4b41f56e06857a87937ce4bf91e117a6991a2dbb02a/pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16", size = 193792, upload-time = "2025-08-09T18:56:14.651Z" } 215 | wheels = [ 216 | { url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965, upload-time = "2025-08-09T18:56:13.192Z" }, 217 | ] 218 | 219 | [[package]] 220 | name = "pygments" 221 | version = "2.19.2" 222 | source = { registry = "https://pypi.org/simple" } 223 | sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } 224 | wheels = [ 225 | { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, 226 | ] 227 | 228 | [[package]] 229 | name = "pytest" 230 | version = "8.4.2" 231 | source = { registry = "https://pypi.org/simple" } 232 | dependencies = [ 233 | { name = "colorama", marker = "sys_platform == 'win32'" }, 234 | { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, 235 | { name = "iniconfig" }, 236 | { name = "packaging" }, 237 | { name = "pluggy" }, 238 | { name = "pygments" }, 239 | { name = "tomli", marker = "python_full_version < '3.11'" }, 240 | ] 241 | sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } 242 | wheels = [ 243 | { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, 244 | ] 245 | 246 | [[package]] 247 | name = "pyyaml" 248 | version = "6.0.3" 249 | source = { registry = "https://pypi.org/simple" } 250 | sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } 251 | wheels = [ 252 | { url = "https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = 
"sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size = 184227, upload-time = "2025-09-25T21:31:46.04Z" }, 253 | { url = "https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size = 174019, upload-time = "2025-09-25T21:31:47.706Z" }, 254 | { url = "https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size = 740646, upload-time = "2025-09-25T21:31:49.21Z" }, 255 | { url = "https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size = 840793, upload-time = "2025-09-25T21:31:50.735Z" }, 256 | { url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size = 770293, upload-time = "2025-09-25T21:31:51.828Z" }, 257 | { url = "https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size = 732872, upload-time = "2025-09-25T21:31:53.282Z" }, 258 | { url = "https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size = 758828, upload-time = "2025-09-25T21:31:54.807Z" }, 259 | { url = "https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size = 142415, upload-time = "2025-09-25T21:31:55.885Z" }, 260 | { url = "https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size = 158561, upload-time = "2025-09-25T21:31:57.406Z" }, 261 | { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, 262 | { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, 263 | { url = 
"https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" }, 264 | { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" }, 265 | { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" }, 266 | { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" }, 267 | { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, 268 | { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, 269 | { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, 270 | { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, 271 | { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, 272 | { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, 273 | { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, 274 | { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, 275 | { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, 276 | { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, 277 | { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, 278 | { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, 279 | { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, 280 | { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, 281 | { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, 282 | { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, 283 | { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, 284 | { url = 
"https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, 285 | { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, 286 | { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, 287 | { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, 288 | { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, 289 | { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, 290 | { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, 291 | { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, 292 | { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, 293 | { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, 294 | { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time 
= "2025-09-25T21:32:40.865Z" }, 295 | { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, 296 | { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, 297 | { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, 298 | { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, 299 | { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, 300 | { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, 301 | { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, 302 | { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, 303 | { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, 304 | { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, 305 | { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", 
size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, 306 | { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, 307 | { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, 308 | ] 309 | 310 | [[package]] 311 | name = "rich" 312 | version = "14.2.0" 313 | source = { registry = "https://pypi.org/simple" } 314 | dependencies = [ 315 | { name = "markdown-it-py" }, 316 | { name = "pygments" }, 317 | ] 318 | sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" } 319 | wheels = [ 320 | { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, 321 | ] 322 | 323 | [[package]] 324 | name = "ruff" 325 | version = "0.14.2" 326 | source = { registry = "https://pypi.org/simple" } 327 | sdist = { url = "https://files.pythonhosted.org/packages/ee/34/8218a19b2055b80601e8fd201ec723c74c7fe1ca06d525a43ed07b6d8e85/ruff-0.14.2.tar.gz", hash = "sha256:98da787668f239313d9c902ca7c523fe11b8ec3f39345553a51b25abc4629c96", size = 5539663, upload-time = "2025-10-23T19:37:00.956Z" } 328 | wheels = [ 329 | { url = "https://files.pythonhosted.org/packages/16/dd/23eb2db5ad9acae7c845700493b72d3ae214dce0b226f27df89216110f2b/ruff-0.14.2-py3-none-linux_armv6l.whl", hash = "sha256:7cbe4e593505bdec5884c2d0a4d791a90301bc23e49a6b1eb642dd85ef9c64f1", size = 12533390, upload-time = "2025-10-23T19:36:18.044Z" }, 330 | { url = "https://files.pythonhosted.org/packages/5a/8c/5f9acff43ddcf3f85130d0146d0477e28ccecc495f9f684f8f7119b74c0d/ruff-0.14.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8d54b561729cee92f8d89c316ad7a3f9705533f5903b042399b6ae0ddfc62e11", size = 12887187, upload-time = "2025-10-23T19:36:22.664Z" }, 331 | { url = "https://files.pythonhosted.org/packages/99/fa/047646491479074029665022e9f3dc6f0515797f40a4b6014ea8474c539d/ruff-0.14.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5c8753dfa44ebb2cde10ce5b4d2ef55a41fb9d9b16732a2c5df64620dbda44a3", size = 11925177, upload-time = "2025-10-23T19:36:24.778Z" }, 332 | { url = "https://files.pythonhosted.org/packages/15/8b/c44cf7fe6e59ab24a9d939493a11030b503bdc2a16622cede8b7b1df0114/ruff-0.14.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d0bbeffb8d9f4fccf7b5198d566d0bad99a9cb622f1fc3467af96cb8773c9e3", size = 12358285, upload-time = "2025-10-23T19:36:26.979Z" }, 333 | { url = "https://files.pythonhosted.org/packages/45/01/47701b26254267ef40369aea3acb62a7b23e921c27372d127e0f3af48092/ruff-0.14.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7047f0c5a713a401e43a88d36843d9c83a19c584e63d664474675620aaa634a8", size = 12303832, upload-time = "2025-10-23T19:36:29.192Z" }, 334 | { url = 
"https://files.pythonhosted.org/packages/2d/5c/ae7244ca4fbdf2bee9d6405dcd5bc6ae51ee1df66eb7a9884b77b8af856d/ruff-0.14.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bf8d2f9aa1602599217d82e8e0af7fd33e5878c4d98f37906b7c93f46f9a839", size = 13036995, upload-time = "2025-10-23T19:36:31.861Z" }, 335 | { url = "https://files.pythonhosted.org/packages/27/4c/0860a79ce6fd4c709ac01173f76f929d53f59748d0dcdd662519835dae43/ruff-0.14.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1c505b389e19c57a317cf4b42db824e2fca96ffb3d86766c1c9f8b96d32048a7", size = 14512649, upload-time = "2025-10-23T19:36:33.915Z" }, 336 | { url = "https://files.pythonhosted.org/packages/7f/7f/d365de998069720a3abfc250ddd876fc4b81a403a766c74ff9bde15b5378/ruff-0.14.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a307fc45ebd887b3f26b36d9326bb70bf69b01561950cdcc6c0bdf7bb8e0f7cc", size = 14088182, upload-time = "2025-10-23T19:36:36.983Z" }, 337 | { url = "https://files.pythonhosted.org/packages/6c/ea/d8e3e6b209162000a7be1faa41b0a0c16a133010311edc3329753cc6596a/ruff-0.14.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:61ae91a32c853172f832c2f40bd05fd69f491db7289fb85a9b941ebdd549781a", size = 13599516, upload-time = "2025-10-23T19:36:39.208Z" }, 338 | { url = "https://files.pythonhosted.org/packages/fa/ea/c7810322086db68989fb20a8d5221dd3b79e49e396b01badca07b433ab45/ruff-0.14.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1967e40286f63ee23c615e8e7e98098dedc7301568bd88991f6e544d8ae096", size = 13272690, upload-time = "2025-10-23T19:36:41.453Z" }, 339 | { url = "https://files.pythonhosted.org/packages/a9/39/10b05acf8c45786ef501d454e00937e1b97964f846bf28883d1f9619928a/ruff-0.14.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:2877f02119cdebf52a632d743a2e302dea422bfae152ebe2f193d3285a3a65df", size = 13496497, upload-time = "2025-10-23T19:36:43.61Z" }, 340 | { url = "https://files.pythonhosted.org/packages/59/a1/1f25f8301e13751c30895092485fada29076e5e14264bdacc37202e85d24/ruff-0.14.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e681c5bc777de5af898decdcb6ba3321d0d466f4cb43c3e7cc2c3b4e7b843a05", size = 12266116, upload-time = "2025-10-23T19:36:45.625Z" }, 341 | { url = "https://files.pythonhosted.org/packages/5c/fa/0029bfc9ce16ae78164e6923ef392e5f173b793b26cc39aa1d8b366cf9dc/ruff-0.14.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e21be42d72e224736f0c992cdb9959a2fa53c7e943b97ef5d081e13170e3ffc5", size = 12281345, upload-time = "2025-10-23T19:36:47.618Z" }, 342 | { url = "https://files.pythonhosted.org/packages/a5/ab/ece7baa3c0f29b7683be868c024f0838770c16607bea6852e46b202f1ff6/ruff-0.14.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b8264016f6f209fac16262882dbebf3f8be1629777cf0f37e7aff071b3e9b92e", size = 12629296, upload-time = "2025-10-23T19:36:49.789Z" }, 343 | { url = "https://files.pythonhosted.org/packages/a4/7f/638f54b43f3d4e48c6a68062794e5b367ddac778051806b9e235dfb7aa81/ruff-0.14.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5ca36b4cb4db3067a3b24444463ceea5565ea78b95fe9a07ca7cb7fd16948770", size = 13371610, upload-time = "2025-10-23T19:36:51.882Z" }, 344 | { url = "https://files.pythonhosted.org/packages/8d/35/3654a973ebe5b32e1fd4a08ed2d46755af7267da7ac710d97420d7b8657d/ruff-0.14.2-py3-none-win32.whl", hash = "sha256:41775927d287685e08f48d8eb3f765625ab0b7042cc9377e20e64f4eb0056ee9", size = 12415318, upload-time = "2025-10-23T19:36:53.961Z" }, 345 | { url = 
"https://files.pythonhosted.org/packages/71/30/3758bcf9e0b6a4193a6f51abf84254aba00887dfa8c20aba18aa366c5f57/ruff-0.14.2-py3-none-win_amd64.whl", hash = "sha256:0df3424aa5c3c08b34ed8ce099df1021e3adaca6e90229273496b839e5a7e1af", size = 13565279, upload-time = "2025-10-23T19:36:56.578Z" }, 346 | { url = "https://files.pythonhosted.org/packages/2e/5d/aa883766f8ef9ffbe6aa24f7192fb71632f31a30e77eb39aa2b0dc4290ac/ruff-0.14.2-py3-none-win_arm64.whl", hash = "sha256:ea9d635e83ba21569fbacda7e78afbfeb94911c9434aff06192d9bc23fd5495a", size = 12554956, upload-time = "2025-10-23T19:36:58.714Z" }, 347 | ] 348 | 349 | [[package]] 350 | name = "shellingham" 351 | version = "1.5.4" 352 | source = { registry = "https://pypi.org/simple" } 353 | sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } 354 | wheels = [ 355 | { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, 356 | ] 357 | 358 | [[package]] 359 | name = "tomli" 360 | version = "2.3.0" 361 | source = { registry = "https://pypi.org/simple" } 362 | sdist = { url = "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392, upload-time = "2025-10-08T22:01:47.119Z" } 363 | wheels = [ 364 | { url = "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236, upload-time = "2025-10-08T22:01:00.137Z" }, 365 | { url = "https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084, upload-time = "2025-10-08T22:01:01.63Z" }, 366 | { url = "https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832, upload-time = "2025-10-08T22:01:02.543Z" }, 367 | { url = "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052, upload-time = "2025-10-08T22:01:03.836Z" }, 368 | { url = "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555, upload-time = "2025-10-08T22:01:04.834Z" }, 369 | { url = 
"https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128, upload-time = "2025-10-08T22:01:05.84Z" }, 370 | { url = "https://files.pythonhosted.org/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445, upload-time = "2025-10-08T22:01:06.896Z" }, 371 | { url = "https://files.pythonhosted.org/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165, upload-time = "2025-10-08T22:01:08.107Z" }, 372 | { url = "https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891, upload-time = "2025-10-08T22:01:09.082Z" }, 373 | { url = "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796, upload-time = "2025-10-08T22:01:10.266Z" }, 374 | { url = "https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121, upload-time = "2025-10-08T22:01:11.332Z" }, 375 | { url = "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070, upload-time = "2025-10-08T22:01:12.498Z" }, 376 | { url = "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859, upload-time = "2025-10-08T22:01:13.551Z" }, 377 | { url = "https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296, upload-time = "2025-10-08T22:01:14.614Z" }, 378 | { url = "https://files.pythonhosted.org/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124, upload-time = "2025-10-08T22:01:15.629Z" }, 379 | { url = "https://files.pythonhosted.org/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698, upload-time = "2025-10-08T22:01:16.51Z" }, 380 | { url = 
"https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", size = 154819, upload-time = "2025-10-08T22:01:17.964Z" }, 381 | { url = "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", size = 148766, upload-time = "2025-10-08T22:01:18.959Z" }, 382 | { url = "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", size = 240771, upload-time = "2025-10-08T22:01:20.106Z" }, 383 | { url = "https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", size = 248586, upload-time = "2025-10-08T22:01:21.164Z" }, 384 | { url = "https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", size = 244792, upload-time = "2025-10-08T22:01:22.417Z" }, 385 | { url = "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", size = 248909, upload-time = "2025-10-08T22:01:23.859Z" }, 386 | { url = "https://files.pythonhosted.org/packages/f8/84/ef50c51b5a9472e7265ce1ffc7f24cd4023d289e109f669bdb1553f6a7c2/tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606", size = 96946, upload-time = "2025-10-08T22:01:24.893Z" }, 387 | { url = "https://files.pythonhosted.org/packages/b2/b7/718cd1da0884f281f95ccfa3a6cc572d30053cba64603f79d431d3c9b61b/tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999", size = 107705, upload-time = "2025-10-08T22:01:26.153Z" }, 388 | { url = "https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", size = 154244, upload-time = "2025-10-08T22:01:27.06Z" }, 389 | { url = "https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", size = 148637, upload-time = "2025-10-08T22:01:28.059Z" }, 390 | { url = "https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", size = 241925, upload-time = "2025-10-08T22:01:29.066Z" }, 391 | { url 
= "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", size = 249045, upload-time = "2025-10-08T22:01:31.98Z" }, 392 | { url = "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", size = 245835, upload-time = "2025-10-08T22:01:32.989Z" }, 393 | { url = "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", size = 253109, upload-time = "2025-10-08T22:01:34.052Z" }, 394 | { url = "https://files.pythonhosted.org/packages/22/0c/b4da635000a71b5f80130937eeac12e686eefb376b8dee113b4a582bba42/tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463", size = 97930, upload-time = "2025-10-08T22:01:35.082Z" }, 395 | { url = "https://files.pythonhosted.org/packages/b9/74/cb1abc870a418ae99cd5c9547d6bce30701a954e0e721821df483ef7223c/tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8", size = 107964, upload-time = "2025-10-08T22:01:36.057Z" }, 396 | { url = "https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", size = 163065, upload-time = "2025-10-08T22:01:37.27Z" }, 397 | { url = "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", size = 159088, upload-time = "2025-10-08T22:01:38.235Z" }, 398 | { url = "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", size = 268193, upload-time = "2025-10-08T22:01:39.712Z" }, 399 | { url = "https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", size = 275488, upload-time = "2025-10-08T22:01:40.773Z" }, 400 | { url = "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", size = 272669, upload-time = "2025-10-08T22:01:41.824Z" }, 401 | { url = "https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", size = 279709, upload-time = "2025-10-08T22:01:43.177Z" }, 
402 | { url = "https://files.pythonhosted.org/packages/7e/46/cc36c679f09f27ded940281c38607716c86cf8ba4a518d524e349c8b4874/tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0", size = 107563, upload-time = "2025-10-08T22:01:44.233Z" }, 403 | { url = "https://files.pythonhosted.org/packages/84/ff/426ca8683cf7b753614480484f6437f568fd2fda2edbdf57a2d3d8b27a0b/tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba", size = 119756, upload-time = "2025-10-08T22:01:45.234Z" }, 404 | { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" }, 405 | ] 406 | 407 | [[package]] 408 | name = "tomlkit" 409 | version = "0.13.3" 410 | source = { registry = "https://pypi.org/simple" } 411 | sdist = { url = "https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207, upload-time = "2025-06-05T07:13:44.947Z" } 412 | wheels = [ 413 | { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, 414 | ] 415 | 416 | [[package]] 417 | name = "typer" 418 | version = "0.20.0" 419 | source = { registry = "https://pypi.org/simple" } 420 | dependencies = [ 421 | { name = "click" }, 422 | { name = "rich" }, 423 | { name = "shellingham" }, 424 | { name = "typing-extensions" }, 425 | ] 426 | sdist = { url = "https://files.pythonhosted.org/packages/8f/28/7c85c8032b91dbe79725b6f17d2fffc595dff06a35c7a30a37bef73a1ab4/typer-0.20.0.tar.gz", hash = "sha256:1aaf6494031793e4876fb0bacfa6a912b551cf43c1e63c800df8b1a866720c37", size = 106492, upload-time = "2025-10-20T17:03:49.445Z" } 427 | wheels = [ 428 | { url = "https://files.pythonhosted.org/packages/78/64/7713ffe4b5983314e9d436a90d5bd4f63b6054e2aca783a3cfc44cb95bbf/typer-0.20.0-py3-none-any.whl", hash = "sha256:5b463df6793ec1dca6213a3cf4c0f03bc6e322ac5e16e13ddd622a889489784a", size = 47028, upload-time = "2025-10-20T17:03:47.617Z" }, 429 | ] 430 | 431 | [[package]] 432 | name = "typing-extensions" 433 | version = "4.15.0" 434 | source = { registry = "https://pypi.org/simple" } 435 | sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } 436 | wheels = [ 437 | { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, 438 | ] 439 | 440 | [[package]] 441 | name = "ua-datasets" 442 | version = "1.0.0" 443 | source = { virtual = "." 
} 444 | 445 | [package.dev-dependencies] 446 | dev = [ 447 | { name = "mypy" }, 448 | { name = "pre-commit" }, 449 | { name = "pytest" }, 450 | { name = "ruff" }, 451 | { name = "uv-sort" }, 452 | ] 453 | 454 | [package.metadata] 455 | 456 | [package.metadata.requires-dev] 457 | dev = [ 458 | { name = "mypy", specifier = ">=1.18.2" }, 459 | { name = "pre-commit", specifier = ">=2.21.0" }, 460 | { name = "pytest", specifier = ">=7.4.4" }, 461 | { name = "ruff", specifier = ">=0.14.2" }, 462 | { name = "uv-sort", specifier = ">=0.6.1" }, 463 | ] 464 | 465 | [[package]] 466 | name = "uv-sort" 467 | version = "0.6.1" 468 | source = { registry = "https://pypi.org/simple" } 469 | dependencies = [ 470 | { name = "packaging" }, 471 | { name = "tomlkit" }, 472 | { name = "typer" }, 473 | ] 474 | sdist = { url = "https://files.pythonhosted.org/packages/b3/70/df2501f7821f629c1c0e7dc90076f48ed4a364dc3225201f187a5ecf1608/uv_sort-0.6.1.tar.gz", hash = "sha256:a2f3828aedb60a54a17960ec3c1031e6cf8b711e6321016a6f50e6d30a442865", size = 23536, upload-time = "2025-07-05T00:54:56.213Z" } 475 | wheels = [ 476 | { url = "https://files.pythonhosted.org/packages/70/a0/3ee31db18de67d3ecfaa2fd58238a3a6837c2199419ee1f35cd668b94b3c/uv_sort-0.6.1-py3-none-any.whl", hash = "sha256:3b2df63e74cab5d8a581c12c4629ad297ea56960fb5d5433dcf8eb0aca2e80b9", size = 6409, upload-time = "2025-07-05T00:54:54.905Z" }, 477 | ] 478 | 479 | [[package]] 480 | name = "virtualenv" 481 | version = "20.35.3" 482 | source = { registry = "https://pypi.org/simple" } 483 | dependencies = [ 484 | { name = "distlib" }, 485 | { name = "filelock" }, 486 | { name = "platformdirs" }, 487 | { name = "typing-extensions", marker = "python_full_version < '3.11'" }, 488 | ] 489 | sdist = { url = "https://files.pythonhosted.org/packages/a4/d5/b0ccd381d55c8f45d46f77df6ae59fbc23d19e901e2d523395598e5f4c93/virtualenv-20.35.3.tar.gz", hash = "sha256:4f1a845d131133bdff10590489610c98c168ff99dc75d6c96853801f7f67af44", size = 6002907, upload-time = "2025-10-10T21:23:33.178Z" } 490 | wheels = [ 491 | { url = "https://files.pythonhosted.org/packages/27/73/d9a94da0e9d470a543c1b9d3ccbceb0f59455983088e727b8a1824ed90fb/virtualenv-20.35.3-py3-none-any.whl", hash = "sha256:63d106565078d8c8d0b206d48080f938a8b25361e19432d2c9db40d2899c810a", size = 5981061, upload-time = "2025-10-10T21:23:30.433Z" }, 492 | ] 493 | --------------------------------------------------------------------------------