├── .bumpversion.cfg ├── .dockerignore ├── .editorconfig ├── .github └── workflows │ ├── docs.yml │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── settings.json ├── Dockerfile.dev ├── LICENSE ├── README.md ├── codecov.yml ├── docker-compose.yml ├── docker ├── Dockerfile └── entrypoint.py ├── docs ├── advanced_usage.md ├── api-reference.md ├── backends.md ├── changelog.md ├── contributing.md ├── index.md ├── installation.md └── usage.md ├── examples ├── README.md ├── basic.py ├── benchmark.py ├── data │ └── usernames.txt ├── delay_queue.py ├── dynamic_tasks.py ├── fixtures_and_middlewares.py ├── healthcheck.py ├── heartbeat.py ├── persistence.py └── prometheus_metrics.py ├── mkdocs.yml ├── pyproject.toml ├── src └── pyncette │ ├── __init__.py │ ├── dynamodb.py │ ├── errors.py │ ├── executor.py │ ├── healthcheck.py │ ├── model.py │ ├── mysql.py │ ├── postgres.py │ ├── prometheus.py │ ├── py.typed │ ├── pyncette.py │ ├── redis │ ├── __init__.py │ ├── manage.lua │ └── poll_dynamic.lua │ ├── repository.py │ ├── sqlite.py │ ├── task.py │ └── utils.py └── tests ├── conftest.py ├── test_dynamodb.py ├── test_mysql.py ├── test_postgres.py ├── test_pyncette.py ├── test_pyncette_healthcheck.py ├── test_pyncette_integration.py ├── test_pyncette_process.py ├── test_pyncette_prometheus.py ├── test_redis.py ├── test_sqlite.py └── utils ├── fakerepository.py └── timemachine.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.0.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:pyproject.toml] 7 | search = version = "{current_version}" 8 | replace = version = "{new_version}" 9 | 10 | [bumpversion:file:README.md] 11 | search = v{current_version}. 12 | replace = v{new_version}. 
13 | 14 | [bumpversion:file:src/pyncette/__init__.py] 15 | search = __version__ = "{current_version}" 16 | replace = __version__ = "{new_version}" 17 | 18 | [bumpversion:file:docs/changelog.md] 19 | search = ## Unreleased 20 | replace = ## {new_version} ({now:%Y-%m-%d}) 21 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | **/*.egg-info 3 | **/__pycache__ 4 | **/.mypy_cache 5 | *.pyc 6 | .coverage 7 | htmlcov/ 8 | **/_build/ 9 | .venv/ 10 | .pytest_cache/ 11 | site/ 12 | build/ 13 | dist/ 14 | .vscode/ 15 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # see https://editorconfig.org/ 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | charset = utf-8 11 | 12 | [*.{bat,cmd,ps1}] 13 | end_of_line = crlf 14 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | concurrency: 16 | group: "pages" 17 | cancel-in-progress: false 18 | 19 | jobs: 20 | build: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Install uv 25 | uses: astral-sh/setup-uv@v5 26 | with: 27 | version: "latest" 28 | - name: Set up Python 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: "3.12" 32 | - name: Install dependencies 33 | run: uv sync --extra all --extra dev 34 | - name: Build docs 35 | run: uv run mkdocs build --strict 36 | - name: Upload artifact 37 | if: github.event_name != 'pull_request' 38 | uses: actions/upload-pages-artifact@v3 39 | with: 40 | path: site/ 41 | 42 | deploy: 43 | if: github.event_name != 'pull_request' 44 | environment: 45 | name: github-pages 46 | url: ${{ steps.deployment.outputs.page_url }} 47 | runs-on: ubuntu-latest 48 | needs: build 49 | steps: 50 | - name: Deploy to GitHub Pages 51 | id: deployment 52 | uses: actions/deploy-pages@v4 53 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Install uv 15 | uses: astral-sh/setup-uv@v5 16 | with: 17 | version: "latest" 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: "3.12" 22 | - name: Install dependencies 23 | run: uv sync --extra all --extra dev 24 | - name: Run pre-commit 25 | run: uv run pre-commit run --all-files --show-diff-on-failure 26 | - name: Run ty 27 | run: uv run ty check src examples 28 | 29 | test: 30 | runs-on: ubuntu-latest 31 | strategy: 32 | matrix: 33 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] 34 | steps: 35 | - uses: actions/checkout@v4 36 | - name: Install uv 37 | uses: 
astral-sh/setup-uv@v5 38 | with: 39 | version: "latest" 40 | - name: Set up Python ${{ matrix.python-version }} 41 | uses: actions/setup-python@v5 42 | with: 43 | python-version: ${{ matrix.python-version }} 44 | - name: Install dependencies 45 | run: uv sync --extra all --extra dev 46 | - name: Run unit tests 47 | run: uv run pytest --cov --cov-report=term-missing --cov-report=xml -vv -m "not integration" --backend=default tests 48 | - name: Upload coverage 49 | uses: codecov/codecov-action@v5 50 | with: 51 | token: ${{ secrets.CODECOV_TOKEN }} 52 | files: ./coverage.xml 53 | flags: py${{ matrix.python-version }} 54 | 55 | integration: 56 | runs-on: ubuntu-latest 57 | timeout-minutes: 20 58 | services: 59 | postgres: 60 | image: postgres 61 | env: 62 | POSTGRES_PASSWORD: postgres 63 | POSTGRES_DB: pyncette 64 | options: >- 65 | --health-cmd pg_isready 66 | --health-interval 10s 67 | --health-timeout 5s 68 | --health-retries 5 69 | ports: 70 | - 5432:5432 71 | redis: 72 | image: redis 73 | options: >- 74 | --health-cmd "redis-cli ping" 75 | --health-interval 10s 76 | --health-timeout 5s 77 | --health-retries 5 78 | ports: 79 | - 6379:6379 80 | localstack: 81 | image: localstack/localstack 82 | env: 83 | SERVICES: dynamodb 84 | ports: 85 | - 4566:4566 86 | options: >- 87 | --health-cmd "curl -fso /dev/null http://localhost:4566/_localstack/health" 88 | --health-interval 10s 89 | --health-timeout 5s 90 | --health-retries 5 91 | mysql: 92 | image: mysql 93 | env: 94 | MYSQL_ALLOW_EMPTY_PASSWORD: "1" 95 | MYSQL_DATABASE: pyncette 96 | MYSQL_USER: pyncette 97 | MYSQL_PASSWORD: password 98 | options: >- 99 | --health-cmd "mysqladmin ping --silent" 100 | --health-interval 10s 101 | --health-timeout 5s 102 | --health-retries 5 103 | ports: 104 | - 3306:3306 105 | 106 | steps: 107 | - uses: actions/checkout@v4 108 | - name: Install uv 109 | uses: astral-sh/setup-uv@v5 110 | with: 111 | version: "latest" 112 | - name: Set up Python 113 | uses: actions/setup-python@v5 114 | with: 115 | python-version: "3.12" 116 | - name: Install dependencies 117 | run: uv sync --extra all --extra dev 118 | - name: Run integration tests 119 | env: 120 | POSTGRES_URL: postgres://postgres:postgres@localhost/postgres 121 | AWS_ACCESS_KEY_ID: "foobar" 122 | AWS_SECRET_ACCESS_KEY: "foobar" 123 | run: uv run pytest --cov --cov-report=term-missing --cov-report=xml -vv tests 124 | - name: Upload coverage 125 | uses: codecov/codecov-action@v5 126 | with: 127 | token: ${{ secrets.CODECOV_TOKEN }} 128 | files: ./coverage.xml 129 | flags: integration 130 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using uv and Twine when a release is created 2 | 3 | name: Upload Python Package 4 | 5 | on: 6 | release: 7 | types: [created] 8 | 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Install uv 15 | uses: astral-sh/setup-uv@v5 16 | with: 17 | version: "latest" 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.12' 22 | - name: Build package 23 | run: uv build 24 | - name: Publish to PyPI 25 | env: 26 | UV_PUBLISH_TOKEN: ${{ secrets.PYPI_UPLOAD_TOKEN }} 27 | run: uv publish 28 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.py[cod] 2 | __pycache__ 3 | 4 | # C extensions 5 | *.so 6 | 7 | # Packages 8 | *.egg 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | .eggs 14 | parts 15 | bin 16 | var 17 | sdist 18 | wheelhouse 19 | develop-eggs 20 | .installed.cfg 21 | lib 22 | lib64 23 | venv*/ 24 | pyvenv*/ 25 | pip-wheel-metadata/ 26 | .venv/ 27 | 28 | # Installer logs 29 | pip-log.txt 30 | 31 | # Unit test / coverage reports 32 | .coverage 33 | .coverage.* 34 | .pytest_cache/ 35 | nosetests.xml 36 | coverage.xml 37 | htmlcov 38 | 39 | # Translations 40 | *.mo 41 | 42 | # Mr Developer 43 | .mr.developer.cfg 44 | .project 45 | .pydevproject 46 | .idea 47 | *.iml 48 | *.komodoproject 49 | 50 | # Complexity 51 | output/*.html 52 | output/*/index.html 53 | 54 | # Documentation builds 55 | docs/_build 56 | site/ 57 | 58 | .DS_Store 59 | *~ 60 | .*.sw[po] 61 | .build 62 | .ve 63 | .env 64 | .cache 65 | .pytest 66 | .benchmarks 67 | .bootstrap 68 | .appveyor.token 69 | *.bak 70 | 71 | # Mypy Cache 72 | .mypy_cache/ 73 | 74 | ## VSCode 75 | .vscode/tags 76 | 77 | ## Pyncette 78 | pyncette.db 79 | 80 | uv.lock 81 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # To install the git pre-commit hooks run: 2 | # pre-commit install --install-hooks 3 | # To update the versions: 4 | # pre-commit autoupdate 5 | exclude: '^\.bumpversion\.cfg(/|$)' 6 | repos: 7 | - repo: https://github.com/asottile/pyupgrade 8 | rev: v3.21.0 9 | hooks: 10 | - id: pyupgrade 11 | args: [--py39-plus] 12 | 13 | # Ruff - Fast Python linter and formatter (replaces black, isort, pyupgrade, and many flake8 plugins) 14 | - repo: https://github.com/astral-sh/ruff-pre-commit 15 | rev: v0.14.0 16 | hooks: 17 | # Run the linter 18 | - id: ruff 19 | args: [--fix] 20 | # Run the formatter 21 | - id: ruff-format 22 | 23 | # Pre-commit hooks for file quality 24 | - repo: https://github.com/pre-commit/pre-commit-hooks 25 | rev: v6.0.0 26 | hooks: 27 | - id: trailing-whitespace 28 | exclude_types: [svg] 29 | - id: end-of-file-fixer 30 | exclude_types: [svg] 31 | - id: check-yaml 32 | - id: check-toml 33 | - id: check-json 34 | - id: check-added-large-files 35 | args: [--maxkb=1000] 36 | - id: check-merge-conflict 37 | - id: debug-statements 38 | - id: mixed-line-ending 39 | args: [--fix=lf] 40 | 41 | # Markdown formatting 42 | - repo: https://github.com/executablebooks/mdformat 43 | rev: 0.7.21 44 | hooks: 45 | - id: mdformat 46 | additional_dependencies: 47 | - mdformat-gfm # GitHub Flavored Markdown 48 | - mdformat-black # Format Python code blocks 49 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black", 3 | "restructuredtext.confPath": "${workspaceFolder}/docs", 4 | "python.pythonPath": "/usr/bin/python3" 5 | } 6 | -------------------------------------------------------------------------------- /Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | ENV DEBIAN_FRONTEND=noninteractive 3 | 4 | RUN apt-get update -y \ 5 | && apt-get install -y software-properties-common 6 | RUN add-apt-repository ppa:deadsnakes/ppa 7 | RUN apt-get update -y \ 8 | && apt-get install -y \ 9 | python3.9 \ 10 | 
python3.9-distutils \ 11 | python3.10 \ 12 | python3.10-distutils \ 13 | python3.11 \ 14 | python3.11-distutils \ 15 | python3.12 \ 16 | python3.13 \ 17 | python3-pip \ 18 | python3-apt \ 19 | redis-tools \ 20 | postgresql-client \ 21 | mysql-client \ 22 | git \ 23 | curl \ 24 | unzip \ 25 | groff \ 26 | && rm -rf /var/lib/apt/lists/* 27 | 28 | RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ 29 | && unzip awscliv2.zip \ 30 | && ./aws/install 31 | 32 | # Install uv for package management 33 | RUN curl -LsSf https://astral.sh/uv/install.sh | sh 34 | ENV PATH="/root/.local/bin:$PATH" 35 | 36 | RUN python3 -m pip install awscli-local[ver2] 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019, Tibor Djurica Potpara 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | [![Documentation](https://img.shields.io/badge/docs-github%20pages-blue)](https://tibordp.github.io/pyncette/) 4 | [![Github Actions Build Status](https://github.com/tibordp/pyncette/workflows/Python%20package/badge.svg?branch=master)](https://github.com/tibordp/pyncette/actions?query=branch%3Amaster+workflow%3A%22Python+package%22) 5 | [![Coverage Status](https://codecov.io/gh/tibordp/pyncette/branch/master/graphs/badge.svg?branch=master)](https://codecov.io/github/tibordp/pyncette) 6 | [![PyPI Package latest release](https://img.shields.io/pypi/v/pyncette.svg)](https://pypi.org/project/pyncette) 7 | [![PyPI Wheel](https://img.shields.io/pypi/wheel/pyncette.svg)](https://pypi.org/project/pyncette) 8 | [![Supported versions](https://img.shields.io/pypi/pyversions/pyncette.svg)](https://pypi.org/project/pyncette) 9 | [![Supported implementations](https://img.shields.io/pypi/implementation/pyncette.svg)](https://pypi.org/project/pyncette) 10 | [![Commits since latest release](https://img.shields.io/github/commits-since/tibordp/pyncette/v1.0.0.svg)](https://github.com/tibordp/pyncette/compare/v1.0.0...master) 11 | 12 | A reliable distributed scheduler with pluggable storage backends for Async Python. 
13 | 14 | - Free software: MIT license 15 | 16 | ## Installation 17 | 18 | Minimal installation (just SQLite persistence): 19 | 20 | ```bash 21 | pip install pyncette 22 | ``` 23 | 24 | Full installation (all the backends and Prometheus metrics exporter): 25 | 26 | ```bash 27 | pip install pyncette[all] 28 | ``` 29 | 30 | You can also install the in-development version with: 31 | 32 | ```bash 33 | pip install https://github.com/tibordp/pyncette/archive/master.zip 34 | ``` 35 | 36 | ## Documentation 37 | 38 | https://tibordp.github.io/pyncette/ 39 | 40 | ## Usage example 41 | 42 | Simple in-memory scheduler (does not persist state) 43 | 44 | ```python 45 | from pyncette import Pyncette, Context 46 | 47 | app = Pyncette() 48 | 49 | 50 | @app.task(schedule="* * * * *") 51 | async def foo(context: Context): 52 | print("This will run every minute") 53 | 54 | 55 | if __name__ == "__main__": 56 | app.main() 57 | ``` 58 | 59 | Persistent distributed cron using Redis (coordinates execution with parallel instances and survives restarts) 60 | 61 | ```python 62 | from pyncette import Pyncette, Context 63 | from pyncette.redis import redis_repository 64 | 65 | app = Pyncette(repository_factory=redis_repository, redis_url="redis://localhost") 66 | 67 | 68 | @app.task(schedule="* * * * * */10") 69 | async def foo(context: Context): 70 | print("This will run every 10 seconds") 71 | 72 | 73 | if __name__ == "__main__": 74 | app.main() 75 | ``` 76 | 77 | See the `examples` directory for more examples of usage. 78 | 79 | ## Use cases 80 | 81 | Pyncette is designed for reliable (at-least-once or at-most-once) execution of recurring tasks (think cronjobs) whose 82 | lifecycles are managed dynamically, but can work effectively for non-recurring tasks too. 83 | 84 | Example use cases: 85 | 86 | - You want to perform a database backup every day at noon 87 | - You want a report to be generated daily for your 10M users at the time of their choosing 88 | - You want currency conversion rates to be refreshed every 10 seconds 89 | - You want to allow your users to schedule non-recurring emails to be sent at an arbitrary time in the future 90 | 91 | Pyncette might not be a good fit if: 92 | 93 | - You want your tasks to be scheduled to run (ideally) once as soon as possible. It is doable, but you will be better served by a general purpose reliable queue like RabbitMQ or Amazon SQS. 94 | - You need tasks to execute at sub-second intervals with low jitter. Pyncette coordinates execution on a per task-instance basis and this coordination can add overhead and jitter. 95 | 96 | ## Supported backends 97 | 98 | Pyncette comes with an implementation for the following backends (used for persistence and coordination) out-of-the-box: 99 | 100 | - SQLite (included) 101 | - Redis (`pip install pyncette[redis]`) 102 | - PostgreSQL (`pip install pyncette[postgres]`) 103 | - MySQL 8.0+ (`pip install pyncette[mysql]`) 104 | - Amazon DynamoDB (`pip install pyncette[dynamodb]`) 105 | 106 | Pyncette imposes few requirements on the underlying datastores, so it can be extended to support other databases or 107 | custom storage formats / integrations with existing systems. For best results, the backend needs to provide: 108 | 109 | - Some sort of serialization mechanism, e.g.
traditional transactions, atomic stored procedures or compare-and-swap 110 | - Efficient range queries over a secondary index, which can be eventually consistent 111 | 112 | ## Development 113 | 114 | ### Prerequisites 115 | 116 | Install [uv](https://docs.astral.sh/uv/) for fast package management: 117 | 118 | ```bash 119 | curl -LsSf https://astral.sh/uv/install.sh | sh 120 | ``` 121 | 122 | ### Setup Development Environment 123 | 124 | Sync dependencies and install the package in editable mode: 125 | 126 | ```bash 127 | uv sync --extra all --extra dev 128 | ``` 129 | 130 | ### Running Tests 131 | 132 | **Unit tests** (fast, no external dependencies): 133 | 134 | ```bash 135 | uv run pytest -m "not integration" tests 136 | ``` 137 | 138 | **Integration tests** (requires Redis, PostgreSQL, MySQL, DynamoDB): 139 | 140 | Using Docker Compose to set up all backends: 141 | 142 | ```bash 143 | docker-compose up -d 144 | docker-compose run --rm shell 145 | uv run pytest tests 146 | ``` 147 | 148 | Or manually with services running locally: 149 | 150 | ```bash 151 | uv run pytest tests 152 | ``` 153 | 154 | **Test on specific Python version**: 155 | 156 | ```bash 157 | uv venv --python 3.11 158 | uv sync --extra all --extra dev 159 | uv run pytest tests 160 | ``` 161 | 162 | ### Code Quality 163 | 164 | Run linting and type checking: 165 | 166 | ```bash 167 | uv run pre-commit run --all-files 168 | uv run ty check src examples 169 | ``` 170 | 171 | ### Building Documentation 172 | 173 | ```bash 174 | uv run mkdocs build 175 | # Or serve locally with live reload 176 | uv run mkdocs serve 177 | ``` 178 | 179 | ### Building the Package 180 | 181 | ```bash 182 | uv build 183 | ``` 184 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | patch: 4 | default: 5 | target: auto 6 | threshold: 10% 7 | project: 8 | default: 9 | target: auto 10 | threshold: 5% 11 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | postgres: 3 | image: postgres 4 | restart: always 5 | environment: 6 | POSTGRES_PASSWORD: postgres 7 | POSTGRES_DB: pyncette 8 | ports: 9 | - "5432:5432" 10 | 11 | redis: 12 | image: redis 13 | restart: always 14 | ports: 15 | - "6379:6379" 16 | 17 | localstack: 18 | image: localstack/localstack 19 | ports: 20 | - "4566:4566" 21 | environment: 22 | - SERVICES=dynamodb 23 | 24 | mysql: 25 | image: mysql 26 | ports: 27 | - "3306:3306" 28 | environment: 29 | MYSQL_ALLOW_EMPTY_PASSWORD: "1" 30 | MYSQL_DATABASE: pyncette 31 | MYSQL_USER: pyncette 32 | MYSQL_PASSWORD: password 33 | 34 | shell: 35 | build: 36 | context: . 37 | dockerfile: Dockerfile.dev 38 | command: bash 39 | working_dir: /src 40 | environment: 41 | POSTGRES_URL: "postgres://postgres:postgres@postgres/pyncette" 42 | REDIS_URL: "redis://redis" 43 | DYNAMODB_ENDPOINT: "http://localstack:4566" 44 | LOCALSTACK_HOST: "localstack" 45 | AWS_ACCESS_KEY_ID: "foobar" 46 | AWS_SECRET_ACCESS_KEY: "foobar" 47 | MYSQL_HOST: "mysql" 48 | volumes: 49 | - type: bind 50 | source: . 
51 | target: /src 52 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | dumb-init \ 5 | && rm -rf /var/lib/apt/lists/* 6 | 7 | ADD . /opt/pyncette 8 | RUN pip install /opt/pyncette[all] && rm -rf /opt/ 9 | 10 | WORKDIR /pyncette 11 | ADD ./docker/entrypoint.py entrypoint.py 12 | 13 | EXPOSE 9699/tcp 14 | ENV USE_UVLOOP=1 15 | 16 | ENTRYPOINT ["/usr/bin/dumb-init", "--"] 17 | CMD ["python", "entrypoint.py"] 18 | -------------------------------------------------------------------------------- /docker/entrypoint.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from prometheus_client import start_http_server 4 | 5 | from pyncette import Context 6 | from pyncette import Pyncette 7 | from pyncette.healthcheck import use_healthcheck_server 8 | from pyncette.prometheus import use_prometheus 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | app = Pyncette(sqlite_database="pyncette.db") 13 | use_healthcheck_server(app) 14 | use_prometheus(app) 15 | 16 | 17 | @app.task(schedule="* * * * * */2") 18 | async def hello_world(context: Context): 19 | logger.info("Hello, world!") 20 | 21 | 22 | if __name__ == "__main__": 23 | start_http_server(port=9699, addr="0.0.0.0") # noqa: S104 24 | app.main() 25 | -------------------------------------------------------------------------------- /docs/advanced_usage.md: -------------------------------------------------------------------------------- 1 | # Advanced usage 2 | 3 | ## Partitioned dynamic tasks 4 | 5 | Certain backends, like Redis and Amazon DynamoDB have a natural partitioning to them. Generally, when using 6 | dynamic tasks, the task name is used as a partition key. For example, in DynamoDB, each dynamic task instance 7 | is associated with one row/document, but they all share the same partition id. 8 | 9 | Similarly for Redis, each task instance record is stored in its own key, but the index that sets them in order of 10 | next execution is stored in a single key, so a single large task will not benefit from a clustered Redis setup. 11 | 12 | If there is a very large number of dynamic task instances associated with a single task or they are polled 13 | very frequently, this can lead to hot partitions and degraded performance. There can also be limits as to how many 14 | task instances can even be stored in a single partition. For DynamoDB, the limit is 10GB. 15 | 16 | Pyncette supports transparent partitioning of tasks through `partitioned_task` decorator. 
17 | 18 | ```python 19 | from pyncette import Pyncette, Context 20 | 21 | app = Pyncette() 22 | 23 | 24 | @app.partitioned_task(partition_count=32) 25 | async def hello(context: Context) -> None: 26 | print(f"Hello {context.args['username']}") 27 | 28 | 29 | async with app.create() as app_context: 30 | await asyncio.gather( 31 | app_context.schedule_task( 32 | hello, "bill_task", schedule="0 * * * *", username="bill" 33 | ), 34 | app_context.schedule_task( 35 | hello, "steve_task", schedule="20 * * * *", username="steve" 36 | ), 37 | app_context.schedule_task( 38 | hello, "john_task", schedule="40 * * * *", username="john" 39 | ), 40 | ) 41 | await app_context.run() 42 | ``` 43 | 44 | This splits the dynamic task into 32 partitions and the task instances are automatically assigned to them based on the hash of the task instance name. 45 | 46 | The default partition selector uses SHA1 hash of the instance name, but a custom selector can be provided: 47 | 48 | ```python 49 | def custom_partition_selector(partition_count: int, task_id: str) -> int: 50 | return ( 51 | hash(task_id) % partition_count 52 | ) # Do not use this, as the hash() is not stable 53 | 54 | 55 | @app.partitioned_task(partition_count=32, partition_selector=custom_partition_selector) 56 | async def hello(context: Context) -> None: 57 | print(f"Hello {context.args['username']}") 58 | ``` 59 | 60 | ### Choosing the partition count 61 | 62 | Care must be taken when selecting a partition count, as it is not easy to change it later after tasks have already been 63 | scheduled. Changing a partition count will generally map task instances to a different partition, making them not run and also 64 | making it impossible to unschedule them through `unschedule_task`. 65 | 66 | There is also a tradeoff, as the time complexity of a single Pyncette poll grows linearly with the total number of tasks (or their 67 | partitions). Setting the number of partitions too high can lead to diminished performance due to the polling overhead. 68 | 69 | It is possible to configure Pyncette to only poll certain partitions using the `enabled_partitions` parameter. This will allow the 70 | tasks to be scheduled and unscheduled by any application instance, but only the partitions selected will be polled. You may use 71 | this if you have a large number of instances for a given task in order to spread the load evenly among them. 72 | 73 | ```python 74 | @app.partitioned_task( 75 | partition_count=8, 76 | # Partitions 4, 5, 6 and 7 will not be polled 77 | enabled_partitions=[0, 1, 2, 3], 78 | ) 79 | async def hello(context: Context) -> None: 80 | print(f"Hello {context.args['username']}") 81 | ``` 82 | -------------------------------------------------------------------------------- /docs/api-reference.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | This page is automatically generated from the Python source code docstrings. 4 | 5 | ::: pyncette 6 | options: 7 | show_submodules: true 8 | -------------------------------------------------------------------------------- /docs/backends.md: -------------------------------------------------------------------------------- 1 | # Backends 2 | 3 | By default Pyncette runs without persistence. This means that the schedule is maintained in-memory and there is no coordination between multiple instances of the app.
4 | 5 | Enabling persistence allows the application to recover from restarts as well as the ability to run multiple instances of an app concurrently without duplicate executions of tasks. 6 | 7 | ## SQLite 8 | 9 | SQLite is the default persistence engine and is included in the base Python package. 10 | 11 | ```python 12 | from pyncette import Pyncette, Context 13 | 14 | app = Pyncette(sqlite_database="pyncette.db") 15 | 16 | 17 | @app.task(schedule="* * * * * */10") 18 | async def foo(context: Context): 19 | print("This will run every 10 seconds") 20 | 21 | 22 | if __name__ == "__main__": 23 | app.main() 24 | ``` 25 | 26 | ## Redis 27 | 28 | Redis can be enabled by passing `redis_repository` as the `repository_factory` parameter to the `Pyncette` constructor. 29 | 30 | ```python 31 | from pyncette import Pyncette, Context 32 | from pyncette.redis import redis_repository 33 | 34 | app = Pyncette(repository_factory=redis_repository, redis_url="redis://localhost") 35 | ``` 36 | 37 | Optionally, the tasks can be namespaced if the Redis server is shared among different Pyncette apps: 38 | 39 | ```python 40 | app = Pyncette( 41 | repository_factory=redis_repository, 42 | redis_url="redis://localhost", 43 | redis_namespace="my_super_app", 44 | ) 45 | ``` 46 | 47 | ## PostgreSQL 48 | 49 | PostgreSQL can be enabled by passing `postgres_repository` as the `repository_factory` parameter to the `Pyncette` constructor. 50 | 51 | ```python 52 | from pyncette import Pyncette, Context 53 | from pyncette.postgres import postgres_repository 54 | 55 | app = Pyncette( 56 | repository_factory=postgres_repository, 57 | postgres_url="postgres://postgres@localhost/pyncette", 58 | postgres_table_name="pyncette_tasks", 59 | ) 60 | ``` 61 | 62 | The table will be automatically initialized on startup if it does not exist unless `postgres_skip_table_create` is set to `True`. 63 | 64 | ## MySQL 65 | 66 | MySQL can be configured by passing `mysql_repository` as the `repository_factory` parameter to the `Pyncette` constructor. 67 | 68 | The MySQL backend requires MySQL version 8.0+. 69 | 70 | ```python 71 | from pyncette import Pyncette, Context 72 | from pyncette.mysql import mysql_repository 73 | 74 | app = Pyncette( 75 | repository_factory=mysql_repository, 76 | mysql_host="localhost", 77 | mysql_database="pyncette", 78 | mysql_user="pyncette", 79 | mysql_password="password", 80 | mysql_table_name="pyncette_tasks", 81 | ) 82 | ``` 83 | 84 | The table will be automatically initialized on startup if it does not exist unless `mysql_skip_table_create` is set to `True`. 85 | 86 | ## Amazon DynamoDB 87 | 88 | The Amazon DynamoDB backend can be configured with `dynamodb_repository`. 89 | 90 | ```python 91 | from pyncette import Pyncette, Context 92 | from pyncette.dynamodb import dynamodb_repository 93 | 94 | app = Pyncette( 95 | repository_factory=dynamodb_repository, 96 | dynamodb_region_name="eu-west-1", 97 | dynamodb_table_name="pyncette", 98 | ) 99 | ``` 100 | 101 | The DynamoDB repository will use [ambient credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#guide-credentials), such as environment variables, `~/.aws/config` or the EC2 metadata service if e.g. running on EC2 or a Kubernetes cluster with kiam/kube2iam. 102 | 103 | For convenience, an appropriate DynamoDB table will be automatically created on startup if it does not exist. The created table uses the on-demand pricing model.
If you would like to customize this behavior, you can manually create the table beforehand and pass `dynamodb_skip_table_create=True` in parameters. 104 | 105 | Expected table schema should look something like this 106 | 107 | ```json 108 | { 109 | "AttributeDefinitions": [ 110 | { "AttributeName": "partition_id", "AttributeType": "S" }, 111 | { "AttributeName": "ready_at", "AttributeType": "S" }, 112 | { "AttributeName": "task_id", "AttributeType": "S" } 113 | ], 114 | "KeySchema": [ 115 | { "AttributeName": "partition_id", "KeyType": "HASH" }, 116 | { "AttributeName": "task_id", "KeyType": "RANGE" } 117 | ], 118 | "LocalSecondaryIndexes": [ 119 | { 120 | "IndexName": "ready_at", 121 | "KeySchema": [ 122 | { "AttributeName": "partition_id", "KeyType": "HASH" }, 123 | { "AttributeName": "ready_at", "KeyType": "RANGE" } 124 | ], 125 | "Projection": { 126 | "ProjectionType": "ALL" 127 | } 128 | } 129 | ] 130 | } 131 | ``` 132 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.0.0 (2025-10-17) 4 | 5 | ### Breaking Changes 6 | 7 | - **Dropped Python 3.8 support** - Minimum Python version is now 3.9 8 | 9 | ### Packaging and Tooling Modernization 10 | 11 | - Migrated from `setup.py` to modern PEP 621 `pyproject.toml` with hatchling build backend 12 | - Replaced tox with uv for dependency management and testing across all workflows 13 | - Updated GitHub Actions workflows to use native uv commands (`uv sync`, `uv run`, `uv build`) 14 | - Migrated documentation from Sphinx/reStructuredText to MkDocs Material/Markdown 15 | - Set up automatic API documentation generation with mkdocstrings 16 | - Replaced mypy with ty for type checking 17 | - Consolidated linting/formatting to use Ruff (replacing black, isort, pyupgrade) 18 | - Modernized pre-commit hooks configuration 19 | - Updated Docker development environment to install uv 20 | 21 | ### Bug Fixes 22 | 23 | - Fixed Python 3.14 compatibility: Converted all SQLite SQL queries to use consistent named parameter style (`:name`) instead of mixing PostgreSQL-style (`$1`), qmark (`?`), and named parameters 24 | - Fixed latent bug in `poll_task` where lease comparison failed due to UUID vs string type mismatch 25 | - Fixed bug in `poll_dynamic_task` where optimistic locking was not working due to incorrect parameter binding 26 | 27 | ### Documentation 28 | 29 | - Converted all documentation files from `.rst` to `.md` format 30 | - Updated all development instructions to use uv commands 31 | - Added relevant PyPI keywords for better discoverability 32 | 33 | ## 0.11.0 (2024-11-25) 34 | 35 | - Add support for Python 3.12 and 3.13 36 | 37 | ## 0.10.1 (2023-05-09) 38 | 39 | - Include missing lua files in the built wheel 40 | 41 | ## 0.10.0 (2023-05-08) 42 | 43 | - Drop support for Python 3.7 44 | - Add support for Python 3.11 45 | - Modernize Python package structure and linters 46 | - Fix a few bugs and type annotations 47 | 48 | ## 0.8.1 (2021-04-08) 49 | 50 | - Improve performance for calculation of the next execution time 51 | - Add ability for repositories to pass a pagination token 52 | - Add `add_to_context()` to inject static data to context 53 | - Clean up documentation and add additional examples 54 | 55 | ## 0.8.0 (2021-04-05) 56 | 57 | - Added Amazon DynamoDB backend 58 | - Added MySQL backend 59 | - Added support for partitioned dynamic tasks 60 | 61 | ## 0.7.0 (2021-03-31) 62 | 63 | - 
Added support for automatic and cooperative lease heartbeating 64 | - PostgreSQL backend can now skip automatic table creation 65 | - Improved signal handling 66 | - CI: Add Codecov integration 67 | - Devenv: Run integration tests in Docker Compose 68 | 69 | ## 0.6.1 (2020-04-02) 70 | 71 | - Optimize the task querying on Postgres backend 72 | - Fix: ensure that there are no name collisions between concrete instances of different dynamic tasks 73 | - Improve fairness of polling tasks under high contention. 74 | 75 | ## 0.6.0 (2020-03-31) 76 | 77 | - Added PostgreSQL backend 78 | - Added Sqlite backend and made it the default (replacing `InMemoryRepository`) 79 | - Refactored test suite to cover all conformance/integration tests on all backends 80 | - Refactored Redis backend, simplifying the Lua scripts and improving exceptional case handling (e.g. tasks disappearing between query and poll) 81 | - Main loop only sleeps for the rest of remaining `poll_interval` before next tick instead of the full amount 82 | - General bug fixes, documentation changes, clean up 83 | 84 | ## 0.5.0 (2020-03-27) 85 | 86 | - Fixes bug where a locked dynamic task could be executed again on next tick. 87 | - poll_task is now reentrant with regards to locking. If the lease passed in matches the lease on the task, it behaves as though it were unlocked. 88 | 89 | ## 0.4.0 (2020-02-16) 90 | 91 | - Middleware support and optional metrics via Prometheus 92 | - Improved the graceful shutdown behavior 93 | - Task instance and application context are now available in the task context 94 | - Breaking change: dynamic task parameters are now accessed via `context.args['name']` instead of `context.name` 95 | - Improved examples, documentation and packaging 96 | 97 | ## 0.2.0 (2020-01-08) 98 | 99 | - Timezone support 100 | - More efficient polling when the Redis backend is used 101 | 102 | ## 0.1.1 (2020-01-08) 103 | 104 | - First release that actually works. 105 | 106 | ## 0.0.0 (2019-12-31) 107 | 108 | - First release on PyPI. 109 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every 4 | little bit helps, and credit will always be given. 5 | 6 | ## Bug reports 7 | 8 | When [reporting a bug](https://github.com/tibordp/pyncette/issues) please include: 9 | 10 | - Your operating system name and version. 11 | - Any details about your local setup that might be helpful in troubleshooting. 12 | - Detailed steps to reproduce the bug. 13 | 14 | ## Documentation improvements 15 | 16 | Pyncette could always use more documentation, whether as part of the 17 | official Pyncette docs, in docstrings, or even on the web in blog posts, 18 | articles, and such. 19 | 20 | ## Feature requests and feedback 21 | 22 | The best way to send feedback is to file an issue at https://github.com/tibordp/pyncette/issues. 23 | 24 | If you are proposing a feature: 25 | 26 | - Explain in detail how it would work. 27 | - Keep the scope as narrow as possible, to make it easier to implement. 28 | - Remember that this is a volunteer-driven project, and that code contributions are welcome :) 29 | 30 | ## Development 31 | 32 | To set up `pyncette` for local development: 33 | 34 | 1. Fork [pyncette](https://github.com/tibordp/pyncette) 35 | (look for the "Fork" button). 36 | 37 | 1.
Clone your fork locally: 38 | 39 | ```bash 40 | git clone git@github.com:tibordp/pyncette.git 41 | ``` 42 | 43 | 1. Create a branch for local development: 44 | 45 | ```bash 46 | git checkout -b name-of-your-bugfix-or-feature 47 | ``` 48 | 49 | Now you can make your changes locally. 50 | 51 | 1. Set up your development environment: 52 | 53 | ```bash 54 | uv sync --extra all --extra dev 55 | ``` 56 | 57 | 1. Running integration tests assumes that there will be Redis, PostgreSQL, MySQL and Localstack (for DynamoDB) running on localhost. Alternatively, there is a Docker Compose environment that will set up all the backends so that integration tests can run seamlessly: 58 | 59 | ```bash 60 | docker-compose up -d 61 | docker-compose run --rm shell 62 | ``` 63 | 64 | 1. When you're done making changes, run all the checks: 65 | 66 | ```bash 67 | # Run linting and formatting 68 | uv run pre-commit run --all-files 69 | 70 | # Run type checking 71 | uv run ty check src examples 72 | 73 | # Run tests 74 | uv run pytest tests 75 | 76 | # Build documentation 77 | uv run mkdocs build 78 | ``` 79 | 80 | 1. Commit your changes and push your branch to GitHub: 81 | 82 | ```bash 83 | git add . 84 | git commit -m "Your detailed description of your changes." 85 | git push origin name-of-your-bugfix-or-feature 86 | ``` 87 | 88 | 1. Submit a pull request through the GitHub website. 89 | 90 | If you run into issues setting up a local environment or testing the code locally, feel free to submit the PR anyway and GitHub Actions will test it for you. 91 | 92 | ## Pull Request Guidelines 93 | 94 | If you need some code review or feedback while you're developing the code, just make the pull request. 95 | 96 | For merging, you should: 97 | 98 | 1. Update documentation when there's new API, functionality, etc. 99 | 1. Add a note to `docs/changelog.md` about the changes. 100 | 101 | ## Tips 102 | 103 | To run a subset of tests: 104 | 105 | ```bash 106 | uv run pytest -k test_myfeature 107 | ``` 108 | 109 | To run tests for a specific Python version: 110 | 111 | ```bash 112 | uv venv --python 3.11 113 | uv sync --extra all --extra dev 114 | uv run pytest tests 115 | ``` 116 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Pyncette 2 | 3 | A reliable distributed scheduler with pluggable storage backends for Async Python. 4 | 5 | ## Overview 6 | 7 | Pyncette is designed for reliable (at-least-once or at-most-once) execution of recurring tasks (think cronjobs) whose 8 | lifecycles are managed dynamically, but can work effectively for non-recurring tasks too. 9 | 10 | Example use cases: 11 | 12 | - You want to perform a database backup every day at noon 13 | - You want a report to be generated daily for your 10M users at the time of their choosing 14 | - You want currency conversion rates to be refreshed every 10 seconds 15 | - You want to allow your users to schedule non-recurring emails to be sent at an arbitrary time in the future 16 | 17 | Pyncette might not be a good fit if: 18 | 19 | - You want your tasks to be scheduled to run (ideally) once as soon as possible. It is doable, but you will be better served by a general purpose reliable queue like RabbitMQ or Amazon SQS. 20 | - You need tasks to execute at sub-second intervals with low jitter. Pyncette coordinates execution on a per task-instance basis and this coordination can add overhead and jitter.
21 | 22 | ## Quick Start 23 | 24 | Simple in-memory scheduler (does not persist state) 25 | 26 | ```python 27 | from pyncette import Pyncette, Context 28 | 29 | app = Pyncette() 30 | 31 | 32 | @app.task(schedule="* * * * *") 33 | async def foo(context: Context): 34 | print("This will run every minute") 35 | 36 | 37 | if __name__ == "__main__": 38 | app.main() 39 | ``` 40 | 41 | Persistent distributed cron using Redis (coordinates execution with parallel instances and survives restarts) 42 | 43 | ```python 44 | from pyncette import Pyncette, Context 45 | from pyncette.redis import redis_repository 46 | 47 | app = Pyncette(repository_factory=redis_repository, redis_url="redis://localhost") 48 | 49 | 50 | @app.task(schedule="* * * * * */10") 51 | async def foo(context: Context): 52 | print("This will run every 10 seconds") 53 | 54 | 55 | if __name__ == "__main__": 56 | app.main() 57 | ``` 58 | 59 | See the `examples` directory for more examples of usage. 60 | 61 | ## Supported backends 62 | 63 | Pyncette comes with an implementation for the following backends (used for persistence and coordination) out-of-the-box: 64 | 65 | - SQLite (included) 66 | - Redis (`pip install pyncette[redis]`) 67 | - PostgreSQL (`pip install pyncette[postgres]`) 68 | - MySQL 8.0+ (`pip install pyncette[mysql]`) 69 | - Amazon DynamoDB (`pip install pyncette[dynamodb]`) 70 | 71 | Pyncette imposes few requirements on the underlying datastores, so it can be extended to support other databases or 72 | custom storage formats / integrations with existing systems. For best results, the backend needs to provide: 73 | 74 | - Some sort of serialization mechanism, e.g. traditional transactions, atomic stored procedures or compare-and-swap 75 | - Efficient range queries over a secondary index, which can be eventually consistent 76 | 77 | ## Features 78 | 79 | - **Reliable execution**: At-least-once or at-most-once execution guarantees 80 | - **Distributed coordination**: Run multiple instances without duplicate task execution 81 | - **Flexible scheduling**: Cron-like syntax or interval-based scheduling 82 | - **Dynamic tasks**: Register and unregister tasks at runtime 83 | - **Timezone support**: Schedule tasks in different timezones 84 | - **Heartbeating**: Keep long-running tasks alive with cooperative or automatic heartbeating 85 | - **Middleware support**: Add custom logic around task execution 86 | - **Pluggable backends**: SQLite, Redis, PostgreSQL, MySQL, and DynamoDB support 87 | 88 | ## License 89 | 90 | Free software: MIT license 91 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | At the command line: 4 | 5 | ```bash 6 | pip install pyncette 7 | ``` 8 | 9 | For installing with Redis persistence: 10 | 11 | ```bash 12 | pip install pyncette[redis] 13 | ``` 14 | 15 | For installing with MySQL persistence: 16 | 17 | ```bash 18 | pip install pyncette[mysql] 19 | ``` 20 | 21 | For installing with Amazon DynamoDB persistence: 22 | 23 | ```bash 24 | pip install pyncette[dynamodb] 25 | ``` 26 | 27 | For installing with PostgreSQL persistence: 28 | 29 | ```bash 30 | pip install pyncette[postgres] 31 | ``` 32 | 33 | For installing with Prometheus metrics exporter: 34 | 35 | ```bash 36 | pip install pyncette[prometheus] 37 | ``` 38 | 39 | For a full installation with all the extras: 40 | 41 | ```bash 42 | pip install pyncette[all] 43 | ```
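44 | 45 | As a quick sanity check of the installation, you can print the installed version (the `__version__` attribute is defined in `pyncette/__init__.py` and kept up to date by the release tooling): 46 | 47 | ```bash 48 | python -c "import pyncette; print(pyncette.__version__)" 49 | ```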
-------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | The core unit of execution in Pyncette is a `Task`. Each task is a Python coroutine that specifies what needs to be executed. 4 | 5 | ```python 6 | from pyncette import Pyncette, Context 7 | 8 | app = Pyncette() 9 | 10 | 11 | @app.task(interval=datetime.timedelta(seconds=2)) 12 | async def successful_task(context: Context) -> None: 13 | print("This will execute every 2 seconds") 14 | 15 | 16 | if __name__ == "__main__": 17 | app.main() 18 | ``` 19 | 20 | ## Running the main loop 21 | 22 | The usual use case is that Pyncette runs as its own process, so the standard way to start the main loop is with the `main` method of `Pyncette`. This sets up logging to standard output and a signal handler allowing for graceful shutdown (the first SIGINT initiates the graceful shutdown and the second one terminates the process). 23 | 24 | If Pyncette is run alongside other code or for customization, `create` can be used to initialize the runtime environment and then the main loop can be run with `run`: 25 | 26 | ```python 27 | import asyncio 28 | from pyncette import Pyncette 29 | 30 | app = Pyncette() 31 | 32 | ... 33 | 34 | async with app.create() as app_context: 35 | await app_context.run() 36 | ``` 37 | 38 | ## Specifying the schedule 39 | 40 | There are two ways a schedule can be specified. One is with the cron-like syntax (uses `croniter` under the hood to support the calculation): 41 | 42 | ```python 43 | @app.task(schedule="* * * * *") 44 | async def every_minute(context: Context): ... 45 | 46 | 47 | @app.task(schedule="* * * * * */10") 48 | async def every_10_seconds(context: Context): ... 49 | 50 | 51 | @app.task(schedule="20 4 * * * *") 52 | async def every_day_at_4_20_am(context: Context): ... 53 | ``` 54 | 55 | The other way is with an interval: 56 | 57 | ```python 58 | @app.task(interval=datetime.timedelta(seconds=12)) 59 | async def every_12_seconds(context: Context): ... 60 | ``` 61 | 62 | ## Customizing tasks 63 | 64 | Pyncette supports multiple execution modes, which provide different levels of reliability guarantees, depending on the nature of the task. 65 | 66 | The default task configuration: 67 | 68 | - When the task is scheduled for execution, it is locked for 60 seconds 69 | - If the task execution succeeds, the next execution is scheduled and the task is unlocked 70 | - If the task execution fails (exception is raised), the lock is not released, so it will be retried after the lease expires. 71 | - If the task execution exceeds the lease duration, it will be executed again (so there could be two executions at the same time) 72 | 73 | ### Best-effort tasks 74 | 75 | If the task is run in a best-effort mode, locking will not be employed, and the next execution will be scheduled immediately when it becomes ready. 76 | 77 | ```python 78 | from pyncette import ExecutionMode 79 | 80 | 81 | @app.task( 82 | interval=datetime.timedelta(seconds=10), execution_mode=ExecutionMode.AT_MOST_ONCE 83 | ) 84 | async def every_10_seconds(context: Context): 85 | print("Ping") 86 | ``` 87 | 88 | !!!caution 89 | If best effort is used, there is no way to retry a failed execution, and exceptions thrown by the task will only be logged.
90 | 91 | ### Failure behavior 92 | 93 | Failure behavior can be specified with the `failure_mode` parameter: 94 | 95 | ```python 96 | from pyncette import FailureMode 97 | 98 | 99 | @app.task(interval=datetime.timedelta(seconds=10), failure_mode=FailureMode.UNLOCK) 100 | async def every_10_seconds(context: Context): 101 | print("Ping") 102 | ``` 103 | 104 | - `FailureMode.NONE`: the task will stay locked until the lease expires. This is the default. 105 | - `FailureMode.UNLOCK`: the task will be immediately unlocked if an exception is thrown, so it will be retried on the next tick. 106 | - `FailureMode.COMMIT`: treat the exception as a success and schedule the next execution even if an exception is thrown. 107 | 108 | ### Timezone support 109 | 110 | Pyncette is timezone-aware; the timezone for a task can be specified with the `timezone` parameter: 111 | 112 | ```python 113 | from pyncette import ExecutionMode 114 | 115 | 116 | @app.task(schedule="0 12 * * *", timezone="Europe/Dublin") 117 | async def task1(context: Context): 118 | print(f"Hello from Dublin!") 119 | 120 | 121 | @app.task(schedule="0 12 * * *", timezone="UTC+12") 122 | async def task2(context: Context): 123 | print(f"Hello from Камча́тка!") 124 | ``` 125 | 126 | The accepted values are all those that `dateutil.tz.gettz` accepts. 127 | 128 | ### Disabling a task 129 | 130 | Tasks can be disabled by passing `enabled=False` in the parameters. This can be used, for example, 131 | to conditionally enable tasks only on certain instances. 132 | 133 | ```python 134 | @app.task(schedule="* * * * *", enabled=False) 135 | async def task1(context: Context): 136 | print(f"This will never run.") 137 | ``` 138 | 139 | Tasks can also be disabled in the initialization code: 140 | 141 | ```python 142 | from pyncette import Pyncette, Context 143 | 144 | app = Pyncette() 145 | 146 | 147 | @app.task(schedule="* * * * *") 148 | async def task1(context: Context): 149 | print(f"This will never run.") 150 | 151 | 152 | async with app.create() as app_context: 153 | task1.enabled = False 154 | await app_context.run() 155 | ``` 156 | 157 | ### Task parameters 158 | 159 | The `task` decorator accepts an arbitrary number of additional parameters, which are available through the `context` parameter: 160 | 161 | ```python 162 | from pyncette import ExecutionMode 163 | 164 | 165 | # If we use multiple decorators on the same coroutine, we must explicitly provide the name 166 | @app.task(name="task1", interval=datetime.timedelta(seconds=10), username="abra") 167 | @app.task(name="task2", interval=datetime.timedelta(seconds=20), username="kadabra") 168 | @app.task(name="task3", interval=datetime.timedelta(seconds=30), username="alakazam") 169 | async def task(context: Context): 170 | print(f"{context.args['username']}") 171 | ``` 172 | 173 | This allows for parametrized tasks with multiple decorators; this is an essential feature needed to support dynamic tasks. 174 | 175 | !!!note 176 | There is a restriction that all the values of the parameters must be JSON-serializable, since they are persisted in storage when dynamic tasks are used. 177 | 178 | ## Middlewares 179 | 180 | If you have common logic that should execute around every task invocation, middlewares can be used. Good examples of middlewares are ones used for logging and metrics.
181 | 182 | ```python 183 | app = Pyncette() 184 | 185 | 186 | @app.middleware 187 | async def retry(context: Context, next: Callable[[], Awaitable[None]]): 188 | # Example only, prefer to rely on Pyncette to drive task retry logic 189 | for _ in range(5): 190 | try: 191 | await next() 192 | return 193 | except Exception as e: 194 | pass 195 | raise Exception(f"Task {context.task.name} failed too many times.") 196 | 197 | 198 | @app.middleware 199 | async def logging(context: Context, next: Callable[[], Awaitable[None]]): 200 | logger.info(f"Task {context.task.name} started") 201 | try: 202 | await next() 203 | except Exception as e: 204 | logger.error(f"Task {context.task.name} failed", e) 205 | raise 206 | 207 | 208 | @app.middleware 209 | async def db_transaction(context: Context, next: Callable[[], Awaitable[None]]): 210 | context.db.begin_transaction() 211 | try: 212 | await next() 213 | except Exception: 214 | context.db.rollback() 215 | raise 216 | else: 217 | context.db.commit() 218 | ``` 219 | 220 | Middlewares execute in the order they are defined. 221 | 222 | ## Fixtures 223 | 224 | Fixtures provide a convenient way to inject dependencies into tasks and to specify the set-up and tear-down code. They can be thought of as application-level middlewares. For example, let's say we want to inject the database and a logfile as dependencies to all our tasks: 225 | 226 | ```python 227 | app = Pyncette() 228 | 229 | 230 | @app.fixture() 231 | async def db(app_context: PyncetteContext): 232 | db = await database.connect(...) 233 | try: 234 | yield db 235 | finally: 236 | await db.close() 237 | 238 | 239 | @app.fixture(name="super_log_file") 240 | async def logfile(app_context: PyncetteContext): 241 | with open("log.txt", "a") as file: 242 | yield file 243 | 244 | 245 | @app.task(interval=datetime.timedelta(seconds=2)) 246 | async def successful_task(context: Context) -> None: 247 | context.super_log_file.write("Querying the database") 248 | results = await context.db.query(...) 249 | ... 250 | ``` 251 | 252 | The lifetime of a fixture is that of a Pyncette application, i.e. the setup code for all fixtures runs before the first tick and the tear-down code runs after the graceful shutdown is initiated and all the pending tasks have finished. Like middlewares, fixtures execute in the order they are defined (and in reverse order on shutdown). 253 | 254 | ## Persistence 255 | 256 | By default Pyncette runs without persistence. This means that the schedule is maintained in-memory and there is no coordination between multiple instances of the app. 257 | 258 | Enabling persistence allows the application to recover from restarts as well as the ability to run multiple instances of an app concurrently without duplicate executions of tasks. 259 | 260 | See [Backends](backends.md) for instructions on how to configure persistence for a database of your choice. 261 | 262 | ## Heartbeating 263 | 264 | If you have tasks with an unpredictable run time, it can be hard to come up with an appropriate lease duration in advance. If set too short, the lease will expire, leading to duplicate task execution, and if too long, there can be insufficient protection against unhealthy workers. 265 | 266 | A way to mitigate this is to use heartbeating. Heartbeating will periodically extend the lease on the task as long as the task is still running.
Pyncette supports two approaches to heartbeating: 267 | 268 | - Cooperative heartbeating: your task periodically calls `context.heartbeat()` to extend the lease 269 | - Automatic heartbeating: your task is decorated with `with_heartbeat` and it heartbeats automatically in the background for as long as the task is executing. 270 | 271 | Beware that automatic heartbeating can potentially be dangerous if, for example, your task is stuck in an infinite loop or an I/O operation that does not have a proper time out. In this case the lease can be kept alive indefinitely and the task will not make any progress. Cooperative heartbeating may be more verbose, but offers a greater degree of control. 272 | 273 | If `context.heartbeat()` is called when the lease is already lost, the call will raise `LeaseLostException`, allowing you to bail out early, since another instance is likely already processing the same task. 274 | 275 | ```python 276 | from pyncette.utils import with_heartbeat 277 | 278 | 279 | @app.task(schedule="* * * * * */10") 280 | @with_heartbeat() 281 | async def foo(context: Context): 282 | # The task will be kept alive by the heartbeat 283 | await asyncio.sleep(3600) 284 | 285 | 286 | if __name__ == "__main__": 287 | app.main() 288 | ``` 289 | 290 | ## Dynamic tasks 291 | 292 | With `schedule_task`, Pyncette supports a use case where the tasks are not necessarily known in advance. 293 | 294 | ```python 295 | @app.dynamic_task() 296 | async def hello(context: Context) -> None: 297 | print(f"Hello {context.args['username']}") 298 | 299 | 300 | async with app.create() as app_context: 301 | await asyncio.gather( 302 | app_context.schedule_task( 303 | hello, "bill_task", schedule="0 * * * *", username="bill" 304 | ), 305 | app_context.schedule_task( 306 | hello, "steve_task", schedule="20 * * * *", username="steve" 307 | ), 308 | app_context.schedule_task( 309 | hello, "john_task", schedule="40 * * * *", username="john" 310 | ), 311 | ) 312 | await app_context.run() 313 | ``` 314 | 315 | When persistence is used, the schedules and task parameters of the tasks are persisted alongside the execution data, which allows the tasks to be registered and unregistered at will. 316 | 317 | An example use case is a web application where every user can have something happen at their chosen schedule. Polling is efficient, since the concrete instances of the dynamic task are only loaded from the storage if they are already due, instead of being polled all the time. 318 | 319 | The task instances can be removed with `unschedule_task`: 320 | 321 | ```python 322 | ... 323 | 324 | async with app.create() as app_context: 325 | await app_context.schedule_task( 326 | hello, "bill_task", schedule="0 * * * *", username="bill" 327 | ) 328 | await app_context.unschedule_task(hello, "bill_task") 329 | await app_context.run() 330 | ``` 331 | 332 | !!!note 333 | If the number of dynamic tasks is large, it is a good idea to limit the batch size: 334 | 335 | ```` 336 | ```python 337 | app = Pyncette( 338 | repository_factory=redis_repository, 339 | redis_url='redis://localhost', 340 | batch_size=10 341 | ) 342 | ``` 343 | 344 | This will ensure that only a specified number of dynamic tasks are scheduled for execution during a single tick, as well as allow multiple instances of the same app to load balance effectively. 345 | ```` 346 | 347 | ## Once-off dynamic tasks 348 | 349 | Dynamic tasks can also be scheduled to execute only once at a specific date.
290 | ## Dynamic tasks
291 | 
292 | Pyncette also supports a use case where the tasks are not necessarily known in advance; task instances are registered at runtime with `schedule_task`.
293 | 
294 | ```python
295 | @app.dynamic_task()
296 | async def hello(context: Context) -> None:
297 |     print(f"Hello {context.args['username']}")
298 | 
299 | 
300 | async with app.create() as app_context:
301 |     await asyncio.gather(
302 |         app_context.schedule_task(
303 |             hello, "bill_task", schedule="0 * * * *", username="bill"
304 |         ),
305 |         app_context.schedule_task(
306 |             hello, "steve_task", schedule="20 * * * *", username="steve"
307 |         ),
308 |         app_context.schedule_task(
309 |             hello, "john_task", schedule="40 * * * *", username="john"
310 |         ),
311 |     )
312 |     await app_context.run()
313 | ```
314 | 
315 | When persistence is used, the schedules and task parameters of the task instances are persisted alongside the execution data, which allows the tasks to be registered and unregistered at will.
316 | 
317 | An example use case is a web application where every user can have something happen at their chosen schedule. Polling is efficient, since the concrete instances of the dynamic task are only loaded from storage if they are already due, instead of being polled all the time.
318 | 
319 | The task instances can be removed with `unschedule_task`:
320 | 
321 | ```python
322 | ...
323 | 
324 | async with app.create() as app_context:
325 |     await app_context.schedule_task(
326 |         hello, "bill_task", schedule="0 * * * *", username="bill"
327 |     )
328 |     await app_context.unschedule_task(hello, "bill_task")
329 |     await app_context.run()
330 | ```
331 | 
332 | !!!note
333 |     If the number of dynamic tasks is large, it is a good idea to limit the batch size:
334 | 
335 |     ````
336 |     ```python
337 |     app = Pyncette(
338 |         repository_factory=redis_repository,
339 |         redis_url='redis://localhost',
340 |         batch_size=10
341 |     )
342 |     ```
343 | 
344 |     This ensures that only the specified number of dynamic task instances is scheduled for execution during a single tick, and also allows multiple instances of the same app to load-balance effectively.
345 |     ````
346 | 
347 | ## Once-off dynamic tasks
348 | 
349 | Dynamic tasks can also be scheduled to execute only once at a specific date.
350 | 
351 | ```python
352 | @app.dynamic_task()
353 | async def task(context: Context) -> None:
354 |     print(f"Hello {context.task.name}!")
355 | 
356 | 
357 | async with app.create() as app_context:
358 |     await app_context.schedule_task(
359 |         task, "y2k38", execute_at=datetime(2038, 1, 19, 3, 14, 7)
360 |     )
361 |     await app_context.schedule_task(
362 |         task, "tomorrow", execute_at=datetime.now() + timedelta(days=1)
363 |     )
364 | 
365 |     # This will execute once immediately, since it is already overdue
366 |     await app_context.schedule_task(
367 |         task, "overdue", execute_at=datetime.now() - timedelta(days=1)
368 |     )
369 |     await app_context.run()
370 | ```
371 | 
372 | Once-off tasks have the same reliability guarantees as recurrent tasks, which are controlled by the `execution_mode` and `failure_mode` parameters, but in case of success, they will not be scheduled again.
373 | 
374 | ## Performance
375 | 
376 | Tasks are executed in parallel. If you have a lot of long-running tasks, you can set `concurrency_limit` in the `Pyncette` constructor, which ensures that there are at most that many tasks executing at any given time. If there are no free slots in the semaphore, this serves as back-pressure and ensures that no additional tasks are polled until some of the currently executing ones finish, enabling the pending tasks to be scheduled on other instances of your app. Setting `concurrency_limit` to 1 is equivalent to serializing the execution of all the tasks.
377 | 
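For instance, a minimal sketch (the bundled default executor caps concurrency at 100 unless overridden):

```python
# Allow at most 8 task executions at any given time; when all slots are
# taken, polling waits, which acts as back-pressure.
app = Pyncette(concurrency_limit=8)
```
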
378 | Depending on the backend used, having a dynamic task with a very large number of instances can lead to diminished performance. See [Advanced Usage](advanced_usage.md) for a way to address this issue.
379 | 
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 | 
3 | ## [basic.py](./basic.py)
4 | 
5 | Hello world example.
6 | 
7 | ## [persistence.py](./persistence.py)
8 | 
9 | This example stores the state of the scheduler in a variety of backends supported by Pyncette.
10 | 
11 | By having a persistent backend, you can run multiple processes and they will coordinate
12 | execution among them, making sure that tasks are only executed by one of them on schedule.
13 | 
14 | ## [dynamic_tasks.py](./dynamic_tasks.py)
15 | 
16 | This example illustrates dynamic tasks, i.e. tasks that are not pre-defined in code and
17 | can be scheduled at runtime.
18 | 
19 | A function marked with `@app.dynamic_task` serves as a template, and individual task
20 | instances can be scheduled with `schedule_task` (and unscheduled with `unschedule_task`).
21 | 
22 | Using a persistent backend, Pyncette supports efficient execution of a large number of
23 | dynamic task instances.
24 | 
25 | ## [delay_queue.py](./delay_queue.py)
26 | 
27 | This example uses Pyncette to implement a reliable delay queue (persistence is needed for durability
28 | or for running multiple instances of the app at the same time; see [examples/persistence.py](./persistence.py) for details).
29 | 
30 | After the task instance succeeds, it will not be scheduled again, as it would be with recurrent tasks; however,
31 | if an exception is raised, it will be retried if `ExecutionMode.AT_LEAST_ONCE` is used.
32 | 
33 | ## [fixtures_and_middlewares.py](./fixtures_and_middlewares.py)
34 | 
35 | This example illustrates the use of fixtures and middlewares.
36 | 
37 | Middlewares are functions that wrap the execution of every defined task, so they are a good
38 | place to put cross-cutting concerns such as logging, database session management, metrics, ...
39 | 
40 | Fixtures can be thought of as application-level middlewares. They wrap the lifecycle of the entire
41 | Pyncette app, can be used to perform initialization and cleanup, and can inject resources such as
42 | service clients into the task context.
43 | 
44 | ## [healthcheck.py](./healthcheck.py)
45 | 
46 | This example illustrates the use of the healthcheck HTTP server. It exposes the /health endpoint,
47 | which returns 200 if the last successful poll was less than 2 poll intervals ago, and 500 otherwise.
48 | 
49 | ```
50 | curl localhost:8080/health
51 | ```
52 | 
53 | ## [heartbeat.py](./heartbeat.py)
54 | 
55 | This example demonstrates the heartbeating functionality, which allows the lease on the
56 | task to be extended. This can be useful if tasks have an unpredictable run time, to minimize
57 | the risk of another instance taking over the lease.
58 | 
59 | Heartbeating can be either cooperative or automatic.
60 | 
61 | ## [prometheus_metrics.py](./prometheus_metrics.py)
62 | 
63 | Pyncette ships with optional Prometheus instrumentation based on the official prometheus_client
64 | Python package. It includes the following metrics:
65 | 
66 | - Tick duration [Histogram]
67 | - Tick volume [Counter]
68 | - Tick failures [Counter]
69 | - Number of currently executing ticks [Gauge]
70 | - Task duration [Histogram]
71 | - Task volume [Counter]
72 | - Task failures [Counter]
73 | - Number of currently executing tasks [Gauge]
74 | - Task run staleness (i.e. how far behind the scheduled time the actual executions are) [Histogram]
75 | - Repository operation duration [Histogram]
76 | - Repository operation volume [Counter]
77 | - Repository operation failures [Counter]
78 | - Number of currently executing repository operations [Gauge]
79 | 
80 | It pushes the metrics to the default registry (`prometheus_client.REGISTRY`), so it can be combined with other
81 | code alongside it.
82 | 
83 | To see the exported metrics while running this example, use something like
84 | 
85 | ```
86 | curl localhost:9699/metrics
87 | ```
88 | 
89 | ## [benchmark.py](./benchmark.py)
90 | 
91 | This example schedules a large number of dynamic tasks and then runs them (in multiple processes) as a way
92 | to gauge the total throughput of Pyncette for a particular backend.
93 | 
94 | To run this example, configure the selected backend in the Pyncette constructor, then populate the database:
95 | 
96 | ```
97 | python examples/benchmark.py populate -n 
98 | ```
99 | 
100 | While the tasks are populating, you can run
101 | 
102 | ```
103 | python examples/benchmark.py run --processes <# of processes>
104 | ```
105 | 
106 | The process will continuously print the overall throughput (task executions per second) and the lag (seconds since the last successful tick).
107 | -------------------------------------------------------------------------------- /examples/basic.py: -------------------------------------------------------------------------------- 1 | from pyncette import Context 2 | from pyncette import Pyncette 3 | 4 | app = Pyncette() 5 | 6 | 7 | @app.task(schedule="* * * * * */5") 8 | async def hello_world(context: Context) -> None: 9 | print("Hello world!") 10 | 11 | 12 | if __name__ == "__main__": 13 | app.main() 14 | -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example schedules a large number of dynamic tasks and then runs them (in multiple processes) 3 | as a way to gauge the total throughput of Pyncette for a particular backend. 4 | 5 | To run this example, configure the selected backend in the Pyncette constructor, then run populate the database. 6 | 7 | python examples/benchmark.py populate -n 8 | 9 | While the tasks are populating you can run 10 | 11 | python examples/benchmark.py run --processes <# of processes> 12 | 13 | The process will continuously print the overall throughput (task executions per second) and the lag 14 | (seconds since the last successful tick). 15 | """ 16 | 17 | import argparse 18 | import asyncio 19 | import datetime 20 | import logging 21 | import random 22 | import time 23 | import uuid 24 | from multiprocessing import Process 25 | from multiprocessing.sharedctypes import RawValue # type: ignore 26 | from typing import Any 27 | from typing import Optional 28 | 29 | import coloredlogs 30 | 31 | from pyncette import Context 32 | from pyncette import ExecutionMode 33 | from pyncette import Pyncette 34 | from pyncette.redis import redis_repository 35 | 36 | logger = logging.getLogger(__name__) 37 | 38 | # Adjust the values below 39 | app = Pyncette( 40 | repository_factory=redis_repository, 41 | redis_url="redis://localhost", 42 | redis_namespace="benchmark", 43 | batch_size=100, 44 | ) 45 | 46 | PARTITION_COUNT = 32 47 | 48 | 49 | @app.partitioned_task(partition_count=PARTITION_COUNT, execution_mode=ExecutionMode.AT_LEAST_ONCE) 50 | async def benchmark_task(context: Context) -> None: 51 | context.hit_count.value += 1 52 | if context.app_context.last_tick is not None: 53 | context.staleness.value = (datetime.datetime.now(datetime.timezone.utc) - context.app_context.last_tick).total_seconds() 54 | 55 | 56 | async def populate(n: int, parallel: int) -> None: 57 | """Populates the database with n instances of the dynamic tasks""" 58 | 59 | async with app.create() as app_context: 60 | tasks = [] 61 | for i in range(n): 62 | interval = datetime.timedelta(seconds=random.randrange(10, 3600)) 63 | tasks.append(app_context.schedule_task(benchmark_task, str(uuid.uuid4()), interval=interval)) 64 | 65 | if len(tasks) == parallel: 66 | await asyncio.gather(*tasks) 67 | tasks = [] 68 | 69 | if (i + 1) % 1000 == 0: 70 | logger.info(f"Scheduled {i + 1} tasks") 71 | 72 | await asyncio.gather(*tasks) 73 | logger.info("DONE!") 74 | 75 | 76 | async def run( 77 | hit_count: Any, 78 | staleness: Any, 79 | enabled_partitions: Optional[list[int]], 80 | ) -> None: 81 | async with app.create() as app_context: 82 | app_context.add_to_context("hit_count", hit_count) 83 | app_context.add_to_context("staleness", staleness) 84 | benchmark_task.enabled_partitions = enabled_partitions 85 | 86 | logger.info(f"Starting to poll following partitions {enabled_partitions}") 87 | await 
app_context.run() 88 | 89 | 90 | def _run(log_level: str, *args: Any, **kwargs: Any) -> None: 91 | # On Windows we need to setup logging again as forking is not supported 92 | setup(log_level) 93 | asyncio.run(run(*args, **kwargs)) 94 | 95 | 96 | def setup(log_level: str) -> None: 97 | # Make sure that this module logger always logs no matter what 98 | # the selected level is. 99 | coloredlogs.install(level="DEBUG", milliseconds=True) 100 | logging.getLogger().setLevel(log_level) 101 | logger.setLevel("INFO") 102 | 103 | try: 104 | import uvloop 105 | 106 | uvloop.install() 107 | except ImportError: 108 | logger.info("uvloop is not available, ignoring.") 109 | 110 | 111 | async def report( 112 | hit_counts: list[Any], 113 | stalenesses: list[Any], 114 | ) -> None: 115 | previous_hit_count = 0 116 | previous_sample = time.perf_counter() 117 | 118 | while True: 119 | await asyncio.sleep(5) 120 | 121 | hit_count = sum(c.value for c in hit_counts) 122 | staleness = max(c.value for c in stalenesses) 123 | now = time.perf_counter() 124 | 125 | logger.info(f"{(hit_count - previous_hit_count) / (now - previous_sample):10.2f} RPS, Staleness {staleness:.2f}s") 126 | 127 | previous_hit_count = hit_count 128 | previous_sample = now 129 | 130 | 131 | if __name__ == "__main__": 132 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 133 | parser.add_argument("--log-level", default="WARNING") 134 | subparsers = parser.add_subparsers(dest="command", required=True) 135 | 136 | populate_option = subparsers.add_parser("populate", help="Populate the backend with a large number of tasks") 137 | populate_option.add_argument("-n", "--number", type=int, default=10000, help="Number of tasks to insert") 138 | populate_option.add_argument( 139 | "-p", 140 | "--parallelism", 141 | type=int, 142 | default=50, 143 | help="How many tasks to insert in parallel", 144 | ) 145 | run_option = subparsers.add_parser("run", help="Run the Pyncette app") 146 | run_option.add_argument("--processes", type=int, default=1, help="Number of processes to run") 147 | run_option.add_argument( 148 | "--partition-count", 149 | type=int, 150 | default=PARTITION_COUNT, 151 | help="How many partitions each process should poll", 152 | ) 153 | 154 | options = parser.parse_args() 155 | setup(options.log_level) 156 | 157 | if options.command == "run": 158 | hit_count = [RawValue("l", 0) for _ in range(options.processes)] 159 | staleness = [RawValue("f", 0) for _ in range(options.processes)] 160 | 161 | if options.partition_count * options.processes < PARTITION_COUNT: 162 | logger.warning(f"partition_count * processes < {PARTITION_COUNT}. 
Not all partitions will be processed.") 163 | 164 | for i in range(options.processes): 165 | enabled_partitions = sorted((i * options.partition_count + j) % PARTITION_COUNT for j in range(options.partition_count)) 166 | 167 | job = Process( 168 | target=_run, 169 | name=str(i), 170 | args=( 171 | options.log_level, 172 | hit_count[i], 173 | staleness[i], 174 | list(enabled_partitions), 175 | ), 176 | ) 177 | job.start() 178 | 179 | asyncio.run(report(hit_count, staleness)) 180 | 181 | elif options.command == "populate": 182 | asyncio.run(populate(options.number, options.parallelism)) 183 | -------------------------------------------------------------------------------- /examples/data/usernames.txt: -------------------------------------------------------------------------------- 1 | Alice 2 | Carol 3 | Chuck 4 | Craig 5 | Dan 6 | Erin 7 | Eve 8 | Faythe 9 | Frank 10 | Grace 11 | Heidi 12 | Ivan 13 | Judy 14 | Mallory 15 | Michael 16 | Niaj 17 | Olivia 18 | Oscar 19 | Peggy 20 | Rupert 21 | Sybil 22 | Trent 23 | Trudy 24 | Victor 25 | Walter 26 | Wend 27 | -------------------------------------------------------------------------------- /examples/delay_queue.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example uses Pyncette to implement a reliable delay queue (persistence is needed for durability 4 | or for running multiple instances of the app at the same time, see examples/persistence.py for details) 5 | 6 | After the task instance suceeds it will not be scheduled again as with recurrent tasks, however, 7 | if an exception is raised, it will be retried if ExecutionMode.AT_LEAST_ONCE is used. 8 | 9 | """ 10 | 11 | import datetime 12 | import logging 13 | import random 14 | import uuid 15 | 16 | from pyncette import Context 17 | from pyncette import ExecutionMode 18 | from pyncette import Pyncette 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | app = Pyncette() 23 | 24 | 25 | @app.dynamic_task(execution_mode=ExecutionMode.AT_LEAST_ONCE) 26 | async def execute_once_reliable(context: Context) -> None: 27 | logger.info( 28 | f"I am {context.args['username']}. If I fail, I will be retried, otherwise I will never be seen again." 29 | f"(I was scheduled to run at {context.scheduled_at})" 30 | ) 31 | 32 | if random.choice([True, False]): 33 | raise Exception("Oops") 34 | 35 | 36 | @app.dynamic_task(execution_mode=ExecutionMode.AT_MOST_ONCE) 37 | async def execute_once_best_effort(context: Context) -> None: 38 | logger.info(f"I am {context.args['username']}. 
I will never be seen again (I was scheduled to run at {context.scheduled_at})") 39 | 40 | if random.choice([True, False]): 41 | raise Exception("Oops") 42 | 43 | 44 | @app.task(interval=datetime.timedelta(seconds=2)) 45 | async def enqueue_periodically(context: Context) -> None: 46 | execute_at = context.scheduled_at + datetime.timedelta(seconds=random.randint(1, 5)) 47 | 48 | await context.app_context.schedule_task( 49 | execute_once_reliable, 50 | str(uuid.uuid4()), 51 | execute_at=execute_at, 52 | username=random.choice(["Alice", "Bob", "Charlie", "Dave", "Eve"]), 53 | ) 54 | 55 | await context.app_context.schedule_task( 56 | execute_once_best_effort, 57 | str(uuid.uuid4()), 58 | execute_at=execute_at, 59 | username=random.choice(["Alice", "Bob", "Charlie", "Dave", "Eve"]), 60 | ) 61 | 62 | 63 | if __name__ == "__main__": 64 | app.main() 65 | -------------------------------------------------------------------------------- /examples/dynamic_tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example illustrates dynamic tasks i.e. tasks that are not pre-defined in code and 3 | can be scheduled at runtime. 4 | 5 | Marking the function with @app.dynamic_task serves as a template and individual task 6 | instances can be scheduled with schedule_task (and unscheduled with unschedule_task). 7 | 8 | Using a persistent backend, Pyncette supports efficient execution of a large number of 9 | dynamic task instances. 10 | 11 | """ 12 | 13 | import asyncio 14 | import datetime 15 | import logging 16 | import pathlib 17 | import random 18 | import sys 19 | 20 | import coloredlogs 21 | 22 | from pyncette import Context 23 | from pyncette import ExecutionMode 24 | from pyncette import Pyncette 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | app = Pyncette(poll_interval=datetime.timedelta(seconds=0.1)) 29 | 30 | 31 | @app.dynamic_task(execution_mode=ExecutionMode.AT_MOST_ONCE) 32 | async def greeter(context: Context) -> None: 33 | logger.info(f"Hello from {context.args['username']}.") 34 | 35 | if random.random() < 0.2: 36 | # 1/5 chance that the task will unschedule itself. If this 37 | # example is run for long enough, no tasks should be left. 38 | logger.warning(f"Unscheduling {context.args['username']}") 39 | await context.app_context.unschedule_task(context.task) 40 | 41 | 42 | async def main() -> None: 43 | async with app.create() as ctx: 44 | with (pathlib.Path(sys.path[0]) / "data" / "usernames.txt").open() as f: 45 | usernames = f.read().splitlines() 46 | 47 | for username in usernames: 48 | interval = datetime.timedelta(seconds=random.uniform(5, 20)) 49 | logger.info(f"Scheduling {username} to run every {interval}") 50 | await ctx.schedule_task( 51 | greeter, 52 | # Mandatory unique name for the task instance 53 | username, 54 | interval=interval, 55 | # All the extra parameters will be available to the 56 | # the task in context.args 57 | username=username, 58 | ) 59 | 60 | await ctx.run() 61 | 62 | 63 | if __name__ == "__main__": 64 | coloredlogs.install(level="INFO", milliseconds=True, logger=logger) 65 | asyncio.run(main()) 66 | -------------------------------------------------------------------------------- /examples/fixtures_and_middlewares.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example illustrates the use of fixtures and middlewares. 
4 | 5 | Middlewares are functions that wrap the execution of every defined task, so they are a good 6 | place to put cross-cutting concerns such as logging, database session management, metrics, ... 7 | 8 | Fixtures can be thought of application-level middlewares. They wrap the lifecycle of the entire 9 | Pyncette app and can be used to perform initialization, cleanup and can inject resources such as 10 | service clients to the task context. 11 | 12 | """ 13 | 14 | import asyncio 15 | import logging 16 | import pathlib 17 | import random 18 | import time 19 | from collections.abc import AsyncIterator 20 | from typing import TextIO 21 | 22 | from pyncette import Context 23 | from pyncette import Pyncette 24 | from pyncette import PyncetteContext 25 | from pyncette.model import NextFunc 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | app = Pyncette() 30 | 31 | 32 | @app.fixture(name="log") 33 | async def logfile_fixture(app_context: PyncetteContext) -> AsyncIterator[TextIO]: 34 | logger.info("Using log file logfile.txt") 35 | 36 | with pathlib.Path("./logfile.txt").open("a") as f: 37 | # Yielding from fixture gives an object that will be available in 38 | # context. for all tasks (and middlewares) 39 | yield f 40 | 41 | # This will run on graceful shutdown of the Pyncette app 42 | logger.info("Log file closed") 43 | 44 | 45 | @app.middleware 46 | async def timer_middleware(context: Context, next: NextFunc) -> None: 47 | start_time = time.time() 48 | try: 49 | await next() 50 | finally: 51 | duration = time.time() - start_time 52 | print(f"Task {context.task.name} took {duration:,.2}s.", file=context.log) 53 | context.log.flush() 54 | 55 | 56 | @app.task(schedule="* * * * * */2") 57 | async def slow_task(context: Context) -> None: 58 | await asyncio.sleep(random.uniform(0, 1)) 59 | 60 | 61 | @app.task(schedule="* * * * * */2") 62 | async def fast_task(context: Context) -> None: 63 | pass 64 | 65 | 66 | if __name__ == "__main__": 67 | app.main() 68 | -------------------------------------------------------------------------------- /examples/healthcheck.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example illustrates the use of healthcheck HTTP server. It exposes the /health endpoint 4 | which returns 200 if last successfull poll was less than 2 poll intervals ago, 500 otherwise. 5 | 6 | curl localhost:8080/health 7 | 8 | """ 9 | 10 | import asyncio 11 | import logging 12 | import random 13 | 14 | from pyncette import Context 15 | from pyncette import Pyncette 16 | from pyncette.executor import SynchronousExecutor 17 | from pyncette.healthcheck import use_healthcheck_server 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | # We use the SynchronousExecutor so long-running tasks will delay 22 | # cause polling to stall and simulate unhealthiness. 23 | app = Pyncette(executor_cls=SynchronousExecutor) 24 | use_healthcheck_server(app, port=8080) 25 | 26 | 27 | @app.task(schedule="* * * * * */2") 28 | async def hello_world(context: Context) -> None: 29 | if random.choice([True, False]): 30 | await asyncio.sleep(4) 31 | logger.info("Hello, world!") 32 | 33 | 34 | if __name__ == "__main__": 35 | app.main() 36 | -------------------------------------------------------------------------------- /examples/heartbeat.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example demonstrates the heartbeating functionality, which allows for the lease on the 4 | task to be extended. 
This can be useful if tasks have an unpredictable run time to minimize 5 | the risk of another instance taking over the lease. 6 | 7 | Heartbeating can be either cooperative or automatic. 8 | 9 | """ 10 | 11 | import asyncio 12 | import datetime 13 | import logging 14 | 15 | from pyncette import Context 16 | from pyncette import Pyncette 17 | from pyncette.utils import with_heartbeat 18 | 19 | logger = logging.getLogger(__name__) 20 | app = Pyncette() 21 | 22 | 23 | @app.task(schedule="* * * * * */2", lease_duration=datetime.timedelta(seconds=2)) 24 | async def cooperative_heartbeat(context: Context) -> None: 25 | logger.info("Hello, world!") 26 | for _ in range(5): 27 | await asyncio.sleep(1) 28 | await context.heartbeat() 29 | logger.info("Goodbye, world!") 30 | 31 | 32 | @app.task(schedule="* * * * * */2", lease_duration=datetime.timedelta(seconds=2)) 33 | async def cooperative_heartbeat_lease_expired(context: Context) -> None: 34 | logger.info("Hello, world!") 35 | await asyncio.sleep(3) 36 | # This will raise an exception as we no longer have lease at this point 37 | await context.heartbeat() 38 | logger.info("Goodbye, world!") 39 | 40 | 41 | @app.task(schedule="* * * * * */2", lease_duration=datetime.timedelta(seconds=2)) 42 | @with_heartbeat() 43 | async def automatic_heartbeat(context: Context) -> None: 44 | """ 45 | Tasks decorated with with_heartbeat will automatically heartbeat in background 46 | whenever we have less than 1/2 of the time remaining on the lease 47 | """ 48 | 49 | logger.info("Hello, world!") 50 | await asyncio.sleep(5) 51 | logger.info("Goodbye, world!") 52 | 53 | 54 | if __name__ == "__main__": 55 | app.main() 56 | -------------------------------------------------------------------------------- /examples/persistence.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example stores the state of the scheduler in a variety of backends supported by Pyncette 4 | 5 | By having a persistent backend, you can run multiple multiple processes and they will coordinate 6 | execution among them, making sure that tasks are only executed by one of them on schedule. 7 | 8 | """ 9 | 10 | import logging 11 | 12 | from pyncette import Context 13 | from pyncette import Pyncette 14 | from pyncette.dynamodb import dynamodb_repository 15 | from pyncette.mysql import mysql_repository 16 | from pyncette.postgres import postgres_repository 17 | from pyncette.redis import redis_repository 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | sqlite_app = Pyncette(sqlite_database="pyncette.db") 22 | 23 | postgres_app = Pyncette( 24 | repository_factory=postgres_repository, 25 | # PostgreSQL connection string 26 | postgres_url="postgres://postgres@localhost/pyncette", 27 | # The table name 28 | postgres_table_name="example123", 29 | # If set to true, Pyncette will assume the table exists and will not try to create it 30 | postgres_skip_table_create=False, 31 | # Batch size for querying dynamic tasks 32 | batch_size=10, 33 | ) 34 | 35 | dynamodb_app = Pyncette( 36 | repository_factory=dynamodb_repository, 37 | # Optional endpoint URL (if e.g. using Localstack instead of actual DynamoDB) 38 | dynamodb_endpoint=None, 39 | # AWS region name 40 | dynamodb_region_name="eu-west-1", 41 | # The name of the DynamoDB table. 42 | dynamodb_table_name="pyncette", 43 | # Optional partition key prefix allowing multiple independent Pyncette instances 44 | # to use the same table. 
45 | dynamodb_partition_prefix="example123", 46 | # If set to true, Pyncette will assume the table exists and will not try to create it 47 | dynamodb_skip_table_create=False, 48 | # Batch size for querying dynamic tasks 49 | batch_size=10, 50 | ) 51 | 52 | redis_app = Pyncette( 53 | repository_factory=redis_repository, 54 | # Redis URL 55 | redis_url="redis://localhost", 56 | # Key prefix in Redis, allowing multiple Pyncette apps to share the same 57 | # Redis instance 58 | redis_namespace="example123", 59 | # Timeout in seconds for Redis operations 60 | redis_timeout=10, 61 | # Batch size for querying dynamic tasks 62 | batch_size=10, 63 | ) 64 | 65 | 66 | mysql_app = Pyncette( 67 | repository_factory=mysql_repository, # type: ignore 68 | # MySQL host 69 | mysql_host="localhost", 70 | # MySQL database name 71 | mysql_database="pyncette", 72 | # MySQL username 73 | mysql_user="pyncette", 74 | # Optional MySQL password 75 | mysql_password="password", # noqa: S106 76 | # The table name 77 | mysql_table_name="example123", 78 | # Optional MySQL port 79 | mysql_port=3306, 80 | # If set to true, Pyncette will assume the table exists and will not try to create it 81 | mysql_skip_table_create=False, 82 | # Batch size for querying dynamic tasks 83 | batch_size=10, 84 | ) 85 | 86 | # Choose one of the above 87 | app = sqlite_app 88 | 89 | 90 | @app.task(schedule="* * * * * */2") 91 | async def hello_world(context: Context) -> None: 92 | logger.info("Hello, world!") 93 | 94 | 95 | if __name__ == "__main__": 96 | app.main() 97 | -------------------------------------------------------------------------------- /examples/prometheus_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Pyncette ships with an optional Prometheus instrumentation based on the official prometheus_client 4 | Python package. It includes the following metrics: 5 | 6 | - Tick duration [Histogram] 7 | - Tick volume [Counter] 8 | - Tick failures [Counter] 9 | - Number of currently executing ticks [Gauge] 10 | - Task duration [Histogram] 11 | - Task volume [Counter] 12 | - Task failures [Counter] 13 | - Number of currently executing tasks [Gauge] 14 | - Task run staleness (i.e. how far behind the scheduled time the actual executions are) [Histogram] 15 | - Repository operation duration [Histogram] 16 | - Repository operation volume [Counter] 17 | - Repository operation volume [Failures] 18 | - Number of currently repository operations [Gauge] 19 | 20 | It pushes the metrics to default registry (prometheus_client.REGISTRY), so it can be combined with other 21 | code alongside it. 
22 | 23 | To see the exported metrics while running this example, use something like 24 | 25 | curl localhost:9699/metrics 26 | 27 | """ 28 | 29 | import asyncio 30 | import datetime 31 | import logging 32 | import random 33 | import uuid 34 | 35 | from prometheus_client import start_http_server 36 | 37 | from pyncette import Context 38 | from pyncette import FailureMode 39 | from pyncette import Pyncette 40 | from pyncette.prometheus import use_prometheus 41 | 42 | logger = logging.getLogger(__name__) 43 | 44 | app = Pyncette() 45 | use_prometheus(app) 46 | 47 | 48 | @app.task(schedule="* * * * * */2") 49 | async def hello_world(context: Context) -> None: 50 | logger.info("Hello, world!") 51 | 52 | 53 | @app.task(schedule="* * * * * */2") 54 | async def sleepy_time(context: Context) -> None: 55 | logger.info("Hello, bed!") 56 | await asyncio.sleep(random.random() * 5) 57 | 58 | 59 | @app.task(schedule="* * * * * */2", failure_mode=FailureMode.UNLOCK) 60 | async def oopsie_daisy(context: Context) -> None: 61 | if random.choice([True, False]): 62 | raise Exception("Something went wrong :(") 63 | 64 | 65 | @app.dynamic_task() 66 | async def execute_once(context: Context) -> None: 67 | logger.info(f"Hello, world from {context.task}") 68 | await context.app_context.unschedule_task(context.task) 69 | 70 | 71 | @app.task(interval=datetime.timedelta(seconds=1)) 72 | async def schedule_execute_once(context: Context) -> None: 73 | await context.app_context.schedule_task(execute_once, str(uuid.uuid4()), interval=datetime.timedelta(seconds=1)) 74 | 75 | 76 | if __name__ == "__main__": 77 | start_http_server(port=9699, addr="0.0.0.0") # noqa: S104 78 | app.main() 79 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Pyncette 2 | site_description: A reliable distributed scheduler with pluggable storage backends for Async Python 3 | site_url: https://pyncette.readthedocs.io 4 | repo_url: https://github.com/tibordp/pyncette 5 | repo_name: tibordp/pyncette 6 | edit_uri: edit/master/docs/ 7 | 8 | theme: 9 | name: material 10 | palette: 11 | # Light mode 12 | - media: "(prefers-color-scheme: light)" 13 | scheme: default 14 | primary: indigo 15 | accent: indigo 16 | toggle: 17 | icon: material/brightness-7 18 | name: Switch to dark mode 19 | # Dark mode 20 | - media: "(prefers-color-scheme: dark)" 21 | scheme: slate 22 | primary: indigo 23 | accent: indigo 24 | toggle: 25 | icon: material/brightness-4 26 | name: Switch to light mode 27 | features: 28 | - navigation.instant 29 | - navigation.tracking 30 | - navigation.tabs 31 | - navigation.sections 32 | - navigation.expand 33 | - navigation.top 34 | - search.suggest 35 | - search.highlight 36 | - content.code.copy 37 | - content.code.annotate 38 | 39 | plugins: 40 | - search 41 | - mkdocstrings: 42 | handlers: 43 | python: 44 | paths: [src] 45 | inventories: 46 | - https://docs.python.org/3/objects.inv 47 | - https://docs.aiohttp.org/en/stable/objects.inv 48 | options: 49 | docstring_style: numpy 50 | docstring_section_style: table 51 | show_source: false 52 | show_root_heading: true 53 | show_root_full_path: false 54 | show_symbol_type_heading: true 55 | show_symbol_type_toc: true 56 | signature_crossrefs: true 57 | separate_signature: true 58 | line_length: 80 59 | members_order: source 60 | group_by_category: true 61 | show_if_no_docstring: true 62 | show_docstring_attributes: true 63 | show_docstring_functions: true 
64 | show_docstring_classes: true 65 | show_docstring_modules: true 66 | show_signature_annotations: true 67 | annotations_path: brief 68 | inherited_members: false 69 | filters: 70 | - "!^_" 71 | merge_init_into_class: true 72 | docstring_options: 73 | ignore_init_summary: true 74 | 75 | markdown_extensions: 76 | - admonition 77 | - attr_list 78 | - def_list 79 | - footnotes 80 | - meta 81 | - md_in_html 82 | - toc: 83 | permalink: true 84 | - pymdownx.arithmatex: 85 | generic: true 86 | - pymdownx.betterem: 87 | smart_enable: all 88 | - pymdownx.caret 89 | - pymdownx.details 90 | - pymdownx.highlight: 91 | anchor_linenums: true 92 | - pymdownx.inlinehilite 93 | - pymdownx.keys 94 | - pymdownx.mark 95 | - pymdownx.smartsymbols 96 | - pymdownx.superfences 97 | - pymdownx.tabbed: 98 | alternate_style: true 99 | - pymdownx.tasklist: 100 | custom_checkbox: true 101 | - pymdownx.tilde 102 | 103 | nav: 104 | - Home: 105 | - Overview: index.md 106 | - Changelog: changelog.md 107 | - Usage: 108 | - Installation: installation.md 109 | - Usage: usage.md 110 | - Backends: backends.md 111 | - Advanced Usage: advanced_usage.md 112 | - API Reference: api-reference.md 113 | - Contributing: contributing.md 114 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pyncette" 7 | version = "1.0.0" 8 | description = "A reliable distributed scheduler with pluggable storage backends" 9 | readme = "README.md" 10 | license = {text = "MIT"} 11 | authors = [ 12 | {name = "Tibor Djurica Potpara", email = "tibor.djurica@ojdip.net"}, 13 | ] 14 | keywords = ["scheduler", "cron", "async", "distributed", "task-queue", "asyncio"] 15 | classifiers = [ 16 | "Development Status :: 5 - Production/Stable", 17 | "Intended Audience :: Developers", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: Unix", 20 | "Operating System :: POSIX", 21 | "Operating System :: Microsoft :: Windows", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | "Programming Language :: Python :: 3.13", 28 | "Programming Language :: Python :: 3.14", 29 | "Programming Language :: Python :: Implementation :: CPython", 30 | "Topic :: Utilities", 31 | ] 32 | requires-python = ">=3.9" 33 | dependencies = [ 34 | "croniter>=1.3.14", 35 | "aiosqlite>=0.19.0", 36 | "aiohttp>=3.8.4", 37 | "python-dateutil>=2.8.2", 38 | "coloredlogs", 39 | ] 40 | 41 | [project.optional-dependencies] 42 | redis = ["redis>=4.5.4"] 43 | prometheus = ["prometheus_client>=0.16.0"] 44 | postgres = ["asyncpg>=0.27.0"] 45 | dynamodb = ["aioboto3>=11.1.0"] 46 | mysql = ["aiomysql>=0.1.1", "cryptography>=40.0.2"] 47 | uvloop = ["uvloop>=0.16.0"] 48 | all = [ 49 | "redis>=4.5.4", 50 | "prometheus_client>=0.16.0", 51 | "asyncpg>=0.27.0", 52 | "aioboto3>=11.1.0", 53 | "aiomysql>=0.1.1", 54 | "cryptography>=40.0.2", 55 | "uvloop>=0.22.1", 56 | ] 57 | dev = [ 58 | "pytest", 59 | "pytest-asyncio", 60 | "pytest-cov", 61 | "pre-commit", 62 | "ty", 63 | "ruff", 64 | "mkdocs-material", 65 | "mkdocstrings[python]", 66 | ] 67 | 68 | [project.urls] 69 | Documentation = "https://tibordp.github.io/pyncette/" 70 | Changelog = 
"https://tibordp.github.io/pyncette/changelog/" 71 | "Issue Tracker" = "https://github.com/tibordp/pyncette/issues" 72 | Homepage = "https://github.com/tibordp/pyncette" 73 | 74 | [tool.hatch.build.targets.wheel] 75 | packages = ["src/pyncette"] 76 | 77 | [tool.hatch.build.targets.sdist] 78 | include = [ 79 | "/src", 80 | "/tests", 81 | "/docs", 82 | "/examples", 83 | "*.md", 84 | "*.cfg", 85 | "*.yml", 86 | "*.yaml", 87 | "*.toml", 88 | "*.txt", 89 | ] 90 | 91 | [tool.ruff] 92 | extend-exclude = ["static", "ci/templates"] 93 | line-length = 140 94 | src = ["src", "tests"] 95 | target-version = "py39" 96 | 97 | [tool.ruff.format] 98 | docstring-code-format = true 99 | docstring-code-line-length = 80 100 | 101 | [tool.ruff.lint] 102 | ignore = [ 103 | "RUF001", # ruff-specific rules ambiguous-unicode-character-string 104 | "PLC0415", # import not at top of file 105 | "S608", # SQL injection - we specifically inject table name 106 | "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes 107 | ] 108 | 109 | [tool.ty.rules] 110 | unresolved-attribute = "ignore" 111 | 112 | [tool.pytest.ini_options] 113 | testpaths = ["tests"] 114 | python_files = ["test_*.py"] 115 | python_classes = ["Test*"] 116 | python_functions = ["test_*"] 117 | markers = [ 118 | "asyncio: mark test as async", 119 | "integration: mark test as integration test requiring external services", 120 | ] 121 | asyncio_mode = "auto" 122 | asyncio_default_fixture_loop_scope = "function" 123 | 124 | [tool.coverage.paths] 125 | source = ["src", "*/site-packages"] 126 | 127 | [tool.coverage.run] 128 | branch = true 129 | source = ["pyncette", "tests"] 130 | omit = ["tests/utils/*"] 131 | parallel = true 132 | 133 | [tool.coverage.report] 134 | show_missing = true 135 | precision = 2 136 | exclude_lines = [ 137 | "if TYPE_CHECKING:", 138 | "assert False", 139 | "pragma: no cover", 140 | ] 141 | omit = ["*migrations*"] 142 | -------------------------------------------------------------------------------- /src/pyncette/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.0" 2 | 3 | from .model import Context 4 | from .model import ExecutionMode 5 | from .model import FailureMode 6 | from .pyncette import Pyncette 7 | from .pyncette import PyncetteContext 8 | 9 | __all__ = ["Context", "ExecutionMode", "FailureMode", "Pyncette", "PyncetteContext"] 10 | -------------------------------------------------------------------------------- /src/pyncette/errors.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | 6 | class PyncetteException(Exception): 7 | """Base exception for Pyncette""" 8 | 9 | 10 | class LeaseLostException(PyncetteException): 11 | """Signals that the lease on the task was lost""" 12 | 13 | task: Task 14 | 15 | def __init__(self, task: Task): 16 | super().__init__(f"Lease on the task {task.canonical_name} was lost.") 17 | self.task = task 18 | 19 | 20 | if TYPE_CHECKING: 21 | from pyncette.task import Task 22 | -------------------------------------------------------------------------------- /src/pyncette/executor.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import contextlib 5 | import logging 6 | from typing import Any 7 | from collections.abc import Awaitable 8 | 9 | logger = logging.getLogger(__name__) 10 | 
11 | 12 | class SynchronousExecutor(contextlib.AbstractAsyncContextManager): 13 | def __init__(self, **kwargs: dict[str, Any]): 14 | pass 15 | 16 | async def __aenter__(self) -> SynchronousExecutor: 17 | return self 18 | 19 | async def __aexit__( 20 | self, 21 | exc_type: type[BaseException] | None, 22 | exc_value: BaseException | None, 23 | traceback: Any | None, 24 | ) -> None: 25 | pass 26 | 27 | async def spawn_task(self, task: Awaitable) -> None: 28 | await task 29 | 30 | 31 | class DefaultExecutor(contextlib.AbstractAsyncContextManager): 32 | """Manages the spawned tasks running in background""" 33 | 34 | _tasks: dict[object, asyncio.Task] 35 | _semaphore: asyncio.Semaphore 36 | 37 | def __init__(self, **kwargs: Any) -> None: 38 | self._tasks = {} 39 | concurrency_limit = kwargs.get("concurrency_limit", 100) 40 | self._semaphore = asyncio.Semaphore(concurrency_limit) 41 | 42 | async def __aenter__(self) -> DefaultExecutor: 43 | return self 44 | 45 | async def __aexit__( 46 | self, 47 | exc_type: type[BaseException] | None, 48 | exc_value: BaseException | None, 49 | traceback: Any | None, 50 | ) -> None: 51 | if self._tasks: 52 | logging.debug(f"{exc_type}, {exc_value}, {traceback}") 53 | if exc_type == asyncio.CancelledError: 54 | logger.warning("Cancelling remaining tasks.") 55 | for task in self._tasks.values(): 56 | task.cancel() 57 | 58 | logger.info("Waiting for remaining tasks to finish.") 59 | await asyncio.wait(self._tasks.values()) 60 | 61 | async def spawn_task(self, task: Awaitable) -> None: 62 | identity = object() 63 | 64 | async def _task_wrapper(awaitable: Awaitable) -> None: 65 | try: 66 | await awaitable 67 | finally: 68 | self._tasks.pop(identity) 69 | self._semaphore.release() 70 | 71 | await self._semaphore.acquire() 72 | self._tasks[identity] = asyncio.create_task(_task_wrapper(task)) 73 | -------------------------------------------------------------------------------- /src/pyncette/healthcheck.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from collections.abc import AsyncIterator 4 | from collections.abc import Awaitable 5 | from typing import Callable 6 | from typing import Optional 7 | 8 | from aiohttp import web 9 | 10 | from pyncette import pyncette 11 | from pyncette.pyncette import Pyncette 12 | from pyncette.pyncette import PyncetteContext 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | async def default_healthcheck(app_context: PyncetteContext) -> bool: 18 | utcnow = pyncette._current_time() 19 | last_tick = app_context.last_tick 20 | grace_period = app_context._app._poll_interval * 2 21 | 22 | return last_tick is not None and (utcnow - last_tick < grace_period) 23 | 24 | 25 | def use_healthcheck_server( 26 | app: Pyncette, 27 | port: int = 8080, 28 | bind_address: Optional[str] = None, 29 | healthcheck_handler: Callable[[PyncetteContext], Awaitable[bool]] = default_healthcheck, 30 | ) -> None: 31 | """ 32 | Decorate Pyncette app with a healthcheck endpoint served as a HTTP endpoint. 
33 | 34 | :param app: Pyncette app 35 | :param port: The local port to bind to 36 | :param bind_address: The local address to bind to 37 | :healthcheck_handler: A coroutine that determines health status 38 | """ 39 | 40 | async def healthcheck_fixture( 41 | app_context: PyncetteContext, 42 | ) -> AsyncIterator[asyncio.AbstractServer]: 43 | async def handler(request: web.BaseRequest) -> web.Response: 44 | if request.method != "GET": 45 | return web.Response(status=405, text="Method not allowed") 46 | try: 47 | is_healthy = await healthcheck_handler(app_context) 48 | except asyncio.CancelledError: 49 | raise 50 | except Exception as e: 51 | logger.warning("Exception raised in healthcheck handler", exc_info=e) 52 | is_healthy = False 53 | 54 | if is_healthy: 55 | return web.Response(status=200, text="OK") 56 | else: 57 | return web.Response(status=500, text="Not OK") 58 | 59 | loop = asyncio.get_event_loop() 60 | server = await loop.create_server(web.Server(handler), bind_address, port) 61 | logger.info(f"Healthcheck listening on {port}") 62 | 63 | try: 64 | yield server 65 | finally: 66 | server.close() 67 | await server.wait_closed() 68 | 69 | app.use_fixture("_healthcheck", healthcheck_fixture) 70 | -------------------------------------------------------------------------------- /src/pyncette/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime 4 | from dataclasses import dataclass 5 | from enum import Enum 6 | from typing import TYPE_CHECKING 7 | from typing import Any 8 | from collections.abc import AsyncIterator 9 | from collections.abc import Awaitable 10 | from typing import Callable 11 | from typing import NewType 12 | from typing import Protocol 13 | from typing import TypeVar 14 | 15 | T = TypeVar("T") 16 | Decorator = Callable[[T], T] 17 | Lease = NewType("Lease", object) 18 | ContinuationToken = NewType("ContinuationToken", object) 19 | 20 | # https://github.com/python/mypy/issues/708 21 | 22 | 23 | class Heartbeater(Protocol): 24 | def __call__(self) -> Awaitable[None]: 25 | "Heartbeats on the message" 26 | 27 | 28 | class Context: 29 | """Task execution context. This class can have dynamic attributes.""" 30 | 31 | app_context: pyncette.PyncetteContext 32 | task: pyncette.task.Task 33 | scheduled_at: datetime.datetime 34 | _lease: Lease | None 35 | heartbeat: Heartbeater 36 | args: dict[str, Any] 37 | 38 | if TYPE_CHECKING: 39 | 40 | def __getattr__(self, name: str) -> Any: ... 41 | 42 | def __setattr__(self, name: str, value: Any) -> Any: ... 
43 | 44 | 45 | class TaskFunc(Protocol): 46 | def __call__(self, context: Context) -> Awaitable[None]: 47 | "Executes the task" 48 | 49 | 50 | class PartitionSelector(Protocol): 51 | def __call__(self, partition_count: int, task_id: str) -> int: 52 | "Gets the partition number for a given task id" 53 | 54 | 55 | class NextFunc(Protocol): 56 | def __call__(self) -> Awaitable[None]: 57 | "Enter the next middleware or the task body" 58 | 59 | 60 | class MiddlewareFunc(Protocol): 61 | def __call__(self, context: Context, next: NextFunc) -> Awaitable[None]: 62 | "Executes the middleware" 63 | 64 | 65 | class FixtureFunc(Protocol): 66 | def __call__(self, app_context: pyncette.PyncetteContext) -> AsyncIterator[Any]: 67 | "Executes the fixture" 68 | 69 | 70 | class ResultType(Enum): 71 | """Status returned by polling the task""" 72 | 73 | MISSING = 0 74 | PENDING = 1 75 | READY = 2 76 | LOCKED = 3 77 | LEASE_MISMATCH = 4 78 | 79 | 80 | class ExecutionMode(Enum): 81 | """The execution mode for a Pyncette task.""" 82 | 83 | AT_LEAST_ONCE = 0 84 | AT_MOST_ONCE = 1 85 | 86 | 87 | class FailureMode(Enum): 88 | """What should happen when a task fails.""" 89 | 90 | NONE = 0 91 | UNLOCK = 1 92 | COMMIT = 2 93 | 94 | 95 | @dataclass 96 | class PollResponse: 97 | """The result of a task poll""" 98 | 99 | result: ResultType 100 | scheduled_at: datetime.datetime 101 | lease: Lease | None 102 | 103 | 104 | @dataclass 105 | class QueryResponse: 106 | """The result of a task query""" 107 | 108 | tasks: list[tuple[pyncette.task.Task, Lease]] 109 | continuation_token: ContinuationToken | None 110 | 111 | 112 | if TYPE_CHECKING: 113 | import pyncette 114 | import pyncette.task 115 | -------------------------------------------------------------------------------- /src/pyncette/mysql.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import datetime 4 | import json 5 | import logging 6 | import re 7 | import uuid 8 | from contextlib import asynccontextmanager 9 | from typing import Any 10 | from collections.abc import AsyncIterator 11 | from typing import Optional 12 | 13 | import aiomysql 14 | import dateutil.tz 15 | import pymysql 16 | 17 | from pyncette.errors import PyncetteException 18 | from pyncette.model import ContinuationToken 19 | from pyncette.model import ExecutionMode 20 | from pyncette.model import Lease 21 | from pyncette.model import PollResponse 22 | from pyncette.model import QueryResponse 23 | from pyncette.model import ResultType 24 | from pyncette.repository import Repository 25 | from pyncette.task import Task 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | def _from_timestamp(timestamp: Optional[float]) -> Optional[datetime.datetime]: 31 | if timestamp is None: 32 | return None 33 | else: 34 | return datetime.datetime.fromtimestamp(timestamp, dateutil.tz.UTC) 35 | 36 | 37 | def _to_timestamp(date: Optional[datetime.datetime]) -> Optional[float]: 38 | if date is None: 39 | return None 40 | else: 41 | return date.timestamp() 42 | 43 | 44 | _CONTINUATION_TOKEN = ContinuationToken(object()) 45 | 46 | 47 | class MySQLRepository(Repository): 48 | _pool: aiomysql.Pool 49 | _batch_size: int 50 | _table_name: str 51 | 52 | def __init__( 53 | self, 54 | pool: aiomysql.Pool, 55 | **kwargs: Any, 56 | ): 57 | self._pool = pool 58 | self._table_name = kwargs.get("mysql_table_name", "pyncette_tasks") 59 | self._batch_size = kwargs.get("batch_size", 100) 60 | 61 | if self._batch_size < 1: 62 | raise ValueError("Batch 
size must be greater than 0") 63 | if not re.match(r"^[a-z_]+$", self._table_name): 64 | raise ValueError("Table name can only contain lower-case letters and underscores") 65 | 66 | async def initialize(self) -> None: 67 | async with self._transaction() as cursor: 68 | await cursor.execute( 69 | f""" 70 | CREATE TABLE IF NOT EXISTS {self._table_name} ( 71 | name VARCHAR(256) PRIMARY KEY, 72 | parent_name VARCHAR(256), 73 | locked_until DOUBLE, 74 | locked_by VARCHAR(256), 75 | execute_after DOUBLE, 76 | task_spec TEXT 77 | ); 78 | """ 79 | ) 80 | 81 | try: 82 | await cursor.execute( 83 | f""" 84 | CREATE INDEX due_tasks_{self._table_name} 85 | ON {self._table_name} (parent_name, (GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)))); 86 | """ 87 | ) 88 | except pymysql.err.OperationalError as e: 89 | code, _msg = e.args 90 | # Index already exists 91 | if code != 1061: 92 | raise 93 | 94 | async def poll_dynamic_task( 95 | self, 96 | utc_now: datetime.datetime, 97 | task: Task, 98 | continuation_token: Optional[ContinuationToken] = None, 99 | ) -> QueryResponse: 100 | async with self._transaction() as cursor: 101 | locked_by = str(uuid.uuid4()) 102 | locked_until = utc_now + task.lease_duration 103 | 104 | await cursor.execute( 105 | f""" 106 | SELECT name, task_spec FROM {self._table_name} 107 | WHERE parent_name = %s AND GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= %s 108 | ORDER BY GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC 109 | LIMIT %s 110 | FOR UPDATE SKIP LOCKED 111 | """, 112 | ( 113 | task.canonical_name, 114 | _to_timestamp(utc_now), 115 | self._batch_size, 116 | ), 117 | ) 118 | ready_tasks = await cursor.fetchall() 119 | 120 | await cursor.executemany( 121 | f""" 122 | UPDATE {self._table_name} 123 | SET 124 | locked_until = %s, 125 | locked_by = %s 126 | WHERE name = %s 127 | """, 128 | [(_to_timestamp(locked_until), locked_by, record["name"]) for record in ready_tasks], 129 | ) 130 | 131 | logger.debug(f"poll_dynamic_task returned {ready_tasks}") 132 | 133 | return QueryResponse( 134 | tasks=[ 135 | ( 136 | task.instantiate_from_spec(json.loads(record["task_spec"])), 137 | Lease(locked_by), 138 | ) 139 | for record in ready_tasks 140 | ], 141 | # May result in an extra round-trip if there were exactly 142 | # batch_size tasks available, but we deem this an acceptable 143 | # tradeoff. 
144 | continuation_token=_CONTINUATION_TOKEN if len(ready_tasks) == self._batch_size else None, 145 | ) 146 | 147 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 148 | assert task.parent_task is not None 149 | 150 | async with self._transaction() as cursor: 151 | execute_at = _to_timestamp(task.get_next_execution(utc_now, None)) 152 | task_spec = json.dumps(task.as_spec()) 153 | 154 | await cursor.execute( 155 | f""" 156 | INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after) 157 | VALUES (%s, %s, %s, %s) 158 | ON DUPLICATE KEY UPDATE 159 | task_spec = %s, 160 | execute_after = %s, 161 | locked_by = NULL, 162 | locked_until = NULL 163 | """, 164 | ( 165 | task.canonical_name, 166 | task.parent_task.canonical_name, 167 | task_spec, 168 | execute_at, 169 | task_spec, 170 | execute_at, 171 | ), 172 | ) 173 | 174 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 175 | async with self._transaction() as cursor: 176 | await cursor.execute( 177 | f"DELETE FROM {self._table_name} WHERE name = %s", 178 | (task.canonical_name,), 179 | ) 180 | 181 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 182 | async with self._transaction() as cursor: 183 | await cursor.execute( 184 | f"SELECT * FROM {self._table_name} WHERE name = %s FOR UPDATE", 185 | (task.canonical_name,), 186 | ) 187 | record = await cursor.fetchone() 188 | logger.debug(f"poll_task returned {record}") 189 | 190 | update = False 191 | if record is None: 192 | # Regular (non-dynamic) tasks will be implicitly created on first poll, 193 | # but dynamic task instances must be explicitely created to prevent spurious 194 | # poll from re-creating them after being deleted. 
195 | if task.parent_task is not None: 196 | raise PyncetteException("Task not found") 197 | 198 | execute_after = task.get_next_execution(utc_now, None) 199 | locked_until = None 200 | locked_by = None 201 | update = True 202 | else: 203 | execute_after = _from_timestamp(record["execute_after"]) 204 | locked_until = _from_timestamp(record["locked_until"]) 205 | locked_by = record["locked_by"] 206 | 207 | assert execute_after is not None 208 | scheduled_at = execute_after 209 | 210 | if locked_until is not None and locked_until > utc_now and (lease != locked_by): 211 | result = ResultType.LOCKED 212 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_MOST_ONCE: 213 | execute_after = task.get_next_execution(utc_now, execute_after) 214 | result = ResultType.READY 215 | locked_until = None 216 | locked_by = None 217 | update = True 218 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_LEAST_ONCE: 219 | locked_until = utc_now + task.lease_duration 220 | locked_by = str(uuid.uuid4()) 221 | result = ResultType.READY 222 | update = True 223 | else: 224 | result = ResultType.PENDING 225 | 226 | if update: 227 | await self._update_record( 228 | cursor, 229 | task, 230 | locked_until, 231 | locked_by, 232 | execute_after, 233 | ) 234 | 235 | return PollResponse(result=result, scheduled_at=scheduled_at, lease=locked_by) 236 | 237 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 238 | async with self._transaction() as cursor: 239 | await cursor.execute( 240 | f"SELECT * FROM {self._table_name} WHERE name = %s FOR UPDATE", 241 | (task.canonical_name,), 242 | ) 243 | 244 | record = await cursor.fetchone() 245 | logger.debug(f"commit_task returned {record}") 246 | 247 | if not record: 248 | logger.warning(f"Task {task} not found, skipping.") 249 | return 250 | 251 | if record["locked_by"] != lease: 252 | logger.warning(f"Lease lost on task {task}, skipping.") 253 | return 254 | 255 | await self._update_record( 256 | cursor, 257 | task, 258 | None, 259 | None, 260 | task.get_next_execution(utc_now, _from_timestamp(record["execute_after"])), 261 | ) 262 | 263 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 264 | async with self._transaction() as cursor: 265 | locked_until = utc_now + task.lease_duration 266 | await cursor.execute( 267 | f""" 268 | UPDATE {self._table_name} 269 | SET 270 | locked_until = %s 271 | WHERE name = %s AND locked_by = %s 272 | """, 273 | ( 274 | _to_timestamp(locked_until), 275 | task.canonical_name, 276 | lease, 277 | ), 278 | ) 279 | if cursor.rowcount == 1: 280 | return lease 281 | else: 282 | return None 283 | 284 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 285 | async with self._transaction() as cursor: 286 | await cursor.execute( 287 | f""" 288 | UPDATE {self._table_name} 289 | SET 290 | locked_by = NULL, 291 | locked_until = NULL 292 | WHERE name = %s AND locked_by = %s 293 | """, 294 | ( 295 | task.canonical_name, 296 | lease, 297 | ), 298 | ) 299 | 300 | @asynccontextmanager 301 | async def _transaction(self) -> AsyncIterator[aiomysql.Cursor]: 302 | async with self._pool.acquire() as connection: 303 | try: 304 | async with connection.cursor(aiomysql.DictCursor) as cursor: 305 | yield cursor 306 | except Exception: 307 | await connection.rollback() 308 | raise 309 | else: 310 | await connection.commit() 311 | 312 | async def _update_record( 313 | self, 314 | cursor: 
aiomysql.Cursor, 315 | task: Task, 316 | locked_until: Optional[datetime.datetime], 317 | locked_by: Optional[str], 318 | execute_after: Optional[datetime.datetime], 319 | ) -> None: 320 | if execute_after is None: 321 | await cursor.execute( 322 | f"DELETE FROM {self._table_name} WHERE name = %s", 323 | (task.canonical_name,), 324 | ) 325 | else: 326 | await cursor.execute( 327 | f""" 328 | INSERT INTO {self._table_name} (name, locked_until, locked_by, execute_after) 329 | VALUES (%s, %s, %s, %s) 330 | ON DUPLICATE KEY UPDATE 331 | locked_until = %s, 332 | locked_by = %s, 333 | execute_after = %s 334 | """, 335 | ( 336 | task.canonical_name, 337 | _to_timestamp(locked_until), 338 | locked_by, 339 | _to_timestamp(execute_after), 340 | _to_timestamp(locked_until), 341 | locked_by, 342 | _to_timestamp(execute_after), 343 | ), 344 | ) 345 | 346 | 347 | @contextlib.asynccontextmanager 348 | async def mysql_repository( 349 | *, 350 | mysql_host: str, 351 | mysql_user: str, 352 | mysql_database: str, 353 | mysql_password: Optional[str] = None, 354 | mysql_port: int = 3306, 355 | **kwargs: Any, 356 | ) -> AsyncIterator[MySQLRepository]: 357 | """Factory context manager that initializes the connection to MySQL""" 358 | mysql_pool = await aiomysql.create_pool( 359 | host=mysql_host, 360 | port=mysql_port, 361 | user=mysql_user, 362 | password=mysql_password, 363 | db=mysql_database, 364 | loop=asyncio.get_running_loop(), 365 | ) 366 | try: 367 | repository = MySQLRepository(mysql_pool, **kwargs) 368 | if not kwargs.get("mysql_skip_table_create", False): 369 | await repository.initialize() 370 | 371 | yield repository 372 | finally: 373 | mysql_pool.close() 374 | await mysql_pool.wait_closed() 375 | -------------------------------------------------------------------------------- /src/pyncette/postgres.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import datetime 3 | import json 4 | import logging 5 | import re 6 | import uuid 7 | from contextlib import asynccontextmanager 8 | from typing import Any 9 | from collections.abc import AsyncIterator 10 | from typing import Optional 11 | 12 | import asyncpg 13 | 14 | from pyncette.errors import PyncetteException 15 | from pyncette.model import ContinuationToken 16 | from pyncette.model import ExecutionMode 17 | from pyncette.model import Lease 18 | from pyncette.model import PollResponse 19 | from pyncette.model import QueryResponse 20 | from pyncette.model import ResultType 21 | from pyncette.repository import Repository 22 | from pyncette.task import Task 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | _CONTINUATION_TOKEN = ContinuationToken(object()) 28 | 29 | 30 | class PostgresRepository(Repository): 31 | _pool: asyncpg.pool.Pool 32 | _batch_size: int 33 | _table_name: str 34 | 35 | def __init__( 36 | self, 37 | pool: asyncpg.pool.Pool, 38 | **kwargs: Any, 39 | ): 40 | self._pool = pool 41 | self._table_name = kwargs.get("postgres_table_name", "pyncette_tasks") 42 | self._batch_size = kwargs.get("batch_size", 100) 43 | 44 | if self._batch_size < 1: 45 | raise ValueError("Batch size must be greater than 0") 46 | if not re.match(r"^[a-z_]+$", self._table_name): 47 | raise ValueError("Table name can only contain lower-case letters and underscores") 48 | 49 | async def initialize(self) -> None: 50 | async with self._transaction() as connection: 51 | await connection.execute( 52 | f""" 53 | CREATE TABLE IF NOT EXISTS {self._table_name} ( 54 | name text PRIMARY KEY, 55 | 
parent_name text, 56 | locked_until timestamptz, 57 | locked_by uuid, 58 | execute_after timestamptz, 59 | task_spec json 60 | ); 61 | CREATE INDEX IF NOT EXISTS due_tasks_{self._table_name} 62 | ON {self._table_name} (parent_name, GREATEST(locked_until, execute_after)); 63 | """ 64 | ) 65 | 66 | async def poll_dynamic_task( 67 | self, 68 | utc_now: datetime.datetime, 69 | task: Task, 70 | continuation_token: Optional[ContinuationToken] = None, 71 | ) -> QueryResponse: 72 | async with self._transaction() as connection: 73 | locked_by = uuid.uuid4() 74 | locked_until = utc_now + task.lease_duration 75 | 76 | ready_tasks = await connection.fetch( 77 | f""" 78 | UPDATE {self._table_name} a 79 | SET 80 | locked_until = $4, 81 | locked_by = $5 82 | FROM ( 83 | SELECT name FROM {self._table_name} 84 | WHERE parent_name = $1 AND GREATEST(locked_until, execute_after) <= $2 85 | ORDER BY GREATEST(locked_until, execute_after) ASC 86 | LIMIT $3 87 | FOR UPDATE SKIP LOCKED 88 | ) b 89 | WHERE a.name = b.name 90 | RETURNING * 91 | """, 92 | task.canonical_name, 93 | utc_now, 94 | self._batch_size, 95 | locked_until, 96 | locked_by, 97 | ) 98 | logger.debug(f"poll_dynamic_task returned {ready_tasks}") 99 | 100 | return QueryResponse( 101 | tasks=[ 102 | ( 103 | task.instantiate_from_spec(json.loads(record["task_spec"])), 104 | Lease(locked_by), 105 | ) 106 | for record in ready_tasks 107 | ], 108 | # May result in an extra round-trip if there were exactly 109 | # batch_size tasks available, but we deem this an acceptable 110 | # tradeoff. 111 | continuation_token=_CONTINUATION_TOKEN if len(ready_tasks) == self._batch_size else None, 112 | ) 113 | 114 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 115 | assert task.parent_task is not None 116 | 117 | async with self._transaction() as connection: 118 | result = await connection.execute( 119 | f""" 120 | INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after) 121 | VALUES ($1, $2, $3, $4) 122 | ON CONFLICT (name) DO UPDATE 123 | SET 124 | task_spec = $3, 125 | execute_after = $4, 126 | locked_by = NULL, 127 | locked_until = NULL 128 | """, 129 | task.canonical_name, 130 | task.parent_task.canonical_name, 131 | json.dumps(task.as_spec()), 132 | task.get_next_execution(utc_now, None), 133 | ) 134 | logger.debug(f"register_task returned {result}") 135 | 136 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 137 | async with self._transaction() as connection: 138 | await connection.execute(f"DELETE FROM {self._table_name} WHERE name = $1", task.canonical_name) 139 | 140 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 141 | async with self._transaction() as connection: 142 | record = await connection.fetchrow( 143 | f"SELECT * FROM {self._table_name} WHERE name = $1 FOR UPDATE", 144 | task.canonical_name, 145 | ) 146 | logger.debug(f"poll_task returned {record}") 147 | 148 | update = False 149 | if record is None: 150 | # Regular (non-dynamic) tasks will be implicitly created on first poll, 151 | # but dynamic task instances must be explicitly created to prevent spurious 152 | # polls from re-creating them after being deleted. 
153 | if task.parent_task is not None: 154 | raise PyncetteException("Task not found") 155 | 156 | execute_after = task.get_next_execution(utc_now, None) 157 | locked_until = None 158 | locked_by = None 159 | update = True 160 | else: 161 | execute_after = record["execute_after"] 162 | locked_until = record["locked_until"] 163 | locked_by = record["locked_by"] 164 | 165 | assert execute_after is not None 166 | scheduled_at = execute_after 167 | 168 | if locked_until is not None and locked_until > utc_now and (lease != locked_by): 169 | result = ResultType.LOCKED 170 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_MOST_ONCE: 171 | execute_after = task.get_next_execution(utc_now, execute_after) 172 | result = ResultType.READY 173 | locked_until = None 174 | locked_by = None 175 | update = True 176 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_LEAST_ONCE: 177 | locked_until = utc_now + task.lease_duration 178 | locked_by = uuid.uuid4() 179 | result = ResultType.READY 180 | update = True 181 | else: 182 | result = ResultType.PENDING 183 | 184 | if update: 185 | await self._update_record( 186 | connection, 187 | task, 188 | locked_until, 189 | locked_by, 190 | execute_after, 191 | ) 192 | 193 | return PollResponse(result=result, scheduled_at=scheduled_at, lease=locked_by) 194 | 195 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 196 | async with self._transaction() as connection: 197 | record = await connection.fetchrow( 198 | f"SELECT * FROM {self._table_name} WHERE name = $1 FOR UPDATE", 199 | task.canonical_name, 200 | ) 201 | logger.debug(f"commit_task returned {record}") 202 | 203 | if not record: 204 | logger.warning(f"Task {task} not found, skipping.") 205 | return 206 | 207 | if record["locked_by"] != lease: 208 | logger.warning(f"Lease lost on task {task}, skipping.") 209 | return 210 | 211 | await self._update_record( 212 | connection, 213 | task, 214 | None, 215 | None, 216 | task.get_next_execution(utc_now, record["execute_after"]), 217 | ) 218 | 219 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 220 | async with self._transaction() as connection: 221 | locked_until = utc_now + task.lease_duration 222 | result = await connection.execute( 223 | f""" 224 | UPDATE {self._table_name} 225 | SET 226 | locked_until = $1 227 | WHERE name = $2 AND locked_by = $3 228 | """, 229 | locked_until, 230 | task.canonical_name, 231 | lease, 232 | ) 233 | logger.debug(f"extend_lease returned {result}") 234 | if result == "UPDATE 1": 235 | return lease 236 | else: 237 | return None 238 | 239 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 240 | async with self._transaction() as connection: 241 | result = await connection.execute( 242 | f""" 243 | UPDATE {self._table_name} 244 | SET 245 | locked_by = NULL, 246 | locked_until = NULL 247 | WHERE name = $1 AND locked_by = $2 248 | """, 249 | task.canonical_name, 250 | lease, 251 | ) 252 | logger.debug(f"unlock_task returned {result}") 253 | 254 | @asynccontextmanager 255 | async def _transaction(self) -> AsyncIterator[asyncpg.Connection]: 256 | async with self._pool.acquire() as connection: 257 | async with connection.transaction(): 258 | yield connection 259 | 260 | async def _update_record( 261 | self, 262 | connection: asyncpg.Connection, 263 | task: Task, 264 | locked_until: Optional[datetime.datetime], 265 | locked_by: Optional[uuid.UUID], 266 | 
execute_after: Optional[datetime.datetime], 267 | ) -> None: 268 | if execute_after is None: 269 | result = await connection.execute(f"DELETE FROM {self._table_name} WHERE name = $1", task.canonical_name) 270 | else: 271 | result = await connection.execute( 272 | f""" 273 | INSERT INTO {self._table_name} (name, locked_until, locked_by, execute_after) 274 | VALUES ($1, $2, $3, $4) 275 | ON CONFLICT (name) DO UPDATE 276 | SET 277 | locked_until = $2, 278 | locked_by = $3, 279 | execute_after = $4 280 | """, 281 | task.canonical_name, 282 | locked_until, 283 | locked_by, 284 | execute_after, 285 | ) 286 | logger.debug(f"update_record returned {result}") 287 | 288 | 289 | @contextlib.asynccontextmanager 290 | async def postgres_repository(**kwargs: Any) -> AsyncIterator[PostgresRepository]: 291 | """Factory context manager for repository that initializes the connection to Postgres""" 292 | postgres_pool = await asyncpg.create_pool(kwargs["postgres_url"]) 293 | try: 294 | repository = PostgresRepository(postgres_pool, **kwargs) 295 | if not kwargs.get("postgres_skip_table_create", False): 296 | await repository.initialize() 297 | 298 | yield repository 299 | finally: 300 | await postgres_pool.close() 301 | -------------------------------------------------------------------------------- /src/pyncette/prometheus.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import datetime 3 | import math 4 | import time 5 | from typing import Any 6 | from collections.abc import AsyncIterator 7 | from collections.abc import Awaitable 8 | from typing import Callable 9 | from typing import Optional 10 | 11 | from prometheus_client import Counter 12 | from prometheus_client import Gauge 13 | from prometheus_client import Histogram 14 | 15 | from . 
import pyncette 16 | from .model import Context 17 | from .model import ContinuationToken 18 | from .model import Lease 19 | from .model import PollResponse 20 | from .model import QueryResponse 21 | from .pyncette import Pyncette 22 | from .pyncette import PyncetteContext 23 | from .repository import Repository 24 | from .repository import RepositoryFactory 25 | from .task import Task 26 | 27 | TASK_LABELS = ["task_name"] 28 | 29 | 30 | def _get_task_labels(task: Task) -> dict[str, str]: 31 | # Instances of dynamic tasks can have high cardinality, so we choose the task template name 32 | return {"task_name": task.parent_task.name if task.parent_task else task.name} 33 | 34 | 35 | class OperationMetricSet: 36 | """Collection of Prometheus metrics representing a logical operation""" 37 | 38 | requests: Counter 39 | requests_duration: Histogram 40 | exceptions: Counter 41 | requests_in_progress: Gauge 42 | 43 | def __init__(self, operation_name: str, labels: list[str]): 44 | self.requests = Counter( 45 | f"pyncette_{operation_name}_total", 46 | f"Total count of {operation_name} operations", 47 | labels, 48 | ) 49 | self.requests_duration = Histogram( 50 | f"pyncette_{operation_name}_duration_seconds", 51 | f"Histogram of {operation_name} processing time", 52 | labels, 53 | ) 54 | self.exceptions = Counter( 55 | f"pyncette_{operation_name}_failures_total", 56 | f"Total count of failed {operation_name} operations", 57 | [*labels, "exception_type"], 58 | ) 59 | self.requests_in_progress = Gauge( 60 | f"pyncette_{operation_name}_in_progress", 61 | f"Gauge of {operation_name} operations currently being processed", 62 | labels, 63 | ) 64 | 65 | @contextlib.asynccontextmanager 66 | async def measure(self, **labels: str) -> AsyncIterator[None]: 67 | """An async context manager that measures the execution of the wrapped code""" 68 | if labels: 69 | self.requests_in_progress.labels(**labels).inc() 70 | self.requests.labels(**labels).inc() 71 | else: 72 | self.requests_in_progress.inc() 73 | self.requests.inc() 74 | 75 | before_time = time.perf_counter() 76 | try: 77 | yield 78 | except Exception as e: 79 | self.exceptions.labels(**labels, exception_type=type(e).__name__).inc() 80 | raise e from None 81 | finally: 82 | if labels: 83 | self.requests_duration.labels(**labels).observe(time.perf_counter() - before_time) 84 | self.requests_in_progress.labels(**labels).dec() 85 | else: 86 | self.requests_duration.observe(time.perf_counter() - before_time) 87 | self.requests_in_progress.dec() 88 | 89 | 90 | class MeteredRepository(Repository): 91 | """A wrapper for repository that exposes metrics to Prometheus""" 92 | 93 | def __init__(self, metric_set: OperationMetricSet, inner_repository: Repository): 94 | self._metric_set = metric_set 95 | self._inner = inner_repository 96 | 97 | async def poll_dynamic_task( 98 | self, 99 | utc_now: datetime.datetime, 100 | task: Task, 101 | continuation_token: Optional[ContinuationToken] = None, 102 | ) -> QueryResponse: 103 | """Queries the dynamic tasks for execution""" 104 | async with self._metric_set.measure(operation="poll_dynamic_task", **_get_task_labels(task)): 105 | return await self._inner.poll_dynamic_task(utc_now, task, continuation_token) 106 | 107 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 108 | """Registers a dynamic task""" 109 | async with self._metric_set.measure(operation="register_task", **_get_task_labels(task)): 110 | return await self._inner.register_task(utc_now, task) 111 | 112 | async def unregister_task(self, 
utc_now: datetime.datetime, task: Task) -> None: 113 | """Deregisters a dynamic task implementation""" 114 | async with self._metric_set.measure(operation="unregister_task", **_get_task_labels(task)): 115 | return await self._inner.unregister_task(utc_now, task) 116 | 117 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 118 | """Polls the task to determine whether it is ready for execution""" 119 | async with self._metric_set.measure(operation="poll_task", **_get_task_labels(task)): 120 | return await self._inner.poll_task(utc_now, task, lease) 121 | 122 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 123 | """Commits the task, which signals a successful run.""" 124 | async with self._metric_set.measure(operation="commit_task", **_get_task_labels(task)): 125 | return await self._inner.commit_task(utc_now, task, lease) 126 | 127 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 128 | """Extends the lease on the task. Returns the new lease if lease was still valid.""" 129 | async with self._metric_set.measure(operation="extend_lease", **_get_task_labels(task)): 130 | return await self._inner.extend_lease(utc_now, task, lease) 131 | 132 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 133 | """Unlocks the task, making it eligible for retries in case execution failed.""" 134 | async with self._metric_set.measure(operation="unlock_task", **_get_task_labels(task)): 135 | return await self._inner.unlock_task(utc_now, task, lease) 136 | 137 | 138 | _task_metric_set = OperationMetricSet("tasks", TASK_LABELS) 139 | _task_staleness = Histogram( 140 | "pyncette_tasks_staleness_seconds", 141 | "Histogram of staleness of task executions (difference between scheduled and actual time)", 142 | TASK_LABELS, 143 | buckets=( 144 | 0.05, 145 | 0.1, 146 | 0.25, 147 | 0.5, 148 | 0.75, 149 | 1.0, 150 | 2.5, 151 | 5.0, 152 | 7.5, 153 | 10.0, 154 | 25.0, 155 | 50.0, 156 | 75.0, 157 | 100.0, 158 | 250.0, 159 | 500.0, 160 | 750.0, 161 | 1000.0, 162 | math.inf, 163 | ), 164 | ) 165 | 166 | 167 | async def prometheus_middleware(context: Context, next: Callable[[], Awaitable[None]]) -> None: 168 | """Middleware that exposes task execution metrics to Prometheus""" 169 | labels = _get_task_labels(context.task) 170 | staleness = pyncette._current_time() - context.scheduled_at 171 | _task_staleness.labels(**labels).observe(staleness.total_seconds()) 172 | async with _task_metric_set.measure(**labels): 173 | await next() 174 | 175 | 176 | _repository_metric_set = OperationMetricSet("repository_ops", ["operation", *TASK_LABELS]) 177 | 178 | _ticks_metric_set = OperationMetricSet("ticks", []) 179 | 180 | 181 | def with_prometheus_repository( 182 | repository_factory: RepositoryFactory, 183 | ) -> RepositoryFactory: 184 | """Wraps the repository factory into one that exposes the metrics via Prometheus""" 185 | 186 | @contextlib.asynccontextmanager 187 | async def _repository_factory(**kwargs: Any) -> AsyncIterator[MeteredRepository]: 188 | async with repository_factory(**kwargs) as inner_repository: 189 | yield MeteredRepository(_repository_metric_set, inner_repository) 190 | 191 | return _repository_factory 192 | 193 | 194 | async def prometheus_fixture(app_context: PyncetteContext) -> AsyncIterator[None]: 195 | tick_func = app_context._tick 196 | 197 | async def _metered_tick(*args: Any, **kwargs: Any) -> Any: 198 | 
async with _ticks_metric_set.measure(): 199 | return await tick_func(*args, **kwargs) 200 | 201 | app_context._tick = _metered_tick # type: ignore 202 | yield 203 | 204 | 205 | def use_prometheus( 206 | app: Pyncette, 207 | measure_repository: bool = True, 208 | measure_ticks: bool = True, 209 | measure_tasks: bool = True, 210 | ) -> None: 211 | """ 212 | Decorate the Pyncette app with the Prometheus metric exporter. 213 | 214 | :param measure_repository: Whether to measure repository operations 215 | :param measure_ticks: Whether to measure ticks 216 | :param measure_tasks: Whether to measure individual task executions 217 | """ 218 | if measure_repository: 219 | app._repository_factory = with_prometheus_repository(app._repository_factory) 220 | if measure_ticks: 221 | app.use_fixture("_prometheus", prometheus_fixture) 222 | if measure_tasks: 223 | app.use_middleware(prometheus_middleware) 224 | -------------------------------------------------------------------------------- /src/pyncette/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tibordp/pyncette/053bf10747b9301b5b4993c0c25c2a6eb63ab5d2/src/pyncette/py.typed -------------------------------------------------------------------------------- /src/pyncette/redis/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import contextlib 4 | import datetime 5 | import json 6 | import logging 7 | import uuid 8 | from dataclasses import dataclass 9 | from importlib.resources import read_text 10 | from typing import Any 11 | from collections.abc import AsyncIterator 12 | 13 | import redis 14 | from redis import asyncio as aioredis 15 | 16 | from pyncette.errors import PyncetteException 17 | from pyncette.model import ContinuationToken 18 | from pyncette.model import Lease 19 | from pyncette.model import PollResponse 20 | from pyncette.model import QueryResponse 21 | from pyncette.model import ResultType 22 | from pyncette.repository import Repository 23 | from pyncette.task import Task 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | _CONTINUATION_TOKEN = ContinuationToken(object()) 29 | 30 | 31 | class _LuaScript: 32 | """A wrapper for a Redis Lua script that automatically reloads it if e.g. 
SCRIPT FLUSH is invoked""" 33 | 34 | _script: str 35 | _sha: str | None 36 | 37 | def __init__(self, script_path: str): 38 | self._script = read_text(__name__, script_path) 39 | self._sha = None 40 | 41 | async def register(self, client: aioredis.Redis) -> None: 42 | self._sha = await client.script_load(self._script) 43 | 44 | async def execute( 45 | self, 46 | client: aioredis.Redis, 47 | keys: list[Any] | None = None, 48 | args: list[Any] | None = None, 49 | ) -> Any: 50 | if self._sha is None: 51 | await self.register(client) 52 | 53 | keys = keys or [] 54 | args = args or [] 55 | 56 | for _ in range(3): 57 | try: 58 | return await client.evalsha(self._sha, len(keys), *keys, *args) # ty: ignore[invalid-await] 59 | except redis.exceptions.NoScriptError: 60 | logger.warning("We seem to have lost the LUA script, reloading...") 61 | await self.register(client) 62 | 63 | raise PyncetteException("Could not reload the Lua script.") 64 | 65 | 66 | @dataclass 67 | class _ManageScriptResponse: 68 | result: ResultType 69 | version: int 70 | execute_after: datetime.datetime | None 71 | locked_until: datetime.datetime | None 72 | task_spec: dict[str, Any] | None 73 | locked_by: str | None 74 | 75 | @classmethod 76 | def from_response(cls, response: list[bytes]) -> _ManageScriptResponse: 77 | return cls( 78 | result=ResultType[response[0].decode()], 79 | version=int(response[1] or 0), 80 | execute_after=None if response[2] is None else datetime.datetime.fromisoformat(response[2].decode()), 81 | locked_until=None if response[3] is None else datetime.datetime.fromisoformat(response[3].decode()), 82 | locked_by=None if response[4] is None else response[4].decode(), 83 | task_spec=None if response[5] is None else json.loads(response[5]), 84 | ) 85 | 86 | 87 | def _create_dynamic_task(task: Task, response_data: list[bytes]) -> tuple[Task, Lease]: 88 | task_data = _ManageScriptResponse.from_response(response_data) 89 | assert task_data.task_spec is not None 90 | 91 | return (task.instantiate_from_spec(task_data.task_spec), Lease(task_data)) 92 | 93 | 94 | class RedisRepository(Repository): 95 | """Redis-backed store for Pyncette task execution data""" 96 | 97 | _redis_client: aioredis.Redis 98 | _namespace: str 99 | _manage_script: _LuaScript 100 | _poll_dynamic_script: _LuaScript 101 | 102 | def __init__(self, redis_client: aioredis.Redis, **kwargs: Any): 103 | self._redis_client = redis_client 104 | self._namespace = kwargs.get("redis_namespace", "") 105 | self._batch_size = kwargs.get("batch_size", 100) 106 | self._poll_dynamic_script = _LuaScript("poll_dynamic.lua") 107 | self._manage_script = _LuaScript("manage.lua") 108 | 109 | if self._batch_size < 1: 110 | raise ValueError("Batch size must be greater than 0") 111 | 112 | async def register_scripts(self) -> None: 113 | """Registers the Lua scripts used by the implementation ahead of time""" 114 | await self._poll_dynamic_script.register(self._redis_client) 115 | await self._manage_script.register(self._redis_client) 116 | 117 | async def poll_dynamic_task( 118 | self, 119 | utc_now: datetime.datetime, 120 | task: Task, 121 | continuation_token: ContinuationToken | None = None, 122 | ) -> QueryResponse: 123 | new_locked_until = utc_now + task.lease_duration 124 | response = await self._poll_dynamic_script.execute( 125 | self._redis_client, 126 | keys=[self._get_task_index_key(task)], 127 | args=[ 128 | utc_now.isoformat(), 129 | self._batch_size, 130 | new_locked_until.isoformat(), 131 | str(uuid.uuid4()), 132 | ], 133 | ) 134 | 
logger.debug(f"query_lua script returned [{self._batch_size}] {response}") 135 | 136 | return QueryResponse( 137 | tasks=[_create_dynamic_task(task, response_data) for response_data in response[1:]], 138 | continuation_token=_CONTINUATION_TOKEN if response[0] == b"HAS_MORE" else None, 139 | ) 140 | 141 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 142 | execute_after = task.get_next_execution(utc_now, None) 143 | assert execute_after is not None 144 | 145 | await self._manage_record( 146 | task, 147 | "REGISTER", 148 | execute_after.isoformat(), 149 | json.dumps(task.as_spec()), 150 | ) 151 | 152 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 153 | await self._manage_record(task, "UNREGISTER") 154 | 155 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Lease | None = None) -> PollResponse: 156 | # Nominally, we need at least two round-trips to Redis since the next execute_after is calculated 157 | # in Python code due to extra flexibility. This is why we have optimistic locking below to ensure that 158 | # the next execution time was calculated using a correct base if another process modified it in between. 159 | # In most cases, however, we can assume that the base time has not changed since the last invocation, 160 | # so by caching it, we can poll a task using a single round-trip (if we are wrong, the loop below will still 161 | # ensure correctness as the version will not match). 162 | last_lease: _ManageScriptResponse | None = getattr(task, "_last_lease", None) 163 | if isinstance(lease, _ManageScriptResponse): 164 | version, execute_after, locked_by = ( 165 | lease.version, 166 | lease.execute_after, 167 | lease.locked_by, 168 | ) 169 | elif last_lease is not None: 170 | logger.debug("Using cached values for execute_after") 171 | version, execute_after, locked_by = ( 172 | last_lease.version, 173 | last_lease.execute_after, 174 | str(uuid.uuid4()), 175 | ) 176 | else: 177 | # By default we assume that the task is brand new 178 | version, execute_after, locked_by = ( 179 | 0, 180 | None, 181 | str(uuid.uuid4()), 182 | ) 183 | 184 | new_locked_until = utc_now + task.lease_duration 185 | for _ in range(5): 186 | next_execution = task.get_next_execution(utc_now, execute_after) 187 | response = await self._manage_record( 188 | task, 189 | "POLL", 190 | task.execution_mode.name, 191 | "REGULAR" if task.parent_task is None else "DYNAMIC", 192 | utc_now.isoformat(), 193 | version, 194 | next_execution.isoformat() if next_execution is not None else "", 195 | new_locked_until.isoformat(), 196 | locked_by, 197 | ) 198 | task._last_lease = response # type: ignore 199 | 200 | if response.result == ResultType.LEASE_MISMATCH: 201 | logger.debug("Lease mismatch, retrying.") 202 | execute_after = response.execute_after 203 | version = response.version 204 | elif response.result == ResultType.MISSING: 205 | raise PyncetteException("Task not found") 206 | else: 207 | return PollResponse( 208 | result=response.result, 209 | scheduled_at=execute_after, # ty: ignore[invalid-argument-type] 210 | lease=Lease(response), 211 | ) 212 | 213 | raise PyncetteException("Unable to acquire the lock on the task due to contention") 214 | 215 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 216 | assert isinstance(lease, _ManageScriptResponse) 217 | next_execution = task.get_next_execution(utc_now, lease.execute_after) 218 | response = await self._manage_record( 219 | task, 220 | 
"COMMIT", 221 | lease.version, 222 | lease.locked_by, 223 | next_execution.isoformat() if next_execution is not None else "", 224 | ) 225 | task._last_lease = response # type: ignore 226 | if response.result == ResultType.LEASE_MISMATCH: 227 | logger.info("Not commiting, as we have lost the lease") 228 | 229 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 230 | assert isinstance(lease, _ManageScriptResponse) 231 | response = await self._manage_record(task, "UNLOCK", lease.version, lease.locked_by) 232 | task._last_lease = response # type: ignore 233 | if response.result == ResultType.LEASE_MISMATCH: 234 | logger.info("Not unlocking, as we have lost the lease") 235 | 236 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Lease | None: 237 | assert isinstance(lease, _ManageScriptResponse) 238 | new_locked_until = utc_now + task.lease_duration 239 | response = await self._manage_record(task, "EXTEND", lease.version, lease.locked_by, new_locked_until.isoformat()) 240 | task._last_lease = response # type: ignore 241 | 242 | if response.result == ResultType.READY: 243 | return Lease(response) 244 | else: 245 | return None 246 | 247 | async def _manage_record(self, task: Task, *args: Any) -> _ManageScriptResponse: 248 | response = await self._manage_script.execute( 249 | self._redis_client, 250 | keys=[ 251 | self._get_task_record_key(task), 252 | self._get_task_index_key(task.parent_task), 253 | ], 254 | args=list(args), 255 | ) 256 | logger.debug(f"manage_lua script returned {response}") 257 | return _ManageScriptResponse.from_response(response) 258 | 259 | def _get_task_record_key(self, task: Task) -> str: 260 | return f"pyncette:{self._namespace}:task:{task.canonical_name}" 261 | 262 | def _get_task_index_key(self, task: Task | None) -> str: 263 | # A prefix-coded index key, so there are no restrictions on task names. 264 | index_name = f"index:{task.canonical_name}" if task else "index" 265 | return f"pyncette:{self._namespace}:{index_name}" 266 | 267 | 268 | @contextlib.asynccontextmanager 269 | async def redis_repository(**kwargs: Any) -> AsyncIterator[RedisRepository]: 270 | """Factory context manager for Redis repository that initializes the connection to Redis""" 271 | if not isinstance(kwargs["redis_url"], str): 272 | raise PyncetteException("Redis URL is required") 273 | 274 | async with aioredis.from_url(kwargs["redis_url"]) as redis_pool: 275 | repository = RedisRepository(redis_pool, **kwargs) 276 | await repository.register_scripts() 277 | yield repository 278 | -------------------------------------------------------------------------------- /src/pyncette/redis/manage.lua: -------------------------------------------------------------------------------- 1 | local version, execute_after, locked_until, locked_by, task_spec = unpack(redis.call('hmget', KEYS[1], 'version', 'execute_after', 'locked_until', 'locked_by', 'task_spec')) 2 | local key_exists = version ~= false 3 | local result 4 | 5 | local function getIndexKey() 6 | if not locked_until or locked_until < execute_after then 7 | return execute_after .. '_' .. KEYS[1] 8 | else 9 | return locked_until .. '_' .. 
KEYS[1] 10 | end 11 | end 12 | 13 | local function setKey(attr, val) 14 | if val == false then 15 | redis.call('hdel', KEYS[1], attr) 16 | else 17 | redis.call('hset', KEYS[1], attr, val) 18 | end 19 | end 20 | 21 | -- Update the task data while also updating the index 22 | local function updateRecord(new_execute_after, new_locked_until, new_locked_by, new_task_spec) 23 | redis.call('zrem', KEYS[2], getIndexKey()) 24 | version, execute_after, locked_until, locked_by, task_spec = version + 1, new_execute_after, new_locked_until, new_locked_by, new_task_spec 25 | setKey('version', version) 26 | setKey('execute_after', execute_after) 27 | setKey('locked_until', locked_until) 28 | setKey('locked_by', locked_by) 29 | setKey('task_spec', task_spec) 30 | redis.call('zadd', KEYS[2], 0, getIndexKey()) 31 | end 32 | 33 | local function deleteRecord() 34 | redis.call('zrem', KEYS[2], getIndexKey()) 35 | version, execute_after, locked_until, locked_by, task_spec = false, false, false, false, false 36 | redis.call('del', KEYS[1]) 37 | end 38 | 39 | 40 | if ARGV[1] == 'POLL' then 41 | local _, mode, task_type, utc_now, incoming_version, incoming_execute_after, incoming_locked_until, incoming_locked_by = unpack(ARGV) 42 | 43 | if not key_exists and task_type == "REGULAR" then 44 | version, execute_after = incoming_version, incoming_execute_after 45 | redis.call('hmset', KEYS[1], 'version', version, 'execute_after', execute_after) 46 | redis.call('zadd', KEYS[2], 0, getIndexKey()) 47 | end 48 | 49 | if not key_exists and task_type == "DYNAMIC" then 50 | result = "MISSING" 51 | elseif locked_until and utc_now < locked_until and not (version == incoming_version and locked_by == incoming_locked_by) then 52 | result = "LOCKED" 53 | elseif execute_after <= utc_now and version ~= incoming_version then 54 | result = "LEASE_MISMATCH" 55 | elseif execute_after <= utc_now and mode == 'AT_MOST_ONCE' and incoming_execute_after == '' then 56 | deleteRecord() 57 | result = "READY" 58 | elseif execute_after <= utc_now and mode == 'AT_MOST_ONCE' then 59 | updateRecord(incoming_execute_after, false, false, task_spec) 60 | result = "READY" 61 | elseif execute_after <= utc_now and mode == 'AT_LEAST_ONCE' then 62 | updateRecord(execute_after, incoming_locked_until, incoming_locked_by, task_spec) 63 | result = "READY" 64 | else 65 | result = "PENDING" 66 | end 67 | elseif ARGV[1] == 'COMMIT' then 68 | local _, incoming_version, incoming_locked_by, incoming_execute_after = unpack(ARGV) 69 | 70 | if not (version == incoming_version and locked_by == incoming_locked_by) then 71 | result = "LEASE_MISMATCH" 72 | elseif incoming_execute_after == '' then 73 | deleteRecord() 74 | result = "READY" 75 | else 76 | updateRecord(incoming_execute_after, false, false, task_spec) 77 | result = "READY" 78 | end 79 | elseif ARGV[1] == 'UNLOCK' then 80 | local _, incoming_version, incoming_locked_by = unpack(ARGV) 81 | 82 | if version == incoming_version and locked_by == incoming_locked_by then 83 | updateRecord(execute_after, false, false, task_spec) 84 | result = "READY" 85 | else 86 | result = "LEASE_MISMATCH" 87 | end 88 | elseif ARGV[1] == 'EXTEND' then 89 | local _, incoming_version, incoming_locked_by, incoming_locked_until = unpack(ARGV) 90 | 91 | if version == incoming_version and locked_by == incoming_locked_by then 92 | updateRecord(execute_after, incoming_locked_until, incoming_locked_by, task_spec) 93 | result = "READY" 94 | else 95 | result = "LEASE_MISMATCH" 96 | end 97 | elseif ARGV[1] == 'REGISTER' then 98 | local _, 
incoming_execute_after, incoming_task_spec = unpack(ARGV) 99 | 100 | if not key_exists then 101 | version, execute_after, task_spec = 0, incoming_execute_after, incoming_task_spec 102 | redis.call('hmset', KEYS[1], 'version', version, 'execute_after', execute_after, 'task_spec', task_spec) 103 | redis.call('zadd', KEYS[2], 0, getIndexKey()) 104 | else 105 | updateRecord(incoming_execute_after, false, false, incoming_task_spec) 106 | end 107 | 108 | result = "READY" 109 | elseif ARGV[1] == 'UNREGISTER' then 110 | if key_exists then 111 | deleteRecord() 112 | end 113 | 114 | result = "READY" 115 | end 116 | 117 | return { result, version, execute_after, locked_until, locked_by, task_spec} 118 | -------------------------------------------------------------------------------- /src/pyncette/redis/poll_dynamic.lua: -------------------------------------------------------------------------------- 1 | local utc_now, limit, incoming_locked_until, incoming_locked_by = unpack(ARGV) 2 | limit = tonumber(limit) 3 | 4 | local tasksets = redis.call('zrangebylex', KEYS[1], '-', '(' .. utc_now .. '`', 'LIMIT', 0, limit + 1) 5 | local results = { "READY" } 6 | 7 | for key,value in pairs(tasksets) do 8 | local task_name = value:gmatch('_(.*)')() 9 | local version, execute_after, locked_until, locked_by, task_spec = unpack(redis.call('hmget', task_name, 'version', 'execute_after', 'locked_until', 'locked_by', 'task_spec')) 10 | 11 | local function getIndexKey() 12 | if not locked_until or locked_until < execute_after then 13 | return execute_after .. '_' .. task_name 14 | else 15 | return locked_until .. '_' .. task_name 16 | end 17 | end 18 | 19 | redis.call('zrem', KEYS[1], getIndexKey()) 20 | version, locked_until, locked_by = version + 1, incoming_locked_until, incoming_locked_by 21 | redis.call('hmset', task_name, 'version', version, 'locked_until', locked_until, 'locked_by', locked_by) 22 | redis.call('zadd', KEYS[1], 0, getIndexKey()) 23 | 24 | results[key + 1] = { "READY", version, execute_after, locked_until, locked_by, task_spec } 25 | if key == limit then 26 | results[1] = "HAS_MORE" 27 | break 28 | end 29 | end 30 | 31 | return results 32 | -------------------------------------------------------------------------------- /src/pyncette/repository.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import datetime 3 | import logging 4 | from typing import Any 5 | from typing import AsyncContextManager 6 | from typing import Optional 7 | from typing import Protocol 8 | 9 | from .model import ContinuationToken 10 | from .model import Lease 11 | from .model import PollResponse 12 | from .model import QueryResponse 13 | from .task import Task 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class Repository(abc.ABC): 19 | """Abstract base class representing a store for Pyncette tasks""" 20 | 21 | @abc.abstractmethod 22 | async def poll_dynamic_task( 23 | self, 24 | utc_now: datetime.datetime, 25 | task: Task, 26 | continuation_token: Optional[ContinuationToken] = None, 27 | ) -> QueryResponse: 28 | """Queries the dynamic tasks for execution""" 29 | 30 | @abc.abstractmethod 31 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 32 | """Registers a dynamic task""" 33 | 34 | @abc.abstractmethod 35 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 36 | """Deregisters a dynamic task implementation""" 37 | 38 | @abc.abstractmethod 39 | async def poll_task(self, utc_now: 
datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 40 | """Polls the task to determine whether it is ready for execution""" 41 | 42 | @abc.abstractmethod 43 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 44 | """Commits the task, which signals a successful run.""" 45 | 46 | @abc.abstractmethod 47 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 48 | """Extends the lease on the task. Returns the new lease if lease was still valid.""" 49 | 50 | @abc.abstractmethod 51 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 52 | """Unlocks the task, making it eligible for retries in case execution failed.""" 53 | 54 | 55 | class RepositoryFactory(Protocol): 56 | """A factory context manager for creating a repository""" 57 | 58 | def __call__(self, **kwargs: Any) -> AsyncContextManager[Repository]: 59 | """Creates a context manager managing the lifecycle of the repository.""" 60 | -------------------------------------------------------------------------------- /src/pyncette/sqlite.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import datetime 4 | import json 5 | import logging 6 | import re 7 | import uuid 8 | from typing import Any 9 | from collections.abc import AsyncIterator 10 | from typing import Optional 11 | from typing import cast 12 | 13 | import aiosqlite 14 | import dateutil.tz 15 | 16 | from pyncette.errors import PyncetteException 17 | from pyncette.model import ContinuationToken 18 | from pyncette.model import ExecutionMode 19 | from pyncette.model import Lease 20 | from pyncette.model import PollResponse 21 | from pyncette.model import QueryResponse 22 | from pyncette.model import ResultType 23 | from pyncette.repository import Repository 24 | from pyncette.task import Task 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | def _from_timestamp(timestamp: Optional[float]) -> Optional[datetime.datetime]: 30 | if timestamp is None: 31 | return None 32 | else: 33 | return datetime.datetime.fromtimestamp(timestamp, dateutil.tz.UTC) 34 | 35 | 36 | def _to_timestamp(date: Optional[datetime.datetime]) -> Optional[float]: 37 | if date is None: 38 | return None 39 | else: 40 | return date.timestamp() 41 | 42 | 43 | _CONTINUATION_TOKEN = ContinuationToken(object()) 44 | 45 | 46 | class SqliteRepository(Repository): 47 | _connection: aiosqlite.Connection 48 | _batch_size: int 49 | _table_name: str 50 | _lock: asyncio.Lock 51 | 52 | def __init__( 53 | self, 54 | connection: aiosqlite.Connection, 55 | **kwargs: Any, 56 | ): 57 | self._connection = connection 58 | self._table_name = kwargs.get("sqlite_table_name", "pyncette_tasks") 59 | self._batch_size = kwargs.get("batch_size", 100) 60 | self._lock = asyncio.Lock() 61 | 62 | if self._batch_size < 1: 63 | raise ValueError("Batch size must be greater than 0") 64 | if not re.match(r"^[a-z_]+$", self._table_name): 65 | raise ValueError("Table name can only contain lower-case letters and underscores") 66 | 67 | async def initialize(self) -> None: 68 | async with self._transaction(): 69 | await self._connection.executescript( 70 | f""" 71 | CREATE TABLE IF NOT EXISTS {self._table_name} ( 72 | name PRIMARY KEY, 73 | parent_name, 74 | locked_until timestamp, 75 | locked_by, 76 | execute_after timestamp, 77 | task_spec 78 | ); 79 | CREATE INDEX IF NOT EXISTS due_tasks_{self._table_name} 80 | ON 
{self._table_name} (parent_name, MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0))); 81 | """ 82 | ) 83 | 84 | async def poll_dynamic_task( 85 | self, 86 | utc_now: datetime.datetime, 87 | task: Task, 88 | continuation_token: Optional[ContinuationToken] = None, 89 | ) -> QueryResponse: 90 | async with self._transaction(explicit_begin=True): 91 | locked_by = uuid.uuid4() 92 | locked_until = utc_now + task.lease_duration 93 | 94 | ready_tasks = await self._connection.execute_fetchall( 95 | f"""SELECT * FROM {self._table_name} 96 | WHERE parent_name = :parent_name AND MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= :utc_now 97 | ORDER BY MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC 98 | LIMIT :batch_size 99 | """, 100 | { 101 | "parent_name": task.canonical_name, 102 | "utc_now": _to_timestamp(utc_now), 103 | "batch_size": self._batch_size, 104 | }, 105 | ) 106 | 107 | concrete_tasks = [task.instantiate_from_spec(json.loads(record["task_spec"])) for record in ready_tasks] 108 | await self._connection.executemany( 109 | f""" 110 | UPDATE {self._table_name} 111 | SET 112 | locked_until = :locked_until, 113 | locked_by = :locked_by 114 | WHERE name = :name 115 | """, 116 | [ 117 | { 118 | "name": concrete_task.canonical_name, 119 | "locked_until": _to_timestamp(locked_until), 120 | "locked_by": str(locked_by), 121 | } 122 | for concrete_task in concrete_tasks 123 | ], 124 | ) 125 | 126 | return QueryResponse( 127 | tasks=[(concrete_task, Lease(locked_by)) for concrete_task in concrete_tasks], 128 | continuation_token=_CONTINUATION_TOKEN if len(concrete_tasks) == self._batch_size else None, 129 | ) 130 | 131 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 132 | async with self._transaction(explicit_begin=True): 133 | assert task.parent_task is not None 134 | record = await self._connection.execute_fetchall( 135 | f"SELECT 1 FROM {self._table_name} WHERE name = :name", 136 | {"name": task.canonical_name}, 137 | ) 138 | 139 | if record: 140 | await self._connection.execute_fetchall( 141 | f""" 142 | UPDATE {self._table_name} 143 | SET 144 | task_spec = :task_spec, 145 | execute_after = :execute_after, 146 | locked_until = NULL, 147 | locked_by = NULL 148 | WHERE 149 | name = :name 150 | """, 151 | { 152 | "name": task.canonical_name, 153 | "task_spec": json.dumps(task.as_spec()), 154 | "execute_after": _to_timestamp(task.get_next_execution(utc_now, None)), 155 | }, 156 | ) 157 | else: 158 | await self._connection.execute_fetchall( 159 | f""" 160 | INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after) 161 | VALUES (:name, :parent_name, :task_spec, :execute_after) 162 | """, 163 | { 164 | "name": task.canonical_name, 165 | "parent_name": task.parent_task.canonical_name, 166 | "task_spec": json.dumps(task.as_spec()), 167 | "execute_after": _to_timestamp(task.get_next_execution(utc_now, None)), 168 | }, 169 | ) 170 | 171 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 172 | async with self._transaction(): 173 | await self._connection.execute_fetchall( 174 | f"DELETE FROM {self._table_name} WHERE name = :name", 175 | {"name": task.canonical_name}, 176 | ) 177 | 178 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 179 | async with self._transaction(explicit_begin=True): 180 | records = await self._connection.execute_fetchall( 181 | f"SELECT * FROM {self._table_name} WHERE name = :name", 182 | {"name": 
task.canonical_name}, 183 | ) 184 | 185 | if not records: 186 | # Regular (non-dynamic) tasks will be implicitly created on first poll, 187 | # but dynamic task instances must be explicitly created to prevent spurious 188 | # polls from re-creating them after being deleted. 189 | if task.parent_task is not None: 190 | raise PyncetteException("Task not found") 191 | 192 | locked_until = None 193 | locked_by = None 194 | execute_after = task.get_next_execution(utc_now, None) 195 | await self._connection.execute_fetchall( 196 | f""" 197 | INSERT INTO {self._table_name} (name, execute_after) 198 | VALUES (:name, :execute_after) 199 | """, 200 | { 201 | "name": task.canonical_name, 202 | "execute_after": _to_timestamp(execute_after), 203 | }, 204 | ) 205 | else: 206 | record = next(iter(records)) 207 | execute_after = cast(datetime.datetime, _from_timestamp(record["execute_after"])) 208 | locked_until = _from_timestamp(record["locked_until"]) 209 | locked_by = record["locked_by"] 210 | 211 | assert execute_after is not None 212 | scheduled_at = execute_after 213 | 214 | if locked_until is not None and locked_until > utc_now and (str(lease) != locked_by): 215 | result = ResultType.LOCKED 216 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_MOST_ONCE: 217 | execute_after = task.get_next_execution(utc_now, execute_after) 218 | result = ResultType.READY 219 | locked_until = None 220 | locked_by = None 221 | await self._update_record( 222 | task, 223 | locked_until, 224 | locked_by, 225 | execute_after, 226 | ) 227 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_LEAST_ONCE: 228 | locked_until = utc_now + task.lease_duration 229 | locked_by = uuid.uuid4() 230 | result = ResultType.READY 231 | await self._update_record( 232 | task, 233 | locked_until, 234 | locked_by, 235 | execute_after, 236 | ) 237 | else: 238 | result = ResultType.PENDING 239 | 240 | return PollResponse(result=result, scheduled_at=scheduled_at, lease=locked_by) 241 | 242 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 243 | async with self._transaction(explicit_begin=True): 244 | records = await self._connection.execute_fetchall( 245 | f"SELECT * FROM {self._table_name} WHERE name = :name", 246 | {"name": task.canonical_name}, 247 | ) 248 | 249 | if not records: 250 | logger.warning(f"Task {task} not found, skipping.") 251 | return 252 | 253 | record = next(iter(records)) 254 | if record["locked_by"] != str(lease): 255 | logger.warning(f"Lease lost on task {task}, skipping.") 256 | return 257 | 258 | execute_after = datetime.datetime.fromtimestamp(record["execute_after"], dateutil.tz.UTC) if record["execute_after"] else None 259 | await self._update_record( 260 | task, 261 | None, 262 | None, 263 | task.get_next_execution(utc_now, execute_after), 264 | ) 265 | 266 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 267 | async with self._transaction(): 268 | locked_until = utc_now + task.lease_duration 269 | async with await self._connection.execute( 270 | f""" 271 | UPDATE {self._table_name} 272 | SET 273 | locked_until = :locked_until 274 | WHERE name = :name AND locked_by = :locked_by 275 | """, 276 | { 277 | "locked_until": _to_timestamp(locked_until), 278 | "name": task.canonical_name, 279 | "locked_by": str(lease), 280 | }, 281 | ) as cursor: 282 | if cursor.rowcount == 1: 283 | return lease 284 | else: 285 | return None 286 | 287 | async def unlock_task(self, utc_now: 
datetime.datetime, task: Task, lease: Lease) -> None: 288 | async with self._transaction(): 289 | await self._connection.execute_fetchall( 290 | f""" 291 | UPDATE {self._table_name} 292 | SET 293 | locked_by = NULL, 294 | locked_until = NULL 295 | WHERE name = :name AND locked_by = :locked_by 296 | """, 297 | { 298 | "name": task.canonical_name, 299 | "locked_by": str(lease), 300 | }, 301 | ) 302 | 303 | async def _update_record( 304 | self, 305 | task: Task, 306 | locked_until: Optional[datetime.datetime], 307 | locked_by: Optional[uuid.UUID], 308 | execute_after: Optional[datetime.datetime], 309 | ) -> None: 310 | if execute_after is None: 311 | await self._connection.execute_fetchall( 312 | f"DELETE FROM {self._table_name} WHERE name = :name", 313 | {"name": task.canonical_name}, 314 | ) 315 | else: 316 | await self._connection.execute_fetchall( 317 | f""" 318 | UPDATE {self._table_name} 319 | SET 320 | locked_until = :locked_until, 321 | locked_by = :locked_by, 322 | execute_after = :execute_after 323 | WHERE name = :name 324 | """, 325 | { 326 | "name": task.canonical_name, 327 | "locked_until": _to_timestamp(locked_until), 328 | "locked_by": str(locked_by), 329 | "execute_after": _to_timestamp(execute_after), 330 | }, 331 | ) 332 | 333 | @contextlib.asynccontextmanager 334 | async def _transaction(self, explicit_begin: bool = False) -> AsyncIterator[None]: 335 | async with self._lock: 336 | # If we only execute a single DML statement, the transaction will be implicitly open, 337 | # but if we start with a SELECT, we need to be in a transaction explicitly. 338 | await self._connection.execute_fetchall("BEGIN") 339 | try: 340 | yield 341 | except Exception: 342 | await self._connection.rollback() 343 | raise 344 | else: 345 | await self._connection.commit() 346 | 347 | 348 | @contextlib.asynccontextmanager 349 | async def sqlite_repository(**kwargs: Any) -> AsyncIterator[SqliteRepository]: 350 | """Factory context manager for Sqlite repository that initializes the connection to Sqlite""" 351 | 352 | async with aiosqlite.connect(kwargs.get("sqlite_database", ":memory:")) as connection: 353 | connection.row_factory = aiosqlite.Row 354 | repository = SqliteRepository(connection, **kwargs) 355 | await repository.initialize() 356 | yield repository 357 | -------------------------------------------------------------------------------- /src/pyncette/task.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime 4 | import hashlib 5 | import json 6 | import logging 7 | from typing import Any 8 | from collections.abc import Awaitable 9 | 10 | import dateutil.tz 11 | from croniter import croniter 12 | 13 | from .model import Context 14 | from .model import ExecutionMode 15 | from .model import FailureMode 16 | from .model import PartitionSelector 17 | from .model import TaskFunc 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class Task: 23 | """The base unit of execution""" 24 | 25 | name: str 26 | task_func: TaskFunc 27 | schedule: str | None 28 | interval: datetime.timedelta | None 29 | execute_at: datetime.datetime | None 30 | timezone: str | None 31 | fast_forward: bool 32 | failure_mode: FailureMode 33 | execution_mode: ExecutionMode 34 | lease_duration: datetime.timedelta 35 | parent_task: Task | None 36 | extra_args: dict[str, Any] 37 | _enabled: bool 38 | 39 | def __init__( 40 | self, 41 | *, 42 | name: str, 43 | func: TaskFunc, 44 | enabled: bool = True, 45 | dynamic: bool = False, 
46 | parent_task: Task | None = None, 47 | schedule: str | None = None, 48 | interval: datetime.timedelta | None = None, 49 | execute_at: datetime.datetime | None = None, 50 | timezone: str | None = None, 51 | fast_forward: bool = False, 52 | failure_mode: FailureMode = FailureMode.NONE, 53 | execution_mode: ExecutionMode = ExecutionMode.AT_LEAST_ONCE, 54 | lease_duration: datetime.timedelta = datetime.timedelta(seconds=60), 55 | **kwargs: Any, 56 | ): 57 | self._enabled = enabled 58 | self.name = name 59 | self.task_func = func 60 | 61 | self.dynamic = dynamic 62 | self.parent_task = parent_task 63 | 64 | self.schedule = schedule 65 | self.interval = interval 66 | self.timezone = timezone 67 | self.fast_forward = fast_forward 68 | self.failure_mode = failure_mode 69 | self.execute_at = execute_at 70 | self.execution_mode = execution_mode 71 | self.lease_duration = lease_duration 72 | self.extra_args = kwargs 73 | 74 | self._validate() 75 | 76 | def _validate(self) -> None: 77 | if self.execution_mode == ExecutionMode.AT_MOST_ONCE and self.failure_mode != FailureMode.NONE: 78 | raise ValueError("failure_mode is not applicable when execution_mode is AT_MOST_ONCE") 79 | 80 | if not self.dynamic: 81 | schedule_specs = [spec for spec in [self.schedule, self.interval, self.execute_at] if spec is not None] 82 | if len(schedule_specs) != 1: 83 | raise ValueError("Exactly one of the following must be specified: schedule, interval, execute_at") 84 | if self.schedule is None and self.timezone is not None: 85 | raise ValueError("Timezone may only be specified when cron schedule is used") 86 | if self.schedule is not None: 87 | croniter.expand(self.schedule) 88 | 89 | if self.parent_task is None and self.execute_at is not None: 90 | raise ValueError("execute_at is only supported for dynamic tasks") 91 | 92 | if dateutil.tz.gettz(self.timezone) is None: 93 | raise ValueError(f"Invalid timezone specifier '{self.timezone}'.") 94 | 95 | try: 96 | json.dumps(self.extra_args) 97 | except Exception as e: 98 | raise ValueError(f"Extra parameters must be JSON serializable ({e})") from None 99 | 100 | def get_next_execution( 101 | self, 102 | utc_now: datetime.datetime, 103 | last_execution: datetime.datetime | None, 104 | ) -> datetime.datetime | None: 105 | if self.execute_at is not None: 106 | return self.execute_at.astimezone(dateutil.tz.UTC) if last_execution is None else None 107 | 108 | current_time = last_execution if last_execution is not None else utc_now 109 | 110 | if self.interval is not None: 111 | if not last_execution or not self.fast_forward: 112 | return current_time + self.interval 113 | else: 114 | count = (utc_now - last_execution) // self.interval + 1 115 | return last_execution + (self.interval * count) 116 | 117 | if self.schedule is not None: 118 | if self.timezone: 119 | current_time = current_time.astimezone(dateutil.tz.gettz(self.timezone)) 120 | 121 | cron = croniter(self.schedule, start_time=current_time, ret_type=datetime.datetime) 122 | 123 | while True: 124 | next_execution = cron.get_next() 125 | if not next_execution: 126 | return None 127 | if not self.fast_forward or next_execution >= utc_now: 128 | return next_execution.astimezone(dateutil.tz.UTC) 129 | 130 | raise AssertionError 131 | 132 | def instantiate(self, name: str, **kwargs: Any) -> Task: 133 | """Creates a concrete instance of a dynamic task""" 134 | 135 | if not self.dynamic: 136 | raise ValueError("Cannot instantiate a non-dynamic task") 137 | 138 | extra_args: dict[str, Any] = { 139 | "schedule": self.schedule, 
140 | "interval": self.interval, 141 | "timezone": self.timezone, 142 | "execute_at": self.execute_at, 143 | **self.extra_args, 144 | **kwargs, 145 | } 146 | 147 | return Task( 148 | name=name, 149 | func=self.task_func, 150 | fast_forward=self.fast_forward, 151 | failure_mode=self.failure_mode, 152 | execution_mode=self.execution_mode, 153 | lease_duration=self.lease_duration, 154 | parent_task=self, 155 | **extra_args, 156 | ) 157 | 158 | @property 159 | def enabled(self) -> bool: 160 | return self._enabled 161 | 162 | @enabled.setter 163 | def enabled(self, value: bool) -> None: 164 | self._enabled = value 165 | 166 | @property 167 | def canonical_name(self) -> str: 168 | """A unique identifier for a task instance""" 169 | if self.parent_task is not None: 170 | return "{}:{}".format( 171 | self.parent_task.canonical_name, 172 | self.name.replace(":", "::"), 173 | ) 174 | else: 175 | return self.name.replace(":", "::") 176 | 177 | def as_spec(self) -> dict[str, Any]: 178 | """Serializes all the attributes to task spec""" 179 | return { 180 | "name": self.name, 181 | "schedule": self.schedule, 182 | "interval": self.interval.total_seconds() if self.interval is not None else None, 183 | "execute_at": self.execute_at.isoformat() if self.execute_at is not None else None, 184 | "timezone": self.timezone, 185 | "extra_args": self.extra_args, 186 | } 187 | 188 | def instantiate_from_spec(self, task_spec: dict[str, Any]) -> Task: 189 | """Deserializes all the attributes from task spec""" 190 | return self.instantiate( 191 | name=task_spec["name"], 192 | schedule=task_spec["schedule"], 193 | interval=datetime.timedelta(seconds=task_spec["interval"]) if task_spec["interval"] is not None else None, 194 | timezone=task_spec["timezone"], 195 | execute_at=datetime.datetime.fromisoformat(task_spec["execute_at"]) if task_spec["execute_at"] is not None else None, 196 | **task_spec["extra_args"], 197 | ) 198 | 199 | def __call__(self, context: Context) -> Awaitable[None]: 200 | return self.task_func(context) 201 | 202 | def __str__(self) -> str: 203 | return self.canonical_name 204 | 205 | 206 | def _default_partition_selector(partition_count: int, task_id: str) -> int: 207 | algo = hashlib.sha1() # noqa: S324 208 | algo.update(task_id.encode("utf-8")) 209 | max_value = int.from_bytes(b"\xff" * algo.digest_size, "big") + 1 210 | digest = int.from_bytes(algo.digest(), "big") 211 | 212 | return (digest * partition_count) // max_value 213 | 214 | 215 | class _TaskPartition(Task): 216 | partition_id: int 217 | _parent: PartitionedTask 218 | 219 | def __init__(self, parent: PartitionedTask, partition_id: int, **kwargs: Any): 220 | super().__init__(dynamic=True, **kwargs) 221 | self._parent = parent 222 | self.partition_id = partition_id 223 | 224 | @property 225 | def enabled(self) -> bool: 226 | return self._parent.enabled and (self._parent.enabled_partitions is None or self.partition_id in self._parent.enabled_partitions) 227 | 228 | @enabled.setter 229 | def enabled(self, value: bool) -> None: 230 | raise ValueError("Use enabled_partitions to disable polling a partition.") 231 | 232 | @property 233 | def canonical_name(self) -> str: 234 | """A unique identifier for a task instance""" 235 | 236 | assert self.parent_task is None 237 | return "{}:{}".format(self.name.replace(":", "::"), self.partition_id) 238 | 239 | 240 | class PartitionedTask(Task): 241 | _kwargs: Any 242 | partition_count: int 243 | partition_selector: PartitionSelector 244 | enabled_partitions: list[int] | None 245 | 246 | def __init__( 
247 | self, 248 | *, 249 | partition_count: int, 250 | partition_selector: PartitionSelector = _default_partition_selector, 251 | enabled_partitions: list[int] | None = None, 252 | **kwargs: Any, 253 | ): 254 | if partition_count < 1: 255 | raise ValueError("Partition count must be greater than or equal to 1") 256 | 257 | super().__init__(dynamic=True, **kwargs) 258 | 259 | self.partition_count = partition_count 260 | self.partition_selector = partition_selector 261 | self.enabled_partitions = enabled_partitions 262 | self._kwargs = kwargs 263 | 264 | def get_partitions(self) -> list[Task]: 265 | return [_TaskPartition(self, partition_id=partition_id, **self._kwargs) for partition_id in range(self.partition_count)] 266 | 267 | def instantiate(self, name: str, **kwargs: Any) -> Task: 268 | """Creates a concrete instance of a dynamic task""" 269 | 270 | partition_id = self.partition_selector(self.partition_count, name) 271 | shard = _TaskPartition(self, partition_id=partition_id, **self._kwargs) 272 | 273 | return shard.instantiate(name, **kwargs) 274 | -------------------------------------------------------------------------------- /src/pyncette/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from functools import wraps 4 | 5 | from .errors import LeaseLostException 6 | from .model import Context 7 | from .model import Decorator 8 | from .model import TaskFunc 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | DEFAULT_LEASE_REMAINING_RATIO = 0.5 13 | 14 | 15 | def with_heartbeat( 16 | lease_remaining_ratio: float = DEFAULT_LEASE_REMAINING_RATIO, 17 | cancel_on_lease_lost: bool = False, 18 | ) -> Decorator[TaskFunc]: 19 | """ 20 | Decorate the task to use automatic heartbeating in background. 21 | 22 | :param lease_remaining_ratio: Number between 0 and 1. The ratio between elapsed time and the lease duration when heartbeating will be performed. Default is 0.5. 23 | :param cancel_on_lease_lost: Whether the task should be cancelled if lease expires. Default is False. 24 | """ 25 | if lease_remaining_ratio <= 0 or lease_remaining_ratio >= 1: 26 | raise ValueError("Lease remaining ratio must be in (0, 1)") 27 | 28 | def decorator(func: TaskFunc) -> TaskFunc: 29 | @wraps(func) 30 | async def _func(context: Context) -> None: 31 | body = asyncio.ensure_future(func(context)) 32 | 33 | async def _heartbeater() -> None: 34 | delay_duration = context.task.lease_duration.total_seconds() * lease_remaining_ratio 35 | while True: 36 | await asyncio.sleep(delay_duration) 37 | try: 38 | await asyncio.shield(context.heartbeat()) 39 | except LeaseLostException: 40 | if cancel_on_lease_lost: 41 | body.cancel() 42 | # Regardless of whether we want the task body to continue 43 | # executing, it makes no sense to continue heartbeating 44 | # since the lease has already been lost. 45 | return 46 | except Exception as e: 47 | # There may be transient errors while heartbeating. In this case 48 | # ignore them until the next heartbeat interval. 
49 | logger.warning(f"Heartbeating on {context.task} failed", exc_info=e) 50 | 51 | heartbeater = asyncio.create_task(_heartbeater()) 52 | try: 53 | await body 54 | finally: 55 | heartbeater.cancel() 56 | try: 57 | await heartbeater 58 | except asyncio.CancelledError: 59 | pass 60 | 61 | return _func 62 | 63 | return decorator 64 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | import os 4 | import random 5 | import time 6 | from contextlib import asynccontextmanager 7 | 8 | import dateutil.tz 9 | import pytest 10 | 11 | import pyncette 12 | from pyncette.dynamodb import dynamodb_repository 13 | from pyncette.mysql import mysql_repository 14 | from pyncette.postgres import postgres_repository 15 | from pyncette.redis import redis_repository 16 | from pyncette.sqlite import sqlite_repository 17 | from utils.timemachine import TimeMachine 18 | 19 | 20 | @pytest.fixture 21 | def timemachine(monkeypatch): 22 | timemachine = TimeMachine(datetime.datetime(2019, 1, 1, 0, 0, 0, tzinfo=dateutil.tz.UTC)) 23 | monkeypatch.setattr(pyncette.pyncette, "_current_time", timemachine.utcnow) 24 | monkeypatch.setattr(asyncio, "sleep", timemachine.sleep) 25 | monkeypatch.setattr(asyncio, "wait_for", timemachine.wait_for) 26 | monkeypatch.setattr(time, "perf_counter", timemachine.perf_counter) 27 | return timemachine 28 | 29 | 30 | def wrap_factory(factory, timemachine): 31 | @asynccontextmanager 32 | async def wrapped_factory(*args, **kwargs): 33 | async with factory(*args, **kwargs) as repo: 34 | yield timemachine.decorate_io(repo) 35 | 36 | return timemachine.decorate_io(wrapped_factory) 37 | 38 | 39 | def random_table_name(): 40 | return "pyncette_{}".format("".join([chr(random.randint(ord("a"), ord("z"))) for _ in range(10)])) 41 | 42 | 43 | # Define new configurations here 44 | 45 | 46 | class PostgresBackend: 47 | __name__ = "postgres" 48 | is_persistent = True 49 | 50 | def get_args(self, timemachine): 51 | return { 52 | "repository_factory": wrap_factory(postgres_repository, timemachine), 53 | "postgres_table_name": random_table_name(), 54 | "postgres_url": os.environ.get("POSTGRES_URL", "postgres://postgres:postgres@localhost/pyncette"), 55 | } 56 | 57 | 58 | class RedisBackend: 59 | __name__ = "redis" 60 | is_persistent = True 61 | 62 | def get_args(self, timemachine): 63 | return { 64 | "repository_factory": wrap_factory(redis_repository, timemachine), 65 | "redis_namespace": random_table_name(), 66 | "redis_timeout": 10, 67 | "redis_url": os.environ.get("REDIS_URL", "redis://localhost"), 68 | } 69 | 70 | 71 | class SqlitePersistedBackend: 72 | __name__ = "sqlite_persisted" 73 | is_persistent = True 74 | 75 | def get_args(self, timemachine): 76 | return { 77 | "repository_factory": wrap_factory(sqlite_repository, timemachine), 78 | "sqlite_database": os.environ.get("SQLITE_DATABASE", "pyncette.db"), 79 | "sqlite_table_name": random_table_name(), 80 | } 81 | 82 | 83 | class DynamoDBBackend: 84 | __name__ = "dynamodb" 85 | is_persistent = True 86 | 87 | def get_args(self, timemachine): 88 | return { 89 | "repository_factory": wrap_factory(dynamodb_repository, timemachine), 90 | "dynamodb_table_name": random_table_name(), 91 | "dynamodb_region_name": "eu-west-1", 92 | "dynamodb_endpoint": os.environ.get("DYNAMODB_ENDPOINT", "http://localhost:4566"), 93 | } 94 | 95 | 96 | class MySQLBackend: 97 | __name__ = "mysql" 98 | 
is_persistent = True 99 | 100 | def get_args(self, timemachine): 101 | return { 102 | "repository_factory": wrap_factory(mysql_repository, timemachine), 103 | "mysql_host": os.environ.get("MYSQL_HOST", "localhost"), 104 | "mysql_database": os.environ.get("MYSQL_DATABASE", "pyncette"), 105 | "mysql_user": os.environ.get("MYSQL_USER", "pyncette"), 106 | "mysql_password": os.environ.get("MYSQL_PASSWORD", "password"), 107 | "mysql_table_name": random_table_name(), 108 | } 109 | 110 | 111 | class DefaultBackend: 112 | __name__ = "default" 113 | is_persistent = False 114 | 115 | def get_args(self, timemachine): 116 | return {"repository_factory": wrap_factory(sqlite_repository, timemachine)} 117 | 118 | 119 | all_backends = [ 120 | PostgresBackend(), 121 | MySQLBackend(), 122 | RedisBackend(), 123 | DynamoDBBackend(), 124 | DefaultBackend(), 125 | SqlitePersistedBackend(), 126 | ] 127 | 128 | 129 | def pytest_addoption(parser): 130 | parser.addoption( 131 | "--backend", 132 | action="append", 133 | default=[], 134 | help="list of repositories to test with", 135 | ) 136 | 137 | 138 | def pytest_generate_tests(metafunc): 139 | if "backend" in metafunc.fixturenames: 140 | metafunc.parametrize( 141 | "backend", 142 | [repository for repository in all_backends if repository.__name__ in metafunc.config.getoption("backend")] or all_backends, 143 | ) 144 | -------------------------------------------------------------------------------- /tests/test_dynamodb.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from botocore.exceptions import ClientError 5 | 6 | from pyncette import dynamodb 7 | 8 | from conftest import random_table_name 9 | 10 | DYNAMODB_ENDPOINT = os.environ.get("DYNAMODB_ENDPOINT", "http://localhost:4566") 11 | 12 | 13 | @pytest.mark.asyncio 14 | @pytest.mark.integration 15 | async def test_dynamodb_create(): 16 | async with dynamodb.dynamodb_repository( 17 | dynamodb_table_name=random_table_name(), 18 | dynamodb_endpoint=DYNAMODB_ENDPOINT, 19 | dynamodb_region_name="eu-west-1", 20 | ) as repository: 21 | table_status = await repository._table.table_status 22 | assert table_status == "ACTIVE" 23 | 24 | 25 | @pytest.mark.asyncio 26 | @pytest.mark.integration 27 | async def test_dynamodb_skip_table_create(): 28 | async with dynamodb.dynamodb_repository( 29 | dynamodb_table_name=random_table_name(), 30 | dynamodb_endpoint=DYNAMODB_ENDPOINT, 31 | dynamodb_region_name="eu-west-1", 32 | dynamodb_skip_table_create=True, 33 | ) as repository: 34 | with pytest.raises(ClientError) as e: 35 | await repository._table.table_status 36 | 37 | assert e.value.response["Error"]["Code"] == "ResourceNotFoundException" 38 | -------------------------------------------------------------------------------- /tests/test_mysql.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import pymysql 5 | import pytest 6 | 7 | from pyncette import mysql, Context 8 | from pyncette.task import Task 9 | 10 | from conftest import random_table_name 11 | 12 | 13 | async def dummy_task(context: Context): 14 | pass # pragma: no cover 15 | 16 | 17 | DUMMY_TASK = Task(name="foo", func=dummy_task, schedule="* * * * *") 18 | 19 | 20 | @pytest.mark.asyncio 21 | @pytest.mark.integration 22 | async def test_invalid_table_name(): 23 | with pytest.raises(ValueError): 24 | await mysql.mysql_repository( 25 | mysql_host=os.environ.get("MYSQL_HOST", "localhost"), 26 | 
mysql_database=os.environ.get("MYSQL_DATABASE", "pyncette"), 27 | mysql_user=os.environ.get("MYSQL_USER", "pyncette"), 28 | mysql_password=os.environ.get("MYSQL_PASSWORD", "password"), 29 | mysql_table_name="spaces in table name", 30 | ).__aenter__() 31 | 32 | 33 | @pytest.mark.asyncio 34 | @pytest.mark.integration 35 | async def test_skip_table_create(): 36 | with pytest.raises(pymysql.err.ProgrammingError): 37 | async with mysql.mysql_repository( 38 | mysql_host=os.environ.get("MYSQL_HOST", "localhost"), 39 | mysql_database=os.environ.get("MYSQL_DATABASE", "pyncette"), 40 | mysql_user=os.environ.get("MYSQL_USER", "pyncette"), 41 | mysql_password=os.environ.get("MYSQL_PASSWORD", "password"), 42 | mysql_table_name=random_table_name(), 43 | mysql_skip_table_create=True, 44 | ) as repository: 45 | await repository.poll_task( 46 | datetime.datetime.now(tz=datetime.timezone.utc), 47 | DUMMY_TASK, 48 | ) 49 | -------------------------------------------------------------------------------- /tests/test_postgres.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import asyncpg 5 | import pytest 6 | 7 | from pyncette import postgres, Context 8 | from pyncette.task import Task 9 | 10 | from conftest import random_table_name 11 | 12 | 13 | async def dummy_task(context: Context): 14 | pass # pragma: no cover 15 | 16 | 17 | DUMMY_TASK = Task(name="foo", func=dummy_task, schedule="* * * * *") 18 | 19 | 20 | @pytest.mark.asyncio 21 | @pytest.mark.integration 22 | async def test_invalid_table_name(): 23 | with pytest.raises(ValueError, match="Table name"): 24 | await postgres.postgres_repository( 25 | postgres_url=os.environ.get("POSTGRES_URL", "postgres://postgres:postgres@localhost/pyncette"), 26 | postgres_table_name="spaces in table name", 27 | ).__aenter__() 28 | 29 | 30 | @pytest.mark.asyncio 31 | @pytest.mark.integration 32 | async def test_skip_table_create(): 33 | with pytest.raises(asyncpg.exceptions.UndefinedTableError): 34 | async with postgres.postgres_repository( 35 | postgres_url=os.environ.get("POSTGRES_URL", "postgres://postgres:postgres@localhost/pyncette"), 36 | postgres_table_name=random_table_name(), 37 | postgres_skip_table_create=True, 38 | ) as repository: 39 | await repository.poll_task( 40 | datetime.datetime.now(tz=datetime.timezone.utc), 41 | DUMMY_TASK, 42 | ) 43 | -------------------------------------------------------------------------------- /tests/test_pyncette.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | from unittest.mock import MagicMock 4 | 5 | import pytest 6 | from croniter.croniter import CroniterBadCronError 7 | 8 | from pyncette import Context 9 | from pyncette import ExecutionMode 10 | from pyncette import FailureMode 11 | from pyncette import Pyncette 12 | from pyncette import PyncetteContext 13 | from pyncette.errors import LeaseLostException 14 | from pyncette.task import _default_partition_selector 15 | from pyncette.utils import with_heartbeat 16 | 17 | from conftest import DefaultBackend 18 | 19 | 20 | def test_invalid_configuration(): 21 | async def dummy(context: Context): 22 | pass # pragma: no cover 23 | 24 | # Exactly one of the following must be specified: schedule, interval, execute_at 25 | with pytest.raises(ValueError): 26 | app = Pyncette() 27 | app.task()(dummy) 28 | 29 | with pytest.raises(ValueError): 30 | app = Pyncette() 31 | 
app.task(execute_at=datetime.datetime.now(tz=datetime.timezone.utc))(dummy) 32 | 33 | with pytest.raises(ValueError): 34 | app = Pyncette() 35 | app.task(interval=datetime.timedelta(seconds=2), schedule="* * * * *")(dummy) 36 | 37 | with pytest.raises(ValueError, match="Duplicate task name"): 38 | app = Pyncette() 39 | app.task(interval=datetime.timedelta(seconds=2), name="task1")(dummy) 40 | app.task(interval=datetime.timedelta(seconds=2), name="task1")(dummy) 41 | 42 | with pytest.raises(CroniterBadCronError): 43 | app = Pyncette() 44 | app.task(schedule="abracadabra")(dummy) 45 | 46 | with pytest.raises( 47 | ValueError, 48 | match="failure_mode is not applicable when execution_mode is AT_MOST_ONCE", 49 | ): 50 | app = Pyncette() 51 | app.task(execution_mode=ExecutionMode.AT_MOST_ONCE, failure_mode=FailureMode.UNLOCK)(dummy) 52 | 53 | with pytest.raises(ValueError, match=r"Invalid timezone specifier 'Gondwana/Atlantis'"): 54 | app = Pyncette() 55 | app.task(schedule="* * * * *", timezone="Gondwana/Atlantis")(dummy) 56 | 57 | with pytest.raises(ValueError): 58 | app = Pyncette() 59 | app.task(interval=datetime.timedelta(seconds=2), timezone="Europe/Dublin")(dummy) 60 | 61 | with pytest.raises(ValueError, match="Extra parameters must be JSON serializable"): 62 | app = Pyncette() 63 | app.task(schedule="* * * * *", extra_arg=object())(dummy) 64 | 65 | with pytest.raises(ValueError, match="Unable to determine name for the task"): 66 | app = Pyncette() 67 | app.task(schedule="* * * * *")(object()) # ty: ignore[invalid-argument-type] 68 | 69 | with pytest.raises(ValueError, match="Unable to determine name for the fixture"): 70 | app = Pyncette() 71 | app.fixture()(object()) 72 | 73 | 74 | def test_instantiate_non_dynamic_task(): 75 | async def dummy(context: Context): 76 | pass # pragma: no cover 77 | 78 | with pytest.raises(ValueError): 79 | app = Pyncette() 80 | app.task(schedule="* * * * *")(dummy).instantiate(name="foo") 81 | 82 | 83 | def test_heartbeat_invalid_configuration(): 84 | async def dummy(context: Context): 85 | pass # pragma: no cover 86 | 87 | with pytest.raises(ValueError): 88 | with_heartbeat(lease_remaining_ratio=-1) 89 | 90 | with pytest.raises(ValueError): 91 | with_heartbeat(lease_remaining_ratio=2) 92 | 93 | 94 | @pytest.mark.asyncio 95 | async def test_dynamic_successful_task_interval(): 96 | app = Pyncette() 97 | 98 | @app.dynamic_task() 99 | async def hello(context: Context) -> None: 100 | pass # pragma: no cover 101 | 102 | with pytest.raises(ValueError, match="instance name must be provided"): 103 | async with app.create() as ctx: 104 | await ctx.unschedule_task(hello) 105 | 106 | 107 | @pytest.mark.asyncio 108 | async def test_continues_heartbeating_after_exception(timemachine): 109 | context = MagicMock() 110 | counter = MagicMock() 111 | 112 | async def _heartbeat(): 113 | counter.heartbeat() 114 | raise Exception("Fail") 115 | 116 | context.heartbeat = _heartbeat 117 | context.task.lease_duration = datetime.timedelta(seconds=2) 118 | 119 | @with_heartbeat() 120 | async def hello(context: Context) -> None: 121 | await asyncio.sleep(10) 122 | 123 | task = asyncio.create_task(hello(context)) 124 | await timemachine.step(datetime.timedelta(seconds=10)) 125 | await task 126 | await timemachine.unwind() 127 | 128 | assert counter.heartbeat.call_count == 9 129 | 130 | 131 | @pytest.mark.asyncio 132 | async def test_stops_heartbeating_if_lease_lost(timemachine): 133 | context = MagicMock() 134 | counter = MagicMock() 135 | 136 | async def _heartbeat(): 137 | 
counter.heartbeat() 138 | raise LeaseLostException(context.task) 139 | 140 | context.heartbeat = _heartbeat 141 | context.task.lease_duration = datetime.timedelta(seconds=2) 142 | 143 | @with_heartbeat() 144 | async def hello(context: Context) -> None: 145 | await asyncio.sleep(10) 146 | 147 | task = asyncio.create_task(hello(context)) 148 | await timemachine.step(datetime.timedelta(seconds=10)) 149 | await task 150 | await timemachine.unwind() 151 | 152 | assert counter.heartbeat.call_count == 1 153 | 154 | 155 | def test_fixture_name_invalid(): 156 | app = Pyncette() 157 | 158 | async def dummy(app_context: PyncetteContext): 159 | pass # pragma: no cover 160 | 161 | with pytest.raises(ValueError): 162 | app.use_fixture("scheduled_at", dummy) 163 | 164 | app.use_fixture("duplicate", dummy) 165 | with pytest.raises(ValueError): 166 | app.use_fixture("duplicate", dummy) 167 | 168 | 169 | @pytest.mark.asyncio 170 | async def test_add_to_context(timemachine): 171 | app = Pyncette(**DefaultBackend().get_args(timemachine)) 172 | 173 | counter = MagicMock() 174 | 175 | @app.task(interval=datetime.timedelta(seconds=2)) 176 | async def successful_task(context: Context) -> None: 177 | context.hello() 178 | 179 | async with app.create() as ctx: 180 | ctx.add_to_context("hello", counter) 181 | task = asyncio.create_task(ctx.run()) 182 | await timemachine.step(datetime.timedelta(seconds=10)) 183 | ctx.shutdown() 184 | await task 185 | await timemachine.unwind() 186 | 187 | assert counter.call_count == 5 188 | 189 | 190 | @pytest.mark.asyncio 191 | async def test_add_to_context_invalid_name(): 192 | app = Pyncette() 193 | 194 | @app.fixture() 195 | async def fixture(app_context: PyncetteContext): 196 | yield None 197 | 198 | counter = MagicMock() 199 | 200 | @app.task(interval=datetime.timedelta(seconds=2)) 201 | async def successful_task(context: Context) -> None: 202 | context.hello() 203 | 204 | async with app.create() as ctx: 205 | ctx.add_to_context("duplicate", counter) 206 | with pytest.raises(ValueError): 207 | ctx.add_to_context("duplicate", counter) 208 | 209 | with pytest.raises(ValueError): 210 | ctx.add_to_context("fixture", counter) 211 | 212 | with pytest.raises(ValueError): 213 | ctx.add_to_context("scheduled_at", counter) 214 | 215 | 216 | def test_partition_count_invalid(): 217 | app = Pyncette() 218 | 219 | with pytest.raises(ValueError, match="Partition count must be greater than or equal to 1"): 220 | 221 | @app.partitioned_task(partition_count=0) 222 | async def hello(context: Context) -> None: 223 | pass # pragma: no cover 224 | 225 | 226 | def test_default_partition_selector_does_not_change(): 227 | # BE CAREFUL IF THIS TEST BREAKS. 228 | # This is a regression test that ensures that the default 229 | # partition key is not changed, as that could lead to all users' 230 | # partitions being remapped. 
231 | assert _default_partition_selector(1000000000000, "Lorem ipsum dolor sit amet") == 222413034928 232 | -------------------------------------------------------------------------------- /tests/test_pyncette_healthcheck.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | 4 | import aiohttp 5 | import pytest 6 | 7 | from pyncette import Pyncette 8 | from pyncette.healthcheck import default_healthcheck 9 | from pyncette.healthcheck import use_healthcheck_server 10 | from pyncette.sqlite import sqlite_repository 11 | 12 | from conftest import wrap_factory 13 | 14 | 15 | def get_healthcheck_port(app_context): 16 | return app_context._root_context._healthcheck.sockets[0].getsockname()[1] 17 | 18 | 19 | @pytest.mark.asyncio 20 | async def test_default_healthcheck_handler_healthy(timemachine): 21 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 22 | 23 | async with app.create() as ctx: 24 | task = asyncio.create_task(ctx.run()) 25 | await timemachine.step(datetime.timedelta(seconds=1.5)) 26 | is_healthy = await default_healthcheck(ctx) 27 | ctx.shutdown() 28 | await task 29 | await timemachine.unwind() 30 | 31 | assert is_healthy 32 | 33 | 34 | @pytest.mark.asyncio 35 | async def test_default_healthcheck_handler_unhealthy(timemachine): 36 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 37 | 38 | async with app.create() as ctx: 39 | task = asyncio.create_task(ctx.run()) 40 | # Advance time without executing callbacks 41 | timemachine._update_offset(timemachine.offset + datetime.timedelta(hours=1)) 42 | is_healthy = await default_healthcheck(ctx) 43 | ctx.shutdown() 44 | await task 45 | await timemachine.unwind() 46 | 47 | assert not is_healthy 48 | 49 | 50 | @pytest.mark.asyncio 51 | async def test_healthcheck_server_success(timemachine): 52 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 53 | 54 | async def healthcheck_handler(app_context): 55 | return True 56 | 57 | # Bind on random port to avoid conflict 58 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 59 | 60 | async with app.create() as ctx, aiohttp.ClientSession() as session: 61 | task = asyncio.create_task(ctx.run()) 62 | async with session.get(f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health") as resp: 63 | assert resp.status == 200 64 | ctx.shutdown() 65 | await task 66 | await timemachine.unwind() 67 | 68 | 69 | @pytest.mark.asyncio 70 | async def test_healthcheck_server_failure(timemachine): 71 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 72 | 73 | async def healthcheck_handler(app_context): 74 | return False 75 | 76 | # Bind on random port to avoid conflict 77 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 78 | 79 | async with app.create() as ctx, aiohttp.ClientSession() as session: 80 | task = asyncio.create_task(ctx.run()) 81 | async with session.get(f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health") as resp: 82 | assert resp.status == 500 83 | ctx.shutdown() 84 | await task 85 | await timemachine.unwind() 86 | 87 | 88 | @pytest.mark.asyncio 89 | async def test_healthcheck_server_exception(timemachine): 90 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 91 | 92 | async def healthcheck_handler(app_context): 93 | raise Exception("oops") 94 | 95 | # Bind on random
port to avoid conflict 96 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 97 | 98 | async with app.create() as ctx, aiohttp.ClientSession() as session: 99 | task = asyncio.create_task(ctx.run()) 100 | async with session.get(f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health") as resp: 101 | assert resp.status == 500 102 | ctx.shutdown() 103 | await task 104 | await timemachine.unwind() 105 | 106 | 107 | @pytest.mark.asyncio 108 | async def test_healthcheck_server_invalid_verb(timemachine): 109 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 110 | 111 | async def healthcheck_handler(app_context): 112 | pass # pragma: no cover 113 | 114 | # Bind on random port to avoid conflict 115 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 116 | 117 | async with app.create() as ctx, aiohttp.ClientSession() as session: 118 | task = asyncio.create_task(ctx.run()) 119 | async with session.post( 120 | f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health", 121 | json={"test": "object"}, 122 | ) as resp: 123 | assert resp.status == 405 124 | ctx.shutdown() 125 | await task 126 | await timemachine.unwind() 127 | -------------------------------------------------------------------------------- /tests/test_pyncette_process.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import signal 4 | import subprocess 5 | import time 6 | 7 | import pytest 8 | 9 | 10 | @pytest.mark.integration 11 | def test_signal_handling(): 12 | with subprocess.Popen( 13 | ["coverage", "run", "-m", "tests.test_pyncette_process"], # noqa: S607 14 | env={**os.environ, "LOG_LEVEL": "DEBUG"}, 15 | ) as proc: 16 | time.sleep(2) 17 | proc.send_signal(signal.SIGINT) 18 | ret_code = proc.wait() 19 | 20 | assert ret_code == 0 21 | 22 | 23 | @pytest.mark.integration 24 | def test_signal_handling_uvloop(): 25 | with subprocess.Popen( 26 | ["coverage", "run", "-m", "tests.test_pyncette_process"], # noqa: S607 27 | env={**os.environ, "LOG_LEVEL": "DEBUG", "USE_UVLOOP": "1"}, 28 | ) as proc: 29 | time.sleep(2) 30 | proc.send_signal(signal.SIGINT) 31 | ret_code = proc.wait() 32 | 33 | assert ret_code == 0 34 | 35 | 36 | @pytest.mark.integration 37 | def test_signal_handling_force(): 38 | with subprocess.Popen( 39 | ["coverage", "run", "-m", "tests.test_pyncette_process"], # noqa: S607 40 | env={**os.environ, "LOG_LEVEL": "DEBUG"}, 41 | ) as proc: 42 | time.sleep(2) 43 | proc.send_signal(signal.SIGINT) 44 | time.sleep(1) 45 | proc.send_signal(signal.SIGINT) 46 | ret_code = proc.wait() 47 | 48 | assert ret_code != 0 49 | 50 | 51 | if __name__ == "__main__": 52 | import asyncio 53 | 54 | from pyncette import Context 55 | from pyncette import Pyncette 56 | 57 | app = Pyncette() 58 | 59 | @app.task(interval=datetime.timedelta(seconds=1)) 60 | async def foo(context: Context): 61 | await asyncio.sleep(4) 62 | 63 | app.main() 64 | -------------------------------------------------------------------------------- /tests/test_pyncette_prometheus.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | from unittest.mock import MagicMock 4 | 5 | import pytest 6 | from prometheus_client import generate_latest 7 | 8 | from pyncette import Context 9 | from pyncette import FailureMode 10 | from pyncette import Pyncette 11 | from pyncette.prometheus import use_prometheus 12 | from 
pyncette.sqlite import sqlite_repository 13 | 14 | from conftest import wrap_factory 15 | 16 | 17 | @pytest.mark.asyncio 18 | async def test_prometheus_metrics(timemachine): 19 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 20 | use_prometheus(app) 21 | 22 | counter = MagicMock() 23 | 24 | @app.dynamic_task(failure_mode=FailureMode.UNLOCK) 25 | async def dynamic_task_1(context: Context) -> None: 26 | counter.execute() 27 | raise Exception("test") 28 | 29 | @app.task(interval=datetime.timedelta(seconds=2)) 30 | async def task_1(context: Context) -> None: 31 | await context.heartbeat() 32 | counter.execute() 33 | 34 | async with app.create() as ctx: 35 | await ctx.schedule_task(dynamic_task_1, "1", interval=datetime.timedelta(seconds=2)) 36 | task = asyncio.create_task(ctx.run()) 37 | await timemachine.step(datetime.timedelta(seconds=10)) 38 | await ctx.unschedule_task(dynamic_task_1, "1") 39 | 40 | ctx.shutdown() 41 | await task 42 | await timemachine.unwind() 43 | 44 | metrics = generate_latest().decode("ascii").splitlines() 45 | 46 | assert 'pyncette_repository_ops_total{operation="unlock_task",task_name="dynamic_task_1"} 9.0' in metrics 47 | assert 'pyncette_repository_ops_total{operation="commit_task",task_name="task_1"} 5.0' in metrics 48 | assert 'pyncette_repository_ops_total{operation="poll_dynamic_task",task_name="dynamic_task_1"} 11.0' in metrics 49 | assert 'pyncette_repository_ops_total{operation="poll_task",task_name="dynamic_task_1"} 9.0' in metrics 50 | assert 'pyncette_repository_ops_total{operation="poll_task",task_name="task_1"} 11.0' in metrics 51 | assert 'pyncette_repository_ops_total{operation="extend_lease",task_name="task_1"} 5.0' in metrics 52 | assert 'pyncette_repository_ops_total{operation="register_task",task_name="dynamic_task_1"} 1.0' in metrics 53 | assert 'pyncette_repository_ops_total{operation="unregister_task",task_name="dynamic_task_1"} 1.0' in metrics 54 | assert 'pyncette_tasks_total{task_name="dynamic_task_1"} 9.0' in metrics 55 | assert 'pyncette_tasks_total{task_name="task_1"} 5.0' in metrics 56 | -------------------------------------------------------------------------------- /tests/test_redis.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from redis import asyncio as aioredis 5 | 6 | from pyncette import redis 7 | 8 | 9 | @pytest.mark.asyncio 10 | @pytest.mark.integration 11 | async def test_script_reload(monkeypatch): 12 | monkeypatch.setattr(redis, "read_text", lambda *args: 'return { "SUCCESS" }') 13 | 14 | redis_url = os.environ.get("REDIS_URL", "redis://localhost") 15 | redis_pool = aioredis.from_url(redis_url) 16 | 17 | lua_script = redis._LuaScript("dummy") 18 | result = await lua_script.register(redis_pool) 19 | await redis_pool.execute_command("SCRIPT", "FLUSH", "SYNC") 20 | 21 | result = await lua_script.execute(redis_pool, [], []) 22 | 23 | assert result == [b"SUCCESS"] 24 | 25 | 26 | @pytest.mark.asyncio 27 | @pytest.mark.integration 28 | async def test_script_register(monkeypatch): 29 | monkeypatch.setattr(redis, "read_text", lambda *args: 'return { "SUCCESS" }') 30 | 31 | redis_url = os.environ.get("REDIS_URL", "redis://localhost") 32 | redis_pool = aioredis.from_url(redis_url) 33 | 34 | lua_script = redis._LuaScript("dummy") 35 | result = await lua_script.execute(redis_pool, [], []) 36 | 37 | assert result == [b"SUCCESS"] 38 | -------------------------------------------------------------------------------- 
/tests/test_sqlite.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyncette import sqlite 4 | 5 | 6 | @pytest.mark.asyncio 7 | async def test_invalid_table_name(): 8 | with pytest.raises(ValueError): 9 | await sqlite.sqlite_repository( 10 | sqlite_table_name="spaces in table name", 11 | ).__aenter__() 12 | -------------------------------------------------------------------------------- /tests/utils/fakerepository.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import contextlib 4 | import datetime 5 | import logging 6 | from typing import Any 7 | from collections.abc import AsyncIterator 8 | 9 | from pyncette.model import ContinuationToken 10 | from pyncette.model import Lease 11 | from pyncette.model import PollResponse 12 | from pyncette.model import QueryResponse 13 | from pyncette.model import ResultType 14 | from pyncette.repository import Repository 15 | from pyncette.task import Task 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | _LEASE = Lease(object()) 21 | _TASK_SPEC = { 22 | "name": "fake", 23 | "interval": None, 24 | "timezone": None, 25 | "execute_at": None, 26 | "extra_args": {}, 27 | "schedule": "* * * * * *", 28 | } 29 | 30 | 31 | class FakeRepository(Repository): 32 | """Fake repository that synthesizes Pyncette task execution data for testing""" 33 | 34 | _batch_size: int 35 | _records_per_tick: int 36 | 37 | def __init__(self, batch_size: int, records_per_tick: int): 38 | self._batch_size = batch_size 39 | self._records_per_tick = records_per_tick 40 | 41 | async def poll_dynamic_task( 42 | self, 43 | utc_now: datetime.datetime, 44 | task: Task, 45 | continuation_token: ContinuationToken | None = None, 46 | ) -> QueryResponse: 47 | if isinstance(continuation_token, int): 48 | remaining = self._records_per_tick - continuation_token 49 | else: 50 | remaining = self._records_per_tick 51 | 52 | result_count = min(remaining, self._batch_size) # return at most one batch per call 53 | remaining -= result_count 54 | 55 | return QueryResponse( 56 | tasks=[ 57 | ( 58 | task.instantiate_from_spec(_TASK_SPEC), 59 | _LEASE, 60 | ) 61 | for _ in range(result_count) 62 | ], 63 | continuation_token=(self._records_per_tick - remaining) if remaining else None, # number of records already returned this tick 64 | ) 65 | 66 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 67 | pass 68 | 69 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 70 | pass 71 | 72 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Lease | None = None) -> PollResponse: 73 | return PollResponse(result=ResultType.READY, scheduled_at=utc_now, lease=_LEASE) 74 | 75 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 76 | pass 77 | 78 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 79 | pass 80 | 81 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Lease | None: 82 | return lease 83 | 84 | 85 | @contextlib.asynccontextmanager 86 | async def fake_repository( 87 | batch_size: int = 100, 88 | records_per_tick: int = 1000, 89 | **kwargs: Any, 90 | ) -> AsyncIterator[FakeRepository]: 91 | yield FakeRepository(batch_size, records_per_tick) 92 | -------------------------------------------------------------------------------- /tests/utils/timemachine.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | import
heapq 4 | import inspect 5 | import logging 6 | from functools import total_ordering 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @total_ordering 12 | class ScheduledTask: 13 | def __init__(self, execute_at, future): 14 | self.execute_at = execute_at 15 | self.future = future 16 | 17 | def __lt__(self, other): 18 | return self.execute_at.__lt__(other.execute_at) 19 | 20 | def __eq__(self, other): 21 | return self.execute_at.__eq__(other.execute_at) 22 | 23 | 24 | class TimeMachine: 25 | """Utility class that allows us to mock real time in a way that plays well with asyncio without implementing a custom event loop.""" 26 | 27 | def __init__(self, base_time): 28 | self.callbacks = [] 29 | self.io_tasks = [] 30 | self.base_time = base_time 31 | self.offset = datetime.timedelta(seconds=0) 32 | self.spin_iterations = 10 33 | 34 | def decorate_io(self, obj): 35 | """ 36 | Decorates the class or function so that the TimeMachine waits for decorated I/O 37 | operations to complete before proceeding with time shifting. From the application's point 38 | of view, all decorated I/O operations happen instantaneously. 39 | """ 40 | 41 | def wrapper(func): 42 | async def wrapped(*args, **kwargs): 43 | future = asyncio.Future() 44 | self.io_tasks.append(future) 45 | try: 46 | return await func(*args, **kwargs) 47 | finally: 48 | future.set_result(None) 49 | 50 | return wrapped 51 | 52 | if inspect.iscoroutinefunction(obj): 53 | return wrapper(obj) 54 | else: 55 | for name, fn in inspect.getmembers(obj): 56 | if inspect.iscoroutinefunction(fn): 57 | setattr(obj, name, wrapper(fn)) 58 | return obj 59 | 60 | def sleep(self, delay, *args, **kwargs): 61 | future = asyncio.Future() 62 | heapq.heappush( 63 | self.callbacks, 64 | ScheduledTask(self.offset + datetime.timedelta(seconds=delay), future), 65 | ) 66 | future.add_done_callback(self._remove_cancelled_sleep) 67 | logger.debug(f"Registering sleep {id(future)} for {delay}s (resume at T+{self.offset + datetime.timedelta(seconds=delay)})") 68 | return future 69 | 70 | def wait_for(self, fut, timeout, *args, **kwargs): 71 | if timeout is None: 72 | return fut 73 | 74 | future = asyncio.Future() 75 | fut = asyncio.ensure_future(fut) 76 | wait_handle = self.sleep(timeout) 77 | 78 | def _on_timeout(f): 79 | try: 80 | future.set_exception(asyncio.TimeoutError()) 81 | except asyncio.InvalidStateError: 82 | pass 83 | 84 | def _on_completion(f): 85 | try: 86 | future.set_result(f.result()) 87 | wait_handle.cancel() 88 | except asyncio.CancelledError: 89 | pass 90 | except asyncio.InvalidStateError: 91 | pass 92 | 93 | wait_handle.add_done_callback(_on_timeout) 94 | fut.add_done_callback(_on_completion) 95 | return future 96 | 97 | def perf_counter(self): 98 | return self.offset.total_seconds() 99 | 100 | def utcnow(self): 101 | return self.base_time + self.offset 102 | 103 | async def unwind(self): 104 | """Jumps to "infinity", i.e.
continues executing until no more sleeps are scheduled.""" 105 | await self._spin() 106 | while len(self.callbacks) > 0: 107 | task = heapq.heappop(self.callbacks) 108 | self._update_offset(task.execute_at) 109 | try: 110 | task.future.set_result(None) 111 | except asyncio.InvalidStateError: 112 | pass 113 | await self._spin() 114 | 115 | async def step(self, delta=None): 116 | if delta is None: 117 | await self._spin() 118 | else: 119 | await self.jump_to(self.offset + delta) 120 | 121 | async def jump_to(self, offset): 122 | if offset < self.offset: 123 | raise ValueError("Cannot go back in time (yet)!") 124 | 125 | await self._spin() 126 | while len(self.callbacks) > 0: 127 | if self.callbacks[0].execute_at > offset: 128 | break 129 | task = heapq.heappop(self.callbacks) 130 | self._update_offset(task.execute_at) 131 | try: 132 | task.future.set_result(None) 133 | except asyncio.InvalidStateError: 134 | pass 135 | await self._spin() 136 | self._update_offset(offset) 137 | 138 | def _remove_cancelled_sleep(self, fut): 139 | if fut.cancelled(): 140 | try: 141 | self.callbacks = [callback for callback in self.callbacks if callback.future is not fut] 142 | heapq.heapify(self.callbacks) 143 | logger.debug(f"Removed cancelled sleep {id(fut)}") 144 | except ValueError: 145 | pass 146 | 147 | async def _spin(self): 148 | for _ in range(self.spin_iterations): 149 | # First we wait for any pending I/O futures to complete 150 | if self.io_tasks: 151 | io_tasks = self.io_tasks 152 | self.io_tasks = [] 153 | await asyncio.gather(*io_tasks) 154 | 155 | # Then we just jump to the back of the callback queue before completing 156 | future = asyncio.Future() 157 | loop = asyncio.get_event_loop() 158 | loop.call_soon(future.set_result, None) 159 | await future 160 | 161 | def _update_offset(self, new_offset): 162 | if self.offset != new_offset: 163 | self.offset = new_offset 164 | logger.debug(f"Jumped to T+{new_offset.total_seconds()}s") 165 | --------------------------------------------------------------------------------