├── .bumpversion.cfg ├── .dockerignore ├── .editorconfig ├── .github └── workflows │ ├── docs.yml │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── settings.json ├── Dockerfile.dev ├── LICENSE ├── README.md ├── codecov.yml ├── docker-compose.yml ├── docker ├── Dockerfile └── entrypoint.py ├── docs ├── advanced_usage.md ├── api-reference.md ├── backends.md ├── changelog.md ├── contributing.md ├── index.md ├── installation.md └── usage.md ├── examples ├── README.md ├── basic.py ├── benchmark.py ├── data │ └── usernames.txt ├── delay_queue.py ├── dynamic_tasks.py ├── fixtures_and_middlewares.py ├── healthcheck.py ├── heartbeat.py ├── persistence.py └── prometheus_metrics.py ├── mkdocs.yml ├── pyproject.toml ├── src └── pyncette │ ├── __init__.py │ ├── dynamodb.py │ ├── errors.py │ ├── executor.py │ ├── healthcheck.py │ ├── model.py │ ├── mysql.py │ ├── postgres.py │ ├── prometheus.py │ ├── py.typed │ ├── pyncette.py │ ├── redis │ ├── __init__.py │ ├── manage.lua │ └── poll_dynamic.lua │ ├── repository.py │ ├── sqlite.py │ ├── task.py │ └── utils.py └── tests ├── conftest.py ├── test_dynamodb.py ├── test_mysql.py ├── test_postgres.py ├── test_pyncette.py ├── test_pyncette_healthcheck.py ├── test_pyncette_integration.py ├── test_pyncette_process.py ├── test_pyncette_prometheus.py ├── test_redis.py ├── test_sqlite.py └── utils ├── fakerepository.py └── timemachine.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.0.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:pyproject.toml] 7 | search = version = "{current_version}" 8 | replace = version = "{new_version}" 9 | 10 | [bumpversion:file:README.md] 11 | search = v{current_version}. 12 | replace = v{new_version}. 
13 | 14 | [bumpversion:file:src/pyncette/__init__.py] 15 | search = __version__ = "{current_version}" 16 | replace = __version__ = "{new_version}" 17 | 18 | [bumpversion:file:docs/changelog.md] 19 | search = ## Unreleased 20 | replace = ## {new_version} ({now:%Y-%m-%d}) 21 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | **/*.egg-info 3 | **/__pycache__ 4 | **/.mypy_cache 5 | *.pyc 6 | .coverage 7 | htmlcov/ 8 | **/_build/ 9 | .venv/ 10 | .pytest_cache/ 11 | site/ 12 | build/ 13 | dist/ 14 | .vscode/ 15 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # see https://editorconfig.org/ 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | charset = utf-8 11 | 12 | [*.{bat,cmd,ps1}] 13 | end_of_line = crlf 14 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | concurrency: 16 | group: "pages" 17 | cancel-in-progress: false 18 | 19 | jobs: 20 | build: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Install uv 25 | uses: astral-sh/setup-uv@v5 26 | with: 27 | version: "latest" 28 | - name: Set up Python 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: "3.12" 32 | - name: Install dependencies 33 | run: uv sync --extra all --extra dev 34 | - name: Build docs 35 | run: uv run mkdocs build --strict 36 | - name: Upload artifact 37 | if: github.event_name != 'pull_request' 38 | uses: actions/upload-pages-artifact@v3 39 | with: 40 | path: site/ 41 | 42 | deploy: 43 | if: github.event_name != 'pull_request' 44 | environment: 45 | name: github-pages 46 | url: ${{ steps.deployment.outputs.page_url }} 47 | runs-on: ubuntu-latest 48 | needs: build 49 | steps: 50 | - name: Deploy to GitHub Pages 51 | id: deployment 52 | uses: actions/deploy-pages@v4 53 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Install uv 15 | uses: astral-sh/setup-uv@v5 16 | with: 17 | version: "latest" 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: "3.12" 22 | - name: Install dependencies 23 | run: uv sync --extra all --extra dev 24 | - name: Run pre-commit 25 | run: uv run pre-commit run --all-files --show-diff-on-failure 26 | - name: Run ty 27 | run: uv run ty check src examples 28 | 29 | test: 30 | runs-on: ubuntu-latest 31 | strategy: 32 | matrix: 33 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] 34 | steps: 35 | - uses: actions/checkout@v4 36 | - name: Install uv 37 | uses: 
astral-sh/setup-uv@v5 38 | with: 39 | version: "latest" 40 | - name: Set up Python ${{ matrix.python-version }} 41 | uses: actions/setup-python@v5 42 | with: 43 | python-version: ${{ matrix.python-version }} 44 | - name: Install dependencies 45 | run: uv sync --extra all --extra dev 46 | - name: Run unit tests 47 | run: uv run pytest --cov --cov-report=term-missing --cov-report=xml -vv -m "not integration" --backend=default tests 48 | - name: Upload coverage 49 | uses: codecov/codecov-action@v5 50 | with: 51 | token: ${{ secrets.CODECOV_TOKEN }} 52 | files: ./coverage.xml 53 | flags: py${{ matrix.python-version }} 54 | 55 | integration: 56 | runs-on: ubuntu-latest 57 | timeout-minutes: 20 58 | services: 59 | postgres: 60 | image: postgres 61 | env: 62 | POSTGRES_PASSWORD: postgres 63 | POSTGRES_DB: pyncette 64 | options: >- 65 | --health-cmd pg_isready 66 | --health-interval 10s 67 | --health-timeout 5s 68 | --health-retries 5 69 | ports: 70 | - 5432:5432 71 | redis: 72 | image: redis 73 | options: >- 74 | --health-cmd "redis-cli ping" 75 | --health-interval 10s 76 | --health-timeout 5s 77 | --health-retries 5 78 | ports: 79 | - 6379:6379 80 | localstack: 81 | image: localstack/localstack 82 | env: 83 | SERVICES: dynamodb 84 | ports: 85 | - 4566:4566 86 | options: >- 87 | --health-cmd "curl -fso /dev/null http://localhost:4566/_localstack/health" 88 | --health-interval 10s 89 | --health-timeout 5s 90 | --health-retries 5 91 | mysql: 92 | image: mysql 93 | env: 94 | MYSQL_ALLOW_EMPTY_PASSWORD: "1" 95 | MYSQL_DATABASE: pyncette 96 | MYSQL_USER: pyncette 97 | MYSQL_PASSWORD: password 98 | options: >- 99 | --health-cmd "mysqladmin ping --silent" 100 | --health-interval 10s 101 | --health-timeout 5s 102 | --health-retries 5 103 | ports: 104 | - 3306:3306 105 | 106 | steps: 107 | - uses: actions/checkout@v4 108 | - name: Install uv 109 | uses: astral-sh/setup-uv@v5 110 | with: 111 | version: "latest" 112 | - name: Set up Python 113 | uses: actions/setup-python@v5 114 | with: 115 | python-version: "3.12" 116 | - name: Install dependencies 117 | run: uv sync --extra all --extra dev 118 | - name: Run integration tests 119 | env: 120 | POSTGRES_URL: postgres://postgres:postgres@localhost/postgres 121 | AWS_ACCESS_KEY_ID: "foobar" 122 | AWS_SECRET_ACCESS_KEY: "foobar" 123 | run: uv run pytest --cov --cov-report=term-missing --cov-report=xml -vv tests 124 | - name: Upload coverage 125 | uses: codecov/codecov-action@v5 126 | with: 127 | token: ${{ secrets.CODECOV_TOKEN }} 128 | files: ./coverage.xml 129 | flags: integration 130 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using uv and Twine when a release is created 2 | 3 | name: Upload Python Package 4 | 5 | on: 6 | release: 7 | types: [created] 8 | 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Install uv 15 | uses: astral-sh/setup-uv@v5 16 | with: 17 | version: "latest" 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.12' 22 | - name: Build package 23 | run: uv build 24 | - name: Publish to PyPI 25 | env: 26 | UV_PUBLISH_TOKEN: ${{ secrets.PYPI_UPLOAD_TOKEN }} 27 | run: uv publish 28 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.py[cod] 2 | __pycache__ 3 | 4 | # C extensions 5 | *.so 6 | 7 | # Packages 8 | *.egg 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | .eggs 14 | parts 15 | bin 16 | var 17 | sdist 18 | wheelhouse 19 | develop-eggs 20 | .installed.cfg 21 | lib 22 | lib64 23 | venv*/ 24 | pyvenv*/ 25 | pip-wheel-metadata/ 26 | .venv/ 27 | 28 | # Installer logs 29 | pip-log.txt 30 | 31 | # Unit test / coverage reports 32 | .coverage 33 | .coverage.* 34 | .pytest_cache/ 35 | nosetests.xml 36 | coverage.xml 37 | htmlcov 38 | 39 | # Translations 40 | *.mo 41 | 42 | # Mr Developer 43 | .mr.developer.cfg 44 | .project 45 | .pydevproject 46 | .idea 47 | *.iml 48 | *.komodoproject 49 | 50 | # Complexity 51 | output/*.html 52 | output/*/index.html 53 | 54 | # Documentation builds 55 | docs/_build 56 | site/ 57 | 58 | .DS_Store 59 | *~ 60 | .*.sw[po] 61 | .build 62 | .ve 63 | .env 64 | .cache 65 | .pytest 66 | .benchmarks 67 | .bootstrap 68 | .appveyor.token 69 | *.bak 70 | 71 | # Mypy Cache 72 | .mypy_cache/ 73 | 74 | ## VSCode 75 | .vscode/tags 76 | 77 | ## Pyncette 78 | pyncette.db 79 | 80 | uv.lock 81 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # To install the git pre-commit hooks run: 2 | # pre-commit install --install-hooks 3 | # To update the versions: 4 | # pre-commit autoupdate 5 | exclude: '^\.bumpversion\.cfg(/|$)' 6 | repos: 7 | - repo: https://github.com/asottile/pyupgrade 8 | rev: v3.21.0 9 | hooks: 10 | - id: pyupgrade 11 | args: [--py39-plus] 12 | 13 | # Ruff - Fast Python linter and formatter (replaces black, isort, pyupgrade, and many flake8 plugins) 14 | - repo: https://github.com/astral-sh/ruff-pre-commit 15 | rev: v0.14.0 16 | hooks: 17 | # Run the linter 18 | - id: ruff 19 | args: [--fix] 20 | # Run the formatter 21 | - id: ruff-format 22 | 23 | # Pre-commit hooks for file quality 24 | - repo: https://github.com/pre-commit/pre-commit-hooks 25 | rev: v6.0.0 26 | hooks: 27 | - id: trailing-whitespace 28 | exclude_types: [svg] 29 | - id: end-of-file-fixer 30 | exclude_types: [svg] 31 | - id: check-yaml 32 | - id: check-toml 33 | - id: check-json 34 | - id: check-added-large-files 35 | args: [--maxkb=1000] 36 | - id: check-merge-conflict 37 | - id: debug-statements 38 | - id: mixed-line-ending 39 | args: [--fix=lf] 40 | 41 | # Markdown formatting 42 | - repo: https://github.com/executablebooks/mdformat 43 | rev: 0.7.21 44 | hooks: 45 | - id: mdformat 46 | additional_dependencies: 47 | - mdformat-gfm # GitHub Flavored Markdown 48 | - mdformat-black # Format Python code blocks 49 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black", 3 | "restructuredtext.confPath": "${workspaceFolder}/docs", 4 | "python.pythonPath": "/usr/bin/python3" 5 | } 6 | -------------------------------------------------------------------------------- /Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | ENV DEBIAN_FRONTEND=noninteractive 3 | 4 | RUN apt-get update -y \ 5 | && apt-get install -y software-properties-common 6 | RUN add-apt-repository ppa:deadsnakes/ppa 7 | RUN apt-get update -y \ 8 | && apt-get install -y \ 9 | python3.9 \ 10 | 
python3.9-distutils \ 11 | python3.10 \ 12 | python3.10-distutils \ 13 | python3.11 \ 14 | python3.11-distutils \ 15 | python3.12 \ 16 | python3.13 \ 17 | python3-pip \ 18 | python3-apt \ 19 | redis-tools \ 20 | postgresql-client \ 21 | mysql-client \ 22 | git \ 23 | curl \ 24 | unzip \ 25 | groff \ 26 | && rm -rf /var/lib/apt/lists/* 27 | 28 | RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ 29 | && unzip awscliv2.zip \ 30 | && ./aws/install 31 | 32 | # Install uv for package management 33 | RUN curl -LsSf https://astral.sh/uv/install.sh | sh 34 | ENV PATH="/root/.local/bin:$PATH" 35 | 36 | RUN python3 -m pip install awscli-local[ver2] 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019, Tibor Djurica Potpara 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | [![Documentation](https://img.shields.io/badge/docs-github%20pages-blue)](https://tibordp.github.io/pyncette/) 4 | [![Github Actions Build Status](https://github.com/tibordp/pyncette/workflows/Python%20package/badge.svg?branch=master)](https://github.com/tibordp/pyncette/actions?query=branch%3Amaster+workflow%3A%22Python+package%22) 5 | [![Coverage Status](https://codecov.io/gh/tibordp/pyncette/branch/master/graphs/badge.svg?branch=master)](https://codecov.io/github/tibordp/pyncette) 6 | [![PyPI Package latest release](https://img.shields.io/pypi/v/pyncette.svg)](https://pypi.org/project/pyncette) 7 | [![PyPI Wheel](https://img.shields.io/pypi/wheel/pyncette.svg)](https://pypi.org/project/pyncette) 8 | [![Supported versions](https://img.shields.io/pypi/pyversions/pyncette.svg)](https://pypi.org/project/pyncette) 9 | [![Supported implementations](https://img.shields.io/pypi/implementation/pyncette.svg)](https://pypi.org/project/pyncette) 10 | [![Commits since latest release](https://img.shields.io/github/commits-since/tibordp/pyncette/v1.0.0.svg)](https://github.com/tibordp/pyncette/compare/v1.0.0...master) 11 | 12 | A reliable distributed scheduler with pluggable storage backends for Async Python. 
13 | 14 | - Free software: MIT license 15 | 16 | ## Installation 17 | 18 | Minimal installation (just SQLite persistence): 19 | 20 | ```bash 21 | pip install pyncette 22 | ``` 23 | 24 | Full installation (all the backends and Prometheus metrics exporter): 25 | 26 | ```bash 27 | pip install pyncette[all] 28 | ``` 29 | 30 | You can also install the in-development version with: 31 | 32 | ```bash 33 | pip install https://github.com/tibordp/pyncette/archive/master.zip 34 | ``` 35 | 36 | ## Documentation 37 | 38 | https://tibordp.github.io/pyncette/ 39 | 40 | ## Usage example 41 | 42 | Simple in-memory scheduler (does not persist state) 43 | 44 | ```python 45 | from pyncette import Pyncette, Context 46 | 47 | app = Pyncette() 48 | 49 | 50 | @app.task(schedule="* * * * *") 51 | async def foo(context: Context): 52 | print("This will run every minute") 53 | 54 | 55 | if __name__ == "__main__": 56 | app.main() 57 | ``` 58 | 59 | Persistent distributed cron using Redis (coordinates execution with parallel instances and survives restarts) 60 | 61 | ```python 62 | from pyncette import Pyncette, Context 63 | from pyncette.redis import redis_repository 64 | 65 | app = Pyncette(repository_factory=redis_repository, redis_url="redis://localhost") 66 | 67 | 68 | @app.task(schedule="* * * * * */10") 69 | async def foo(context: Context): 70 | print("This will run every 10 seconds") 71 | 72 | 73 | if __name__ == "__main__": 74 | app.main() 75 | ``` 76 | 77 | See the `examples` directory for more examples of usage. 78 | 79 | ## Use cases 80 | 81 | Pyncette is designed for reliable (at-least-once or at-most-once) execution of recurring tasks (think cronjobs) whose 82 | lifecycles are managed dynamically, but can work effectively for non-recurring tasks too. 83 | 84 | Example use cases: 85 | 86 | - You want to perform a database backup every day at noon 87 | - You want a report to be generated daily for your 10M users at the time of their choosing 88 | - You want currency conversion rates to be refreshed every 10 seconds 89 | - You want to allow your users to schedule non-recurring emails to be sent at an arbitrary time in the future 90 | 91 | Pyncette might not be a good fit if: 92 | 93 | - You want your tasks to be scheduled to run (ideally) once as soon as possible. It is doable, but you will be better served by a general purpose reliable queue like RabbitMQ or Amazon SQS. 94 | - You need tasks to execute at sub-second intervals with low jitter. Pyncette coordinates execution on a per task-instance basis and this coordination can add overhead and jitter. 95 | 96 | ## Supported backends 97 | 98 | Pyncette comes with an implementation for the following backends (used for persistence and coordination) out-of-the-box: 99 | 100 | - SQLite (included) 101 | - Redis (`pip install pyncette[redis]`) 102 | - PostgreSQL (`pip install pyncette[postgres]`) 103 | - MySQL 8.0+ (`pip install pyncette[mysql]`) 104 | - Amazon DynamoDB (`pip install pyncette[dynamodb]`) 105 | 106 | Pyncette imposes few requirements on the underlying datastores, so it can be extended to support other databases or 107 | custom storage formats / integrations with existing systems. For best results, the backend needs to provide: 108 | 109 | - Some sort of serialization mechanism, e.g.
traditional transactions, atomic stored procedures or compare-and-swap 110 | - Efficient range queries over a secondary index, which can be eventually consistent 111 | 112 | ## Development 113 | 114 | ### Prerequisites 115 | 116 | Install [uv](https://docs.astral.sh/uv/) for fast package management: 117 | 118 | ```bash 119 | curl -LsSf https://astral.sh/uv/install.sh | sh 120 | ``` 121 | 122 | ### Setup Development Environment 123 | 124 | Sync dependencies and install the package in editable mode: 125 | 126 | ```bash 127 | uv sync --extra all --extra dev 128 | ``` 129 | 130 | ### Running Tests 131 | 132 | **Unit tests** (fast, no external dependencies): 133 | 134 | ```bash 135 | uv run pytest -m "not integration" tests 136 | ``` 137 | 138 | **Integration tests** (requires Redis, PostgreSQL, MySQL, DynamoDB): 139 | 140 | Using Docker Compose to set up all backends: 141 | 142 | ```bash 143 | docker-compose up -d 144 | docker-compose run --rm shell 145 | uv run pytest tests 146 | ``` 147 | 148 | Or manually with services running locally: 149 | 150 | ```bash 151 | uv run pytest tests 152 | ``` 153 | 154 | **Test on specific Python version**: 155 | 156 | ```bash 157 | uv venv --python 3.11 158 | uv sync --extra all --extra dev 159 | uv run pytest tests 160 | ``` 161 | 162 | ### Code Quality 163 | 164 | Run linting and type checking: 165 | 166 | ```bash 167 | uv run pre-commit run --all-files 168 | uv run ty check src examples 169 | ``` 170 | 171 | ### Building Documentation 172 | 173 | ```bash 174 | uv run mkdocs build 175 | # Or serve locally with live reload 176 | uv run mkdocs serve 177 | ``` 178 | 179 | ### Building the Package 180 | 181 | ```bash 182 | uv build 183 | ``` 184 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | patch: 4 | default: 5 | target: auto 6 | threshold: 10% 7 | project: 8 | default: 9 | target: auto 10 | threshold: 5% 11 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | postgres: 3 | image: postgres 4 | restart: always 5 | environment: 6 | POSTGRES_PASSWORD: postgres 7 | POSTGRES_DB: pyncette 8 | ports: 9 | - "5432:5432" 10 | 11 | redis: 12 | image: redis 13 | restart: always 14 | ports: 15 | - "6379:6379" 16 | 17 | localstack: 18 | image: localstack/localstack 19 | ports: 20 | - "4566:4566" 21 | environment: 22 | - SERVICES=dynamodb 23 | 24 | mysql: 25 | image: mysql 26 | ports: 27 | - "3306:3306" 28 | environment: 29 | MYSQL_ALLOW_EMPTY_PASSWORD: "1" 30 | MYSQL_DATABASE: pyncette 31 | MYSQL_USER: pyncette 32 | MYSQL_PASSWORD: password 33 | 34 | shell: 35 | build: 36 | context: . 37 | dockerfile: Dockerfile.dev 38 | command: bash 39 | working_dir: /src 40 | environment: 41 | POSTGRES_URL: "postgres://postgres:postgres@postgres/pyncette" 42 | REDIS_URL: "redis://redis" 43 | DYNAMODB_ENDPOINT: "http://localstack:4566" 44 | LOCALSTACK_HOST: "localstack" 45 | AWS_ACCESS_KEY_ID: "foobar" 46 | AWS_SECRET_ACCESS_KEY: "foobar" 47 | MYSQL_HOST: "mysql" 48 | volumes: 49 | - type: bind 50 | source: . 
51 | target: /src 52 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | dumb-init \ 5 | && rm -rf /var/lib/apt/lists/* 6 | 7 | ADD . /opt/pyncette 8 | RUN pip install /opt/pyncette[all] && rm -rf /opt/ 9 | 10 | WORKDIR /pyncette 11 | ADD ./docker/entrypoint.py entrypoint.py 12 | 13 | EXPOSE 9699/tcp 14 | ENV USE_UVLOOP=1 15 | 16 | ENTRYPOINT ["/usr/bin/dumb-init", "--"] 17 | CMD ["python", "entrypoint.py"] 18 | -------------------------------------------------------------------------------- /docker/entrypoint.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from prometheus_client import start_http_server 4 | 5 | from pyncette import Context 6 | from pyncette import Pyncette 7 | from pyncette.healthcheck import use_healthcheck_server 8 | from pyncette.prometheus import use_prometheus 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | app = Pyncette(sqlite_database="pyncette.db") 13 | use_healthcheck_server(app) 14 | use_prometheus(app) 15 | 16 | 17 | @app.task(schedule="* * * * * */2") 18 | async def hello_world(context: Context): 19 | logger.info("Hello, world!") 20 | 21 | 22 | if __name__ == "__main__": 23 | start_http_server(port=9699, addr="0.0.0.0") # noqa: S104 24 | app.main() 25 | -------------------------------------------------------------------------------- /docs/advanced_usage.md: -------------------------------------------------------------------------------- 1 | # Advanced usage 2 | 3 | ## Partitioned dynamic tasks 4 | 5 | Certain backends, like Redis and Amazon DynamoDB have a natural partitioning to them. Generally, when using 6 | dynamic tasks, the task name is used as a partition key. For example, in DynamoDB, each dynamic task instance 7 | is associated with one row/document, but they all share the same partition id. 8 | 9 | Similarly for Redis, each task instance record is stored in its own key, but the index that sets them in order of 10 | next execution is stored in a single key, so a single large task will not benefit from a clustered Redis setup. 11 | 12 | If there is a very large number of dynamic task instances associated with a single task or they are polled 13 | very frequently, this can lead to hot partitions and degraded performance. There can also be limits as to how many 14 | task instances can even be stored in a single partition. For DynamoDB, the limit is 10GB. 15 | 16 | Pyncette supports transparent partitioning of tasks through `partitioned_task` decorator. 
17 | 18 | ```python 19 | from pyncette import Pyncette, Context 20 | 21 | app = Pyncette() 22 | 23 | 24 | @app.partitioned_task(partition_count=32) 25 | async def hello(context: Context) -> None: 26 | print(f"Hello {context.args['username']}") 27 | 28 | 29 | async with app.create() as app_context: 30 | await asyncio.gather( 31 | app_context.schedule_task( 32 | hello, "bill_task", schedule="0 * * * *", username="bill" 33 | ), 34 | app_context.schedule_task( 35 | hello, "steve_task", schedule="20 * * * *", username="steve" 36 | ), 37 | app_context.schedule_task( 38 | hello, "john_task", schedule="40 * * * *", username="john" 39 | ), 40 | ) 41 | await app_context.run() 42 | ``` 43 | 44 | This splits the dynamic task into 32 partitions and the task instances are automatically assigned to them based on the hash of the task instance name. 45 | 46 | The default partition selector uses SHA1 hash of the instance name, but a custom selector can be provided: 47 | 48 | ```python 49 | def custom_partition_selector(partition_count: int, task_id: str) -> int: 50 | return ( 51 | hash(task_id) % partition_count 52 | ) # Do not use this, as the hash() is not stable 53 | 54 | 55 | @app.partitioned_task(partition_count=32, partition_selector=custom_partition_selector) 56 | async def hello(context: Context) -> None: 57 | print(f"Hello {context.args['username']}") 58 | ``` 59 | 60 | ### Choosing the partition count 61 | 62 | Care must be taken when selecting a partition count, as it is not easy to change it later after tasks have already been 63 | scheduled. Changing a partition count will generally map task instances to a different partition, making them not run and also 64 | making it impossible to unschedule them through `unschedule_task`. 65 | 66 | There is also a tradeoff, as the time complexity of a single Pyncette poll grows linearly with the total number of tasks (or their 67 | partitions). Setting the number of partitions too high can lead to diminished performance due to the polling overhead. 68 | 69 | It is possible to configure Pyncette to only poll certain partitions using the `enabled_partitions` parameter. This will allow the 70 | tasks to be scheduled and unscheduled by any application instance, but only the partitions selected will be polled. You may use 71 | this if you have a large number of instances for a given task in order to spread the load evenly among them. 72 | 73 | ```python 74 | @app.partitioned_task( 75 | partition_count=8, 76 | # Partitions 4, 5, 6 and 7 will not be polled 77 | enabled_partitions=[0, 1, 2, 3], 78 | ) 79 | async def hello(context: Context) -> None: 80 | print(f"Hello {context.args['username']}") 81 | ``` 82 | -------------------------------------------------------------------------------- /docs/api-reference.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | This page is automatically generated from the Python source code docstrings. 4 | 5 | ::: pyncette 6 | options: 7 | show_submodules: true 8 | -------------------------------------------------------------------------------- /docs/backends.md: -------------------------------------------------------------------------------- 1 | # Backends 2 | 3 | By default Pyncette runs without persistence. This means that the schedule is maintained in-memory and there is no coordination between multiple instances of the app.
4 | 5 | Enabling persistence allows the application to recover from restarts as well as the ability to run multiple instances of an app concurrently without duplicate executions of tasks. 6 | 7 | ## SQLite 8 | 9 | SQLite is the default persistence engine and is included in the base Python package. 10 | 11 | ```python 12 | from pyncette import Pyncette, Context 13 | 14 | app = Pyncette(sqlite_database="pyncette.db") 15 | 16 | 17 | @app.task(schedule="* * * * * */10") 18 | async def foo(context: Context): 19 | print("This will run every 10 seconds") 20 | 21 | 22 | if __name__ == "__main__": 23 | app.main() 24 | ``` 25 | 26 | ## Redis 27 | 28 | Redis can be enabled by passing `redis_repository` as the `repository_factory` parameter to the `Pyncette` constructor. 29 | 30 | ```python 31 | from pyncette import Pyncette, Context 32 | from pyncette.redis import redis_repository 33 | 34 | app = Pyncette(repository_factory=redis_repository, redis_url="redis://localhost") 35 | ``` 36 | 37 | Optionally, the tasks can be namespaced if the Redis server is shared among different Pyncette apps: 38 | 39 | ```python 40 | app = Pyncette( 41 | repository_factory=redis_repository, 42 | redis_url="redis://localhost", 43 | redis_namespace="my_super_app", 44 | ) 45 | ``` 46 | 47 | ## PostgreSQL 48 | 49 | PostgreSQL can be enabled by passing `postgres_repository` as the `repository_factory` parameter to the `Pyncette` constructor. 50 | 51 | ```python 52 | from pyncette import Pyncette, Context 53 | from pyncette.postgres import postgres_repository 54 | 55 | app = Pyncette( 56 | repository_factory=postgres_repository, 57 | postgres_url="postgres://postgres@localhost/pyncette", 58 | postgres_table_name="pyncette_tasks", 59 | ) 60 | ``` 61 | 62 | The table will be automatically initialized on startup if it does not exist unless `postgres_skip_table_create` is set to `True`. 63 | 64 | ## MySQL 65 | 66 | MySQL can be configured by passing `mysql_repository` as the `repository_factory` parameter to the `Pyncette` constructor. 67 | 68 | The MySQL backend requires MySQL version 8.0+. 69 | 70 | ```python 71 | from pyncette import Pyncette, Context 72 | from pyncette.mysql import mysql_repository 73 | 74 | app = Pyncette( 75 | repository_factory=mysql_repository, 76 | mysql_host="localhost", 77 | mysql_database="pyncette", 78 | mysql_user="pyncette", 79 | mysql_password="password", 80 | mysql_table_name="pyncette_tasks", 81 | ) 82 | ``` 83 | 84 | The table will be automatically initialized on startup if it does not exist unless `mysql_skip_table_create` is set to `True`. 85 | 86 | ## Amazon DynamoDB 87 | 88 | The Amazon DynamoDB backend can be configured with `dynamodb_repository`. 89 | 90 | ```python 91 | from pyncette import Pyncette, Context 92 | from pyncette.dynamodb import dynamodb_repository 93 | 94 | app = Pyncette( 95 | repository_factory=dynamodb_repository, 96 | dynamodb_region_name="eu-west-1", 97 | dynamodb_table_name="pyncette", 98 | ) 99 | ``` 100 | 101 | The DynamoDB repository will use [ambient credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#guide-credentials), such as environment variables, `~/.aws/config` or the EC2 metadata service if e.g. running on EC2 or a Kubernetes cluster with kiam/kube2iam. 102 | 103 | For convenience, an appropriate DynamoDB table will be automatically created on startup if it does not exist. The created table uses the on-demand pricing model.
If you would like to customize this behavior, you can manually create the table beforehand and pass `dynamodb_skip_table_create=True` in parameters. 104 | 105 | Expected table schema should look something like this 106 | 107 | ```json 108 | { 109 | "AttributeDefinitions": [ 110 | { "AttributeName": "partition_id", "AttributeType": "S" }, 111 | { "AttributeName": "ready_at", "AttributeType": "S" }, 112 | { "AttributeName": "task_id", "AttributeType": "S" } 113 | ], 114 | "KeySchema": [ 115 | { "AttributeName": "partition_id", "KeyType": "HASH" }, 116 | { "AttributeName": "task_id", "KeyType": "RANGE" } 117 | ], 118 | "LocalSecondaryIndexes": [ 119 | { 120 | "IndexName": "ready_at", 121 | "KeySchema": [ 122 | { "AttributeName": "partition_id", "KeyType": "HASH" }, 123 | { "AttributeName": "ready_at", "KeyType": "RANGE" } 124 | ], 125 | "Projection": { 126 | "ProjectionType": "ALL" 127 | } 128 | } 129 | ] 130 | } 131 | ``` 132 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.0.0 (2025-10-17) 4 | 5 | ### Breaking Changes 6 | 7 | - **Dropped Python 3.8 support** - Minimum Python version is now 3.9 8 | 9 | ### Packaging and Tooling Modernization 10 | 11 | - Migrated from `setup.py` to modern PEP 621 `pyproject.toml` with hatchling build backend 12 | - Replaced tox with uv for dependency management and testing across all workflows 13 | - Updated GitHub Actions workflows to use native uv commands (`uv sync`, `uv run`, `uv build`) 14 | - Migrated documentation from Sphinx/reStructuredText to MkDocs Material/Markdown 15 | - Set up automatic API documentation generation with mkdocstrings 16 | - Replaced mypy with ty for type checking 17 | - Consolidated linting/formatting to use Ruff (replacing black, isort, pyupgrade) 18 | - Modernized pre-commit hooks configuration 19 | - Updated Docker development environment to install uv 20 | 21 | ### Bug Fixes 22 | 23 | - Fixed Python 3.14 compatibility: Converted all SQLite SQL queries to use consistent named parameter style (`:name`) instead of mixing PostgreSQL-style (`$1`), qmark (`?`), and named parameters 24 | - Fixed latent bug in `poll_task` where lease comparison failed due to UUID vs string type mismatch 25 | - Fixed bug in `poll_dynamic_task` where optimistic locking was not working due to incorrect parameter binding 26 | 27 | ### Documentation 28 | 29 | - Converted all documentation files from `.rst` to `.md` format 30 | - Updated all development instructions to use uv commands 31 | - Added relevant PyPI keywords for better discoverability 32 | 33 | ## 0.11.0 (2024-11-25) 34 | 35 | - Add support for Python 3.12 and 3.13 36 | 37 | ## 0.10.1 (2023-05-09) 38 | 39 | - Include missing lua files in the built wheel 40 | 41 | ## 0.10.0 (2023-05-08) 42 | 43 | - Drop support for Python 3.7 44 | - Add support for Python 3.11 45 | - Modernize Python package structure and linters 46 | - Fix a few bugs and type annotations 47 | 48 | ## 0.8.1 (2021-04-08) 49 | 50 | - Improve performance for calculation of the next execution time 51 | - Add ability for repositories to pass a pagination token 52 | - Add `add_to_context()` to inject static data to context 53 | - Clean up documentation and add additional examples 54 | 55 | ## 0.8.0 (2021-04-05) 56 | 57 | - Added Amazon DynamoDB backend 58 | - Added MySQL backend 59 | - Added support for partitioned dynamic tasks 60 | 61 | ## 0.7.0 (2021-03-31) 62 | 63 | - 
Added support for automatic and cooperative lease heartbeating 64 | - PostgreSQL backend can now skip automatic table creation 65 | - Improved signal handling 66 | - CI: Add Codecov integration 67 | - Devenv: Run integration tests in Docker Compose 68 | 69 | ## 0.6.1 (2020-04-02) 70 | 71 | - Optimize the task querying on Postgres backend 72 | - Fix: ensure that there are no name collisions between concrete instances of different dynamic tasks 73 | - Improve fairness of polling tasks under high contention. 74 | 75 | ## 0.6.0 (2020-03-31) 76 | 77 | - Added PostgreSQL backend 78 | - Added Sqlite backend and made it the default (replacing `InMemoryRepository`) 79 | - Refactored test suite to cover all conformance/integration tests on all backends 80 | - Refactored Redis backend, simplifying the Lua scripts and improving exceptional case handling (e.g. tasks disappearing between query and poll) 81 | - Main loop only sleeps for the rest of remaining `poll_interval` before next tick instead of the full amount 82 | - General bug fixes, documentation changes, clean up 83 | 84 | ## 0.5.0 (2020-03-27) 85 | 86 | - Fixes bug where a locked dynamic task could be executed again on next tick. 87 | - poll_task is now reentrant with regards to locking. If the lease passed in matches the lease on the task, it behaves as though it were unlocked. 88 | 89 | ## 0.4.0 (2020-02-16) 90 | 91 | - Middleware support and optional metrics via Prometheus 92 | - Improved the graceful shutdown behavior 93 | - Task instance and application context are now available in the task context 94 | - Breaking change: dynamic task parameters are now accessed via `context.args['name']` instead of `context.name` 95 | - Improved examples, documentation and packaging 96 | 97 | ## 0.2.0 (2020-01-08) 98 | 99 | - Timezone support 100 | - More efficient polling when the Redis backend is used 101 | 102 | ## 0.1.1 (2020-01-08) 103 | 104 | - First release that actually works. 105 | 106 | ## 0.0.0 (2019-12-31) 107 | 108 | - First release on PyPI. 109 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every 4 | little bit helps, and credit will always be given. 5 | 6 | ## Bug reports 7 | 8 | When [reporting a bug](https://github.com/tibordp/pyncette/issues) please include: 9 | 10 | - Your operating system name and version. 11 | - Any details about your local setup that might be helpful in troubleshooting. 12 | - Detailed steps to reproduce the bug. 13 | 14 | ## Documentation improvements 15 | 16 | Pyncette could always use more documentation, whether as part of the 17 | official Pyncette docs, in docstrings, or even on the web in blog posts, 18 | articles, and such. 19 | 20 | ## Feature requests and feedback 21 | 22 | The best way to send feedback is to file an issue at https://github.com/tibordp/pyncette/issues. 23 | 24 | If you are proposing a feature: 25 | 26 | - Explain in detail how it would work. 27 | - Keep the scope as narrow as possible, to make it easier to implement. 28 | - Remember that this is a volunteer-driven project, and that code contributions are welcome :) 29 | 30 | ## Development 31 | 32 | To set up `pyncette` for local development: 33 | 34 | 1. Fork [pyncette](https://github.com/tibordp/pyncette) 35 | (look for the "Fork" button). 36 | 37 | 1.
Clone your fork locally: 38 | 39 | ```bash 40 | git clone git@github.com:tibordp/pyncette.git 41 | ``` 42 | 43 | 1. Create a branch for local development: 44 | 45 | ```bash 46 | git checkout -b name-of-your-bugfix-or-feature 47 | ``` 48 | 49 | Now you can make your changes locally. 50 | 51 | 1. Set up your development environment: 52 | 53 | ```bash 54 | uv sync --extra all --extra dev 55 | ``` 56 | 57 | 1. Running integration tests assumes that there will be Redis, PostgreSQL, MySQL and Localstack (for DynamoDB) running on localhost. Alternatively, there is a Docker Compose environment that will set up all the backends so that integration tests can run seamlessly: 58 | 59 | ```bash 60 | docker-compose up -d 61 | docker-compose run --rm shell 62 | ``` 63 | 64 | 1. When you're done making changes, run all the checks: 65 | 66 | ```bash 67 | # Run linting and formatting 68 | uv run pre-commit run --all-files 69 | 70 | # Run type checking 71 | uv run ty check src examples 72 | 73 | # Run tests 74 | uv run pytest tests 75 | 76 | # Build documentation 77 | uv run mkdocs build 78 | ``` 79 | 80 | 1. Commit your changes and push your branch to GitHub: 81 | 82 | ```bash 83 | git add . 84 | git commit -m "Your detailed description of your changes." 85 | git push origin name-of-your-bugfix-or-feature 86 | ``` 87 | 88 | 1. Submit a pull request through the GitHub website. 89 | 90 | If you run into issues setting up a local environment or testing the code locally, feel free to submit the PR anyway and GitHub Actions will test it for you. 91 | 92 | ## Pull Request Guidelines 93 | 94 | If you need some code review or feedback while you're developing the code, just make the pull request. 95 | 96 | For merging, you should: 97 | 98 | 1. Update documentation when there's new API, functionality, etc. 99 | 1. Add a note to `docs/changelog.md` about the changes. 100 | 101 | ## Tips 102 | 103 | To run a subset of tests: 104 | 105 | ```bash 106 | uv run pytest -k test_myfeature 107 | ``` 108 | 109 | To run tests for a specific Python version: 110 | 111 | ```bash 112 | uv venv --python 3.11 113 | uv sync --extra all --extra dev 114 | uv run pytest tests 115 | ``` 116 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Pyncette 2 | 3 | A reliable distributed scheduler with pluggable storage backends for Async Python. 4 | 5 | ## Overview 6 | 7 | Pyncette is designed for reliable (at-least-once or at-most-once) execution of recurring tasks (think cronjobs) whose 8 | lifecycles are managed dynamically, but can work effectively for non-recurring tasks too. 9 | 10 | Example use cases: 11 | 12 | - You want to perform a database backup every day at noon 13 | - You want a report to be generated daily for your 10M users at the time of their choosing 14 | - You want currency conversion rates to be refreshed every 10 seconds 15 | - You want to allow your users to schedule non-recurring emails to be sent at an arbitrary time in the future 16 | 17 | Pyncette might not be a good fit if: 18 | 19 | - You want your tasks to be scheduled to run (ideally) once as soon as possible. It is doable, but you will be better served by a general purpose reliable queue like RabbitMQ or Amazon SQS. 20 | - You need tasks to execute at sub-second intervals with low jitter. Pyncette coordinates execution on a per task-instance basis and this coordination can add overhead and jitter.
21 | 22 | ## Quick Start 23 | 24 | Simple in-memory scheduler (does not persist state) 25 | 26 | ```python 27 | from pyncette import Pyncette, Context 28 | 29 | app = Pyncette() 30 | 31 | 32 | @app.task(schedule="* * * * *") 33 | async def foo(context: Context): 34 | print("This will run every minute") 35 | 36 | 37 | if __name__ == "__main__": 38 | app.main() 39 | ``` 40 | 41 | Persistent distributed cron using Redis (coordinates execution with parallel instances and survives restarts) 42 | 43 | ```python 44 | from pyncette import Pyncette, Context 45 | from pyncette.redis import redis_repository 46 | 47 | app = Pyncette(repository_factory=redis_repository, redis_url="redis://localhost") 48 | 49 | 50 | @app.task(schedule="* * * * * */10") 51 | async def foo(context: Context): 52 | print("This will run every 10 seconds") 53 | 54 | 55 | if __name__ == "__main__": 56 | app.main() 57 | ``` 58 | 59 | See the `examples` directory for more examples of usage. 60 | 61 | ## Supported backends 62 | 63 | Pyncette comes with an implementation for the following backends (used for persistence and coordination) out-of-the-box: 64 | 65 | - SQLite (included) 66 | - Redis (`pip install pyncette[redis]`) 67 | - PostgreSQL (`pip install pyncette[postgres]`) 68 | - MySQL 8.0+ (`pip install pyncette[mysql]`) 69 | - Amazon DynamoDB (`pip install pyncette[dynamodb]`) 70 | 71 | Pyncette imposes few requirements on the underlying datastores, so it can be extended to support other databases or 72 | custom storage formats / integrations with existing systems. For best results, the backend needs to provide: 73 | 74 | - Some sort of serialization mechanism, e.g. traditional transactions, atomic stored procedures or compare-and-swap 75 | - Efficient range queries over a secondary index, which can be eventually consistent 76 | 77 | ## Features 78 | 79 | - **Reliable execution**: At-least-once or at-most-once execution guarantees 80 | - **Distributed coordination**: Run multiple instances without duplicate task execution 81 | - **Flexible scheduling**: Cron-like syntax or interval-based scheduling 82 | - **Dynamic tasks**: Register and unregister tasks at runtime 83 | - **Timezone support**: Schedule tasks in different timezones 84 | - **Heartbeating**: Keep long-running tasks alive with cooperative or automatic heartbeating 85 | - **Middleware support**: Add custom logic around task execution 86 | - **Pluggable backends**: SQLite, Redis, PostgreSQL, MySQL, and DynamoDB support 87 | 88 | ## License 89 | 90 | Free software: MIT license 91 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | At the command line: 4 | 5 | ```bash 6 | pip install pyncette 7 | ``` 8 | 9 | For installing with Redis persistence: 10 | 11 | ```bash 12 | pip install pyncette[redis] 13 | ``` 14 | 15 | For installing with MySQL persistence: 16 | 17 | ```bash 18 | pip install pyncette[mysql] 19 | ``` 20 | 21 | For installing with Amazon DynamoDB persistence: 22 | 23 | ```bash 24 | pip install pyncette[dynamodb] 25 | ``` 26 | 27 | For installing with PostgreSQL persistence: 28 | 29 | ```bash 30 | pip install pyncette[postgres] 31 | ``` 32 | 33 | For installing with Prometheus metrics exporter: 34 | 35 | ```bash 36 | pip install pyncette[prometheus] 37 | ``` 38 | 39 | For a full installation with all the extras: 40 | 41 | ```bash 42 | pip install pyncette[all] 43 | ```
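44 | 45 | As a quick sanity check of the installation, you can print the installed version (the `__version__` attribute is defined in `pyncette/__init__.py` and kept up to date by the release tooling): 46 | 47 | ```bash 48 | python -c "import pyncette; print(pyncette.__version__)" 49 | ```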
-------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | The core unit of execution in Pyncette is a `Task`. Each task is a Python coroutine that specifies what needs to be executed. 4 | 5 | ```python 6 | from pyncette import Pyncette, Context 7 | 8 | app = Pyncette() 9 | 10 | 11 | @app.task(interval=datetime.timedelta(seconds=2)) 12 | async def successful_task(context: Context) -> None: 13 | print("This will execute every 2 seconds") 14 | 15 | 16 | if __name__ == "__main__": 17 | app.main() 18 | ``` 19 | 20 | ## Running the main loop 21 | 22 | The usual use case is that Pyncette runs as its own process, so the standard way to start the main loop is with the `main` method of `Pyncette`. This sets up logging to standard output and a signal handler allowing for graceful shutdown (the first SIGINT initiates the graceful shutdown and the second one terminates the process). 23 | 24 | If Pyncette is run alongside other code or for customization, `create` can be used to initialize the runtime environment and then the main loop can be run with `run`: 25 | 26 | ```python 27 | import asyncio 28 | from pyncette import Pyncette 29 | 30 | app = Pyncette() 31 | 32 | ... 33 | 34 | async with app.create() as app_context: 35 | await app_context.run() 36 | ``` 37 | 38 | ## Specifying the schedule 39 | 40 | There are two ways a schedule can be specified. One is with the cron-like syntax (uses `croniter` under the hood to support the calculation): 41 | 42 | ```python 43 | @app.task(schedule="* * * * *") 44 | async def every_minute(context: Context): ... 45 | 46 | 47 | @app.task(schedule="* * * * * */10") 48 | async def every_10_seconds(context: Context): ... 49 | 50 | 51 | @app.task(schedule="20 4 * * * *") 52 | async def every_day_at_4_20_am(context: Context): ... 53 | ``` 54 | 55 | The other way is with an interval: 56 | 57 | ```python 58 | @app.task(interval=datetime.timedelta(seconds=12)) 59 | async def every_12_seconds(context: Context): ... 60 | ``` 61 | 62 | ## Customizing tasks 63 | 64 | Pyncette supports multiple execution modes, which provide different levels of reliability guarantees, depending on the nature of the task. 65 | 66 | The default task configuration: 67 | 68 | - When the task is scheduled for execution, it is locked for 60 seconds 69 | - If the task execution succeeds, the next execution is scheduled and the task is unlocked 70 | - If the task execution fails (exception is raised), the lock is not released, so it will be retried after the lease expires. 71 | - If the task execution exceeds the lease duration, it will be executed again (so there could be two executions at the same time) 72 | 73 | ### Best-effort tasks 74 | 75 | If the task is run in a best-effort mode, locking will not be employed, and the next execution will be scheduled immediately when it becomes ready. 76 | 77 | ```python 78 | from pyncette import ExecutionMode 79 | 80 | 81 | @app.task( 82 | interval=datetime.timedelta(seconds=10), execution_mode=ExecutionMode.AT_MOST_ONCE 83 | ) 84 | async def every_10_seconds(context: Context): 85 | print("Ping") 86 | ``` 87 | 88 | !!!caution 89 | If best effort is used, there is no way to retry a failed execution, and exceptions thrown by the task will only be logged.
90 | 91 | ### Failure behavior 92 | 93 | Failure behavior can be specified with the `failure_mode` parameter: 94 | 95 | ```python 96 | from pyncette import FailureMode 97 | 98 | 99 | @app.task(interval=datetime.timedelta(seconds=10), failure_mode=FailureMode.UNLOCK) 100 | async def every_10_seconds(context: Context): 101 | print("Ping") 102 | ``` 103 | 104 | - `FailureMode.NONE`: the task will stay locked until the lease expires. This is the default. 105 | - `FailureMode.UNLOCK`: the task will be immediately unlocked if an exception is thrown, so it will be retried on the next tick. 106 | - `FailureMode.COMMIT`: treat the exception as a success and schedule the next execution even if an exception is thrown. 107 | 108 | ### Timezone support 109 | 110 | Pyncette is timezone-aware; the timezone for a task can be specified with the `timezone` parameter: 111 | 112 | ```python 113 | from pyncette import ExecutionMode 114 | 115 | 116 | @app.task(schedule="0 12 * * *", timezone="Europe/Dublin") 117 | async def task1(context: Context): 118 | print(f"Hello from Dublin!") 119 | 120 | 121 | @app.task(schedule="0 12 * * *", timezone="UTC+12") 122 | async def task2(context: Context): 123 | print(f"Hello from Камча́тка!") 124 | ``` 125 | 126 | The accepted values are all those that `dateutil.tz.gettz` accepts. 127 | 128 | ### Disabling a task 129 | 130 | Tasks can be disabled by passing `enabled=False` in the parameters. This can be used, for example, 131 | to conditionally enable tasks only on certain instances. 132 | 133 | ```python 134 | @app.task(schedule="* * * * *", enabled=False) 135 | async def task1(context: Context): 136 | print(f"This will never run.") 137 | ``` 138 | 139 | Tasks can also be disabled in the initialization code: 140 | 141 | ```python 142 | from pyncette import Pyncette, Context 143 | 144 | app = Pyncette() 145 | 146 | 147 | @app.task(schedule="* * * * *") 148 | async def task1(context: Context): 149 | print(f"This will never run.") 150 | 151 | 152 | async with app.create() as app_context: 153 | task1.enabled = False 154 | await app_context.run() 155 | ``` 156 | 157 | ### Task parameters 158 | 159 | The `task` decorator accepts an arbitrary number of additional parameters, which are available through the `context` parameter: 160 | 161 | ```python 162 | from pyncette import ExecutionMode 163 | 164 | 165 | # If we use multiple decorators on the same coroutine, we must explicitly provide the name 166 | @app.task(name="task1", interval=datetime.timedelta(seconds=10), username="abra") 167 | @app.task(name="task2", interval=datetime.timedelta(seconds=20), username="kadabra") 168 | @app.task(name="task3", interval=datetime.timedelta(seconds=30), username="alakazam") 169 | async def task(context: Context): 170 | print(f"{context.args['username']}") 171 | ``` 172 | 173 | This allows for parametrized tasks with multiple decorators; this is an essential feature needed to support dynamic tasks. 174 | 175 | !!!note 176 | There is a restriction that all the values of the parameters must be JSON-serializable, since they are persisted in storage when dynamic tasks are used. 177 | 178 | ## Middlewares 179 | 180 | If you have common logic that should execute around every task invocation, middlewares can be used. Good examples of middlewares are ones used for logging and metrics.
181 | 182 | ```python 183 | app = Pyncette() 184 | 185 | 186 | @app.middleware 187 | async def retry(context: Context, next: Callable[[], Awaitable[None]]): 188 | # Example only, prefer to rely on Pyncette to drive task retry logic 189 | for _ in range(5): 190 | try: 191 | await next() 192 | return 193 | except Exception as e: 194 | pass 195 | raise Exception(f"Task {context.task.name} failed too many times.") 196 | 197 | 198 | @app.middleware 199 | async def logging(context: Context, next: Callable[[], Awaitable[None]]): 200 | logger.info(f"Task {context.task.name} started") 201 | try: 202 | await next() 203 | except Exception as e: 204 | logger.error(f"Task {context.task.name} failed", e) 205 | raise 206 | 207 | 208 | @app.middleware 209 | async def db_transaction(context: Context, next: Callable[[], Awaitable[None]]): 210 | context.db.begin_transaction() 211 | try: 212 | await next() 213 | except Exception: 214 | context.db.rollback() 215 | raise 216 | else: 217 | context.db.commit() 218 | ``` 219 | 220 | Middlewares execute in the order they are defined. 221 | 222 | ## Fixtures 223 | 224 | Fixtures provide a convenient way to inject dependencies into tasks and to specify the set-up and tear-down code. They can be thought of as application-level middlewares. For example, let's say we want to inject the database and a logfile as dependencies to all our tasks: 225 | 226 | ```python 227 | app = Pyncette() 228 | 229 | 230 | @app.fixture() 231 | async def db(app_context: PyncetteContext): 232 | db = await database.connect(...) 233 | try: 234 | yield db 235 | finally: 236 | await db.close() 237 | 238 | 239 | @app.fixture(name="super_log_file") 240 | async def logfile(app_context: PyncetteContext): 241 | with open("log.txt", "a") as file: 242 | yield file 243 | 244 | 245 | @app.task(interval=datetime.timedelta(seconds=2)) 246 | async def successful_task(context: Context) -> None: 247 | context.super_log_file.write("Querying the database") 248 | results = await context.db.query(...) 249 | ... 250 | ``` 251 | 252 | The lifetime of a fixture is that of a Pyncette application, i.e. the setup code for all fixtures runs before the first tick and the tear-down code runs after the graceful shutdown is initiated and all the pending tasks have finished. Like middlewares, fixtures execute in the order they are defined (and in reverse order on shutdown). 253 | 254 | ## Persistence 255 | 256 | By default Pyncette runs without persistence. This means that the schedule is maintained in-memory and there is no coordination between multiple instances of the app. 257 | 258 | Enabling persistence allows the application to recover from restarts as well as the ability to run multiple instances of an app concurrently without duplicate executions of tasks. 259 | 260 | See [Backends](backends.md) for instructions on how to configure persistence for a database of your choice. 261 | 262 | ## Heartbeating 263 | 264 | If you have tasks with an unpredictable run time, it can be hard to come up with an appropriate lease duration in advance. If set too short, the lease will expire, leading to duplicate task execution, and if too long, there can be insufficient protection against unhealthy workers. 265 | 266 | A way to mitigate this is to use heartbeating. Heartbeating will periodically extend the lease on the task as long as the task is still running.
Pyncette supports two approaches to heartbeating: 267 | 268 | - Cooperative heartbeating: your task periodically calls `context.heartbeat()` to extend the lease 269 | - Automatic heartbeating: your task is decorated with `with_heartbeat` and it heartbeats automatically in the background for as long as the task is executing. 270 | 271 | Beware that automatic heartbeating can potentially be dangerous if, for example, your task is stuck in an infinite loop or an I/O operation that does not have a proper time out. In this case the lease can be kept alive indefinitely and the task will not make any progress. Cooperative heartbeating may be more verbose, but offers a greater degree of control. 272 | 273 | If `context.heartbeat()` is called when the lease is already lost, the call will raise `LeaseLostException`, allowing you to bail out early, since another instance is likely already processing the same task. 274 | 275 | ```python 276 | from pyncette.utils import with_heartbeat 277 | 278 | 279 | @app.task(schedule="* * * * * */10") 280 | @with_heartbeat() 281 | async def foo(context: Context): 282 | # The task will be kept alive by the heartbeat 283 | await asyncio.sleep(3600) 284 | 285 | 286 | if __name__ == "__main__": 287 | app.main() 288 | ``` 289 | 290 | ## Dynamic tasks 291 | 292 | With `schedule_task`, Pyncette supports a use case where the tasks are not necessarily known in advance. 293 | 294 | ```python 295 | @app.dynamic_task() 296 | async def hello(context: Context) -> None: 297 | print(f"Hello {context.args['username']}") 298 | 299 | 300 | async with app.create() as app_context: 301 | await asyncio.gather( 302 | app_context.schedule_task( 303 | hello, "bill_task", schedule="0 * * * *", username="bill" 304 | ), 305 | app_context.schedule_task( 306 | hello, "steve_task", schedule="20 * * * *", username="steve" 307 | ), 308 | app_context.schedule_task( 309 | hello, "john_task", schedule="40 * * * *", username="john" 310 | ), 311 | ) 312 | await app_context.run() 313 | ``` 314 | 315 | When persistence is used, the schedules and task parameters of the tasks are persisted alongside the execution data, which allows the tasks to be registered and unregistered at will. 316 | 317 | An example use case is a web application where every user can have something happen at their chosen schedule. Polling is efficient, since the concrete instances of the dynamic task are only loaded from the storage if they are already due, instead of being polled all the time. 318 | 319 | The task instances can be removed with `unschedule_task`: 320 | 321 | ```python 322 | ... 323 | 324 | async with app.create() as app_context: 325 | await app_context.schedule_task( 326 | hello, "bill_task", schedule="0 * * * *", username="bill" 327 | ) 328 | await app_context.unschedule_task(hello, "bill_task") 329 | await app_context.run() 330 | ``` 331 | 332 | !!!note 333 | If the number of dynamic tasks is large, it is a good idea to limit the batch size: 334 | 335 | ```` 336 | ```python 337 | app = Pyncette( 338 | repository_factory=redis_repository, 339 | redis_url='redis://localhost', 340 | batch_size=10 341 | ) 342 | ``` 343 | 344 | This will ensure that only a specified number of dynamic tasks are scheduled for execution during a single tick, as well as allow multiple instances of the same app to load balance effectively. 345 | ```` 346 | 347 | ## Once-off dynamic tasks 348 | 349 | Dynamic tasks can also be scheduled to execute only once at a specific date.
290 | ## Dynamic tasks
291 | 
292 | Pyncette also supports a use case where the tasks are not necessarily known in advance; task instances are registered at runtime with `schedule_task`.
293 | 
294 | ```python
295 | @app.dynamic_task()
296 | async def hello(context: Context) -> None:
297 |     print(f"Hello {context.args['username']}")
298 | 
299 | 
300 | async with app.create() as app_context:
301 |     await asyncio.gather(
302 |         app_context.schedule_task(
303 |             hello, "bill_task", schedule="0 * * * *", username="bill"
304 |         ),
305 |         app_context.schedule_task(
306 |             hello, "steve_task", schedule="20 * * * *", username="steve"
307 |         ),
308 |         app_context.schedule_task(
309 |             hello, "john_task", schedule="40 * * * *", username="john"
310 |         ),
311 |     )
312 |     await app_context.run()
313 | ```
314 | 
315 | When persistence is used, the schedules and task parameters of the task instances are persisted alongside the execution data, which allows the tasks to be registered and unregistered at will.
316 | 
317 | An example use case is a web application where every user can have something happen at their chosen schedule. Polling is efficient, since the concrete instances of the dynamic task are only loaded from storage if they are already due, instead of being polled all the time.
318 | 
319 | The task instances can be removed with `unschedule_task`:
320 | 
321 | ```python
322 | ...
323 | 
324 | async with app.create() as app_context:
325 |     await app_context.schedule_task(
326 |         hello, "bill_task", schedule="0 * * * *", username="bill"
327 |     )
328 |     await app_context.unschedule_task(hello, "bill_task")
329 |     await app_context.run()
330 | ```
331 | 
332 | !!!note
333 |     If the number of dynamic tasks is large, it is a good idea to limit the batch size:
334 | 
335 |     ````
336 |     ```python
337 |     app = Pyncette(
338 |         repository_factory=redis_repository,
339 |         redis_url='redis://localhost',
340 |         batch_size=10
341 |     )
342 |     ```
343 | 
344 |     This ensures that only the specified number of dynamic task instances is scheduled for execution during a single tick, and also allows multiple instances of the same app to load-balance effectively.
345 |     ````
346 | 
347 | ## Once-off dynamic tasks
348 | 
349 | Dynamic tasks can also be scheduled to execute only once at a specific date.
350 | 
351 | ```python
352 | @app.dynamic_task()
353 | async def task(context: Context) -> None:
354 |     print(f"Hello {context.task.name}!")
355 | 
356 | 
357 | async with app.create() as app_context:
358 |     await app_context.schedule_task(
359 |         task, "y2k38", execute_at=datetime(2038, 1, 19, 3, 14, 7)
360 |     )
361 |     await app_context.schedule_task(
362 |         task, "tomorrow", execute_at=datetime.now() + timedelta(days=1)
363 |     )
364 | 
365 |     # This will execute once immediately, since it is already overdue
366 |     await app_context.schedule_task(
367 |         task, "overdue", execute_at=datetime.now() - timedelta(days=1)
368 |     )
369 |     await app_context.run()
370 | ```
371 | 
372 | Once-off tasks have the same reliability guarantees as recurrent tasks, which are controlled by the `execution_mode` and `failure_mode` parameters, but in case of success, they will not be scheduled again.
373 | 
374 | ## Performance
375 | 
376 | Tasks are executed in parallel. If you have a lot of long-running tasks, you can set `concurrency_limit` in the `Pyncette` constructor, which ensures that there are at most that many tasks executing at any given time. If there are no free slots in the semaphore, this serves as back-pressure and ensures that no additional tasks are polled until some of the currently executing ones finish, enabling the pending tasks to be scheduled on other instances of your app. Setting `concurrency_limit` to 1 is equivalent to serializing the execution of all the tasks.
377 | 
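For instance, a minimal sketch (the bundled default executor caps concurrency at 100 unless overridden):

```python
# Allow at most 8 task executions at any given time; when all slots are
# taken, polling waits, which acts as back-pressure.
app = Pyncette(concurrency_limit=8)
```
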
378 | Depending on the backend used, having a dynamic task with a very large number of instances can lead to diminished performance. See [Advanced Usage](advanced_usage.md) for a way to address this issue.
379 | 
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 | 
3 | ## [basic.py](./basic.py)
4 | 
5 | Hello world example.
6 | 
7 | ## [persistence.py](./persistence.py)
8 | 
9 | This example stores the state of the scheduler in a variety of backends supported by Pyncette.
10 | 
11 | By having a persistent backend, you can run multiple processes and they will coordinate
12 | execution among them, making sure that tasks are only executed by one of them on schedule.
13 | 
14 | ## [dynamic_tasks.py](./dynamic_tasks.py)
15 | 
16 | This example illustrates dynamic tasks, i.e. tasks that are not pre-defined in code and
17 | can be scheduled at runtime.
18 | 
19 | A function marked with `@app.dynamic_task` serves as a template, and individual task
20 | instances can be scheduled with `schedule_task` (and unscheduled with `unschedule_task`).
21 | 
22 | Using a persistent backend, Pyncette supports efficient execution of a large number of
23 | dynamic task instances.
24 | 
25 | ## [delay_queue.py](./delay_queue.py)
26 | 
27 | This example uses Pyncette to implement a reliable delay queue (persistence is needed for durability
28 | or for running multiple instances of the app at the same time; see [examples/persistence.py](./persistence.py) for details).
29 | 
30 | After the task instance succeeds, it will not be scheduled again, as it would be with recurrent tasks; however,
31 | if an exception is raised, it will be retried if `ExecutionMode.AT_LEAST_ONCE` is used.
32 | 
33 | ## [fixtures_and_middlewares.py](./fixtures_and_middlewares.py)
34 | 
35 | This example illustrates the use of fixtures and middlewares.
36 | 
37 | Middlewares are functions that wrap the execution of every defined task, so they are a good
38 | place to put cross-cutting concerns such as logging, database session management, metrics, ...
39 | 
40 | Fixtures can be thought of as application-level middlewares. They wrap the lifecycle of the entire
41 | Pyncette app, can be used to perform initialization and cleanup, and can inject resources such as
42 | service clients into the task context.
43 | 
44 | ## [healthcheck.py](./healthcheck.py)
45 | 
46 | This example illustrates the use of the healthcheck HTTP server. It exposes the /health endpoint,
47 | which returns 200 if the last successful poll was less than 2 poll intervals ago, and 500 otherwise.
48 | 
49 | ```
50 | curl localhost:8080/health
51 | ```
52 | 
53 | ## [heartbeat.py](./heartbeat.py)
54 | 
55 | This example demonstrates the heartbeating functionality, which allows the lease on the
56 | task to be extended. This can be useful if tasks have an unpredictable run time, to minimize
57 | the risk of another instance taking over the lease.
58 | 
59 | Heartbeating can be either cooperative or automatic.
60 | 
61 | ## [prometheus_metrics.py](./prometheus_metrics.py)
62 | 
63 | Pyncette ships with optional Prometheus instrumentation based on the official prometheus_client
64 | Python package. It includes the following metrics:
65 | 
66 | - Tick duration [Histogram]
67 | - Tick volume [Counter]
68 | - Tick failures [Counter]
69 | - Number of currently executing ticks [Gauge]
70 | - Task duration [Histogram]
71 | - Task volume [Counter]
72 | - Task failures [Counter]
73 | - Number of currently executing tasks [Gauge]
74 | - Task run staleness (i.e. how far behind the scheduled time the actual executions are) [Histogram]
75 | - Repository operation duration [Histogram]
76 | - Repository operation volume [Counter]
77 | - Repository operation failures [Counter]
78 | - Number of currently executing repository operations [Gauge]
79 | 
80 | It pushes the metrics to the default registry (`prometheus_client.REGISTRY`), so it can be combined with other
81 | code alongside it.
82 | 
83 | To see the exported metrics while running this example, use something like
84 | 
85 | ```
86 | curl localhost:9699/metrics
87 | ```
88 | 
89 | ## [benchmark.py](./benchmark.py)
90 | 
91 | This example schedules a large number of dynamic tasks and then runs them (in multiple processes) as a way
92 | to gauge the total throughput of Pyncette for a particular backend.
93 | 
94 | To run this example, configure the selected backend in the Pyncette constructor, then populate the database:
95 | 
96 | ```
97 | python examples/benchmark.py populate -n 
98 | ```
99 | 
100 | While the tasks are populating, you can run
101 | 
102 | ```
103 | python examples/benchmark.py run --processes <# of processes>
104 | ```
105 | 
106 | The process will continuously print the overall throughput (task executions per second) and the lag (seconds since the last successful tick).
107 | -------------------------------------------------------------------------------- /examples/basic.py: -------------------------------------------------------------------------------- 1 | from pyncette import Context 2 | from pyncette import Pyncette 3 | 4 | app = Pyncette() 5 | 6 | 7 | @app.task(schedule="* * * * * */5") 8 | async def hello_world(context: Context) -> None: 9 | print("Hello world!") 10 | 11 | 12 | if __name__ == "__main__": 13 | app.main() 14 | -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example schedules a large number of dynamic tasks and then runs them (in multiple processes) 3 | as a way to gauge the total throughput of Pyncette for a particular backend. 4 | 5 | To run this example, configure the selected backend in the Pyncette constructor, then run populate the database. 6 | 7 | python examples/benchmark.py populate -n 8 | 9 | While the tasks are populating you can run 10 | 11 | python examples/benchmark.py run --processes <# of processes> 12 | 13 | The process will continuously print the overall throughput (task executions per second) and the lag 14 | (seconds since the last successful tick). 15 | """ 16 | 17 | import argparse 18 | import asyncio 19 | import datetime 20 | import logging 21 | import random 22 | import time 23 | import uuid 24 | from multiprocessing import Process 25 | from multiprocessing.sharedctypes import RawValue # type: ignore 26 | from typing import Any 27 | from typing import Optional 28 | 29 | import coloredlogs 30 | 31 | from pyncette import Context 32 | from pyncette import ExecutionMode 33 | from pyncette import Pyncette 34 | from pyncette.redis import redis_repository 35 | 36 | logger = logging.getLogger(__name__) 37 | 38 | # Adjust the values below 39 | app = Pyncette( 40 | repository_factory=redis_repository, 41 | redis_url="redis://localhost", 42 | redis_namespace="benchmark", 43 | batch_size=100, 44 | ) 45 | 46 | PARTITION_COUNT = 32 47 | 48 | 49 | @app.partitioned_task(partition_count=PARTITION_COUNT, execution_mode=ExecutionMode.AT_LEAST_ONCE) 50 | async def benchmark_task(context: Context) -> None: 51 | context.hit_count.value += 1 52 | if context.app_context.last_tick is not None: 53 | context.staleness.value = (datetime.datetime.now(datetime.timezone.utc) - context.app_context.last_tick).total_seconds() 54 | 55 | 56 | async def populate(n: int, parallel: int) -> None: 57 | """Populates the database with n instances of the dynamic tasks""" 58 | 59 | async with app.create() as app_context: 60 | tasks = [] 61 | for i in range(n): 62 | interval = datetime.timedelta(seconds=random.randrange(10, 3600)) 63 | tasks.append(app_context.schedule_task(benchmark_task, str(uuid.uuid4()), interval=interval)) 64 | 65 | if len(tasks) == parallel: 66 | await asyncio.gather(*tasks) 67 | tasks = [] 68 | 69 | if (i + 1) % 1000 == 0: 70 | logger.info(f"Scheduled {i + 1} tasks") 71 | 72 | await asyncio.gather(*tasks) 73 | logger.info("DONE!") 74 | 75 | 76 | async def run( 77 | hit_count: Any, 78 | staleness: Any, 79 | enabled_partitions: Optional[list[int]], 80 | ) -> None: 81 | async with app.create() as app_context: 82 | app_context.add_to_context("hit_count", hit_count) 83 | app_context.add_to_context("staleness", staleness) 84 | benchmark_task.enabled_partitions = enabled_partitions 85 | 86 | logger.info(f"Starting to poll following partitions {enabled_partitions}") 87 | await 
app_context.run() 88 | 89 | 90 | def _run(log_level: str, *args: Any, **kwargs: Any) -> None: 91 | # On Windows we need to setup logging again as forking is not supported 92 | setup(log_level) 93 | asyncio.run(run(*args, **kwargs)) 94 | 95 | 96 | def setup(log_level: str) -> None: 97 | # Make sure that this module logger always logs no matter what 98 | # the selected level is. 99 | coloredlogs.install(level="DEBUG", milliseconds=True) 100 | logging.getLogger().setLevel(log_level) 101 | logger.setLevel("INFO") 102 | 103 | try: 104 | import uvloop 105 | 106 | uvloop.install() 107 | except ImportError: 108 | logger.info("uvloop is not available, ignoring.") 109 | 110 | 111 | async def report( 112 | hit_counts: list[Any], 113 | stalenesses: list[Any], 114 | ) -> None: 115 | previous_hit_count = 0 116 | previous_sample = time.perf_counter() 117 | 118 | while True: 119 | await asyncio.sleep(5) 120 | 121 | hit_count = sum(c.value for c in hit_counts) 122 | staleness = max(c.value for c in stalenesses) 123 | now = time.perf_counter() 124 | 125 | logger.info(f"{(hit_count - previous_hit_count) / (now - previous_sample):10.2f} RPS, Staleness {staleness:.2f}s") 126 | 127 | previous_hit_count = hit_count 128 | previous_sample = now 129 | 130 | 131 | if __name__ == "__main__": 132 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 133 | parser.add_argument("--log-level", default="WARNING") 134 | subparsers = parser.add_subparsers(dest="command", required=True) 135 | 136 | populate_option = subparsers.add_parser("populate", help="Populate the backend with a large number of tasks") 137 | populate_option.add_argument("-n", "--number", type=int, default=10000, help="Number of tasks to insert") 138 | populate_option.add_argument( 139 | "-p", 140 | "--parallelism", 141 | type=int, 142 | default=50, 143 | help="How many tasks to insert in parallel", 144 | ) 145 | run_option = subparsers.add_parser("run", help="Run the Pyncette app") 146 | run_option.add_argument("--processes", type=int, default=1, help="Number of processes to run") 147 | run_option.add_argument( 148 | "--partition-count", 149 | type=int, 150 | default=PARTITION_COUNT, 151 | help="How many partitions each process should poll", 152 | ) 153 | 154 | options = parser.parse_args() 155 | setup(options.log_level) 156 | 157 | if options.command == "run": 158 | hit_count = [RawValue("l", 0) for _ in range(options.processes)] 159 | staleness = [RawValue("f", 0) for _ in range(options.processes)] 160 | 161 | if options.partition_count * options.processes < PARTITION_COUNT: 162 | logger.warning(f"partition_count * processes < {PARTITION_COUNT}. 
Not all partitions will be processed.") 163 | 164 | for i in range(options.processes): 165 | enabled_partitions = sorted((i * options.partition_count + j) % PARTITION_COUNT for j in range(options.partition_count)) 166 | 167 | job = Process( 168 | target=_run, 169 | name=str(i), 170 | args=( 171 | options.log_level, 172 | hit_count[i], 173 | staleness[i], 174 | list(enabled_partitions), 175 | ), 176 | ) 177 | job.start() 178 | 179 | asyncio.run(report(hit_count, staleness)) 180 | 181 | elif options.command == "populate": 182 | asyncio.run(populate(options.number, options.parallelism)) 183 | -------------------------------------------------------------------------------- /examples/data/usernames.txt: -------------------------------------------------------------------------------- 1 | Alice 2 | Carol 3 | Chuck 4 | Craig 5 | Dan 6 | Erin 7 | Eve 8 | Faythe 9 | Frank 10 | Grace 11 | Heidi 12 | Ivan 13 | Judy 14 | Mallory 15 | Michael 16 | Niaj 17 | Olivia 18 | Oscar 19 | Peggy 20 | Rupert 21 | Sybil 22 | Trent 23 | Trudy 24 | Victor 25 | Walter 26 | Wend 27 | -------------------------------------------------------------------------------- /examples/delay_queue.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example uses Pyncette to implement a reliable delay queue (persistence is needed for durability 4 | or for running multiple instances of the app at the same time, see examples/persistence.py for details) 5 | 6 | After the task instance suceeds it will not be scheduled again as with recurrent tasks, however, 7 | if an exception is raised, it will be retried if ExecutionMode.AT_LEAST_ONCE is used. 8 | 9 | """ 10 | 11 | import datetime 12 | import logging 13 | import random 14 | import uuid 15 | 16 | from pyncette import Context 17 | from pyncette import ExecutionMode 18 | from pyncette import Pyncette 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | app = Pyncette() 23 | 24 | 25 | @app.dynamic_task(execution_mode=ExecutionMode.AT_LEAST_ONCE) 26 | async def execute_once_reliable(context: Context) -> None: 27 | logger.info( 28 | f"I am {context.args['username']}. If I fail, I will be retried, otherwise I will never be seen again." 29 | f"(I was scheduled to run at {context.scheduled_at})" 30 | ) 31 | 32 | if random.choice([True, False]): 33 | raise Exception("Oops") 34 | 35 | 36 | @app.dynamic_task(execution_mode=ExecutionMode.AT_MOST_ONCE) 37 | async def execute_once_best_effort(context: Context) -> None: 38 | logger.info(f"I am {context.args['username']}. 
I will never be seen again (I was scheduled to run at {context.scheduled_at})") 39 | 40 | if random.choice([True, False]): 41 | raise Exception("Oops") 42 | 43 | 44 | @app.task(interval=datetime.timedelta(seconds=2)) 45 | async def enqueue_periodically(context: Context) -> None: 46 | execute_at = context.scheduled_at + datetime.timedelta(seconds=random.randint(1, 5)) 47 | 48 | await context.app_context.schedule_task( 49 | execute_once_reliable, 50 | str(uuid.uuid4()), 51 | execute_at=execute_at, 52 | username=random.choice(["Alice", "Bob", "Charlie", "Dave", "Eve"]), 53 | ) 54 | 55 | await context.app_context.schedule_task( 56 | execute_once_best_effort, 57 | str(uuid.uuid4()), 58 | execute_at=execute_at, 59 | username=random.choice(["Alice", "Bob", "Charlie", "Dave", "Eve"]), 60 | ) 61 | 62 | 63 | if __name__ == "__main__": 64 | app.main() 65 | -------------------------------------------------------------------------------- /examples/dynamic_tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example illustrates dynamic tasks i.e. tasks that are not pre-defined in code and 3 | can be scheduled at runtime. 4 | 5 | Marking the function with @app.dynamic_task serves as a template and individual task 6 | instances can be scheduled with schedule_task (and unscheduled with unschedule_task). 7 | 8 | Using a persistent backend, Pyncette supports efficient execution of a large number of 9 | dynamic task instances. 10 | 11 | """ 12 | 13 | import asyncio 14 | import datetime 15 | import logging 16 | import pathlib 17 | import random 18 | import sys 19 | 20 | import coloredlogs 21 | 22 | from pyncette import Context 23 | from pyncette import ExecutionMode 24 | from pyncette import Pyncette 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | app = Pyncette(poll_interval=datetime.timedelta(seconds=0.1)) 29 | 30 | 31 | @app.dynamic_task(execution_mode=ExecutionMode.AT_MOST_ONCE) 32 | async def greeter(context: Context) -> None: 33 | logger.info(f"Hello from {context.args['username']}.") 34 | 35 | if random.random() < 0.2: 36 | # 1/5 chance that the task will unschedule itself. If this 37 | # example is run for long enough, no tasks should be left. 38 | logger.warning(f"Unscheduling {context.args['username']}") 39 | await context.app_context.unschedule_task(context.task) 40 | 41 | 42 | async def main() -> None: 43 | async with app.create() as ctx: 44 | with (pathlib.Path(sys.path[0]) / "data" / "usernames.txt").open() as f: 45 | usernames = f.read().splitlines() 46 | 47 | for username in usernames: 48 | interval = datetime.timedelta(seconds=random.uniform(5, 20)) 49 | logger.info(f"Scheduling {username} to run every {interval}") 50 | await ctx.schedule_task( 51 | greeter, 52 | # Mandatory unique name for the task instance 53 | username, 54 | interval=interval, 55 | # All the extra parameters will be available to the 56 | # the task in context.args 57 | username=username, 58 | ) 59 | 60 | await ctx.run() 61 | 62 | 63 | if __name__ == "__main__": 64 | coloredlogs.install(level="INFO", milliseconds=True, logger=logger) 65 | asyncio.run(main()) 66 | -------------------------------------------------------------------------------- /examples/fixtures_and_middlewares.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example illustrates the use of fixtures and middlewares. 
4 | 5 | Middlewares are functions that wrap the execution of every defined task, so they are a good 6 | place to put cross-cutting concerns such as logging, database session management, metrics, ... 7 | 8 | Fixtures can be thought of application-level middlewares. They wrap the lifecycle of the entire 9 | Pyncette app and can be used to perform initialization, cleanup and can inject resources such as 10 | service clients to the task context. 11 | 12 | """ 13 | 14 | import asyncio 15 | import logging 16 | import pathlib 17 | import random 18 | import time 19 | from collections.abc import AsyncIterator 20 | from typing import TextIO 21 | 22 | from pyncette import Context 23 | from pyncette import Pyncette 24 | from pyncette import PyncetteContext 25 | from pyncette.model import NextFunc 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | app = Pyncette() 30 | 31 | 32 | @app.fixture(name="log") 33 | async def logfile_fixture(app_context: PyncetteContext) -> AsyncIterator[TextIO]: 34 | logger.info("Using log file logfile.txt") 35 | 36 | with pathlib.Path("./logfile.txt").open("a") as f: 37 | # Yielding from fixture gives an object that will be available in 38 | # context. for all tasks (and middlewares) 39 | yield f 40 | 41 | # This will run on graceful shutdown of the Pyncette app 42 | logger.info("Log file closed") 43 | 44 | 45 | @app.middleware 46 | async def timer_middleware(context: Context, next: NextFunc) -> None: 47 | start_time = time.time() 48 | try: 49 | await next() 50 | finally: 51 | duration = time.time() - start_time 52 | print(f"Task {context.task.name} took {duration:,.2}s.", file=context.log) 53 | context.log.flush() 54 | 55 | 56 | @app.task(schedule="* * * * * */2") 57 | async def slow_task(context: Context) -> None: 58 | await asyncio.sleep(random.uniform(0, 1)) 59 | 60 | 61 | @app.task(schedule="* * * * * */2") 62 | async def fast_task(context: Context) -> None: 63 | pass 64 | 65 | 66 | if __name__ == "__main__": 67 | app.main() 68 | -------------------------------------------------------------------------------- /examples/healthcheck.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example illustrates the use of healthcheck HTTP server. It exposes the /health endpoint 4 | which returns 200 if last successfull poll was less than 2 poll intervals ago, 500 otherwise. 5 | 6 | curl localhost:8080/health 7 | 8 | """ 9 | 10 | import asyncio 11 | import logging 12 | import random 13 | 14 | from pyncette import Context 15 | from pyncette import Pyncette 16 | from pyncette.executor import SynchronousExecutor 17 | from pyncette.healthcheck import use_healthcheck_server 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | # We use the SynchronousExecutor so long-running tasks will delay 22 | # cause polling to stall and simulate unhealthiness. 23 | app = Pyncette(executor_cls=SynchronousExecutor) 24 | use_healthcheck_server(app, port=8080) 25 | 26 | 27 | @app.task(schedule="* * * * * */2") 28 | async def hello_world(context: Context) -> None: 29 | if random.choice([True, False]): 30 | await asyncio.sleep(4) 31 | logger.info("Hello, world!") 32 | 33 | 34 | if __name__ == "__main__": 35 | app.main() 36 | -------------------------------------------------------------------------------- /examples/heartbeat.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example demonstrates the heartbeating functionality, which allows for the lease on the 4 | task to be extended. 
This can be useful if tasks have an unpredictable run time to minimize 5 | the risk of another instance taking over the lease. 6 | 7 | Heartbeating can be either cooperative or automatic. 8 | 9 | """ 10 | 11 | import asyncio 12 | import datetime 13 | import logging 14 | 15 | from pyncette import Context 16 | from pyncette import Pyncette 17 | from pyncette.utils import with_heartbeat 18 | 19 | logger = logging.getLogger(__name__) 20 | app = Pyncette() 21 | 22 | 23 | @app.task(schedule="* * * * * */2", lease_duration=datetime.timedelta(seconds=2)) 24 | async def cooperative_heartbeat(context: Context) -> None: 25 | logger.info("Hello, world!") 26 | for _ in range(5): 27 | await asyncio.sleep(1) 28 | await context.heartbeat() 29 | logger.info("Goodbye, world!") 30 | 31 | 32 | @app.task(schedule="* * * * * */2", lease_duration=datetime.timedelta(seconds=2)) 33 | async def cooperative_heartbeat_lease_expired(context: Context) -> None: 34 | logger.info("Hello, world!") 35 | await asyncio.sleep(3) 36 | # This will raise an exception as we no longer have lease at this point 37 | await context.heartbeat() 38 | logger.info("Goodbye, world!") 39 | 40 | 41 | @app.task(schedule="* * * * * */2", lease_duration=datetime.timedelta(seconds=2)) 42 | @with_heartbeat() 43 | async def automatic_heartbeat(context: Context) -> None: 44 | """ 45 | Tasks decorated with with_heartbeat will automatically heartbeat in background 46 | whenever we have less than 1/2 of the time remaining on the lease 47 | """ 48 | 49 | logger.info("Hello, world!") 50 | await asyncio.sleep(5) 51 | logger.info("Goodbye, world!") 52 | 53 | 54 | if __name__ == "__main__": 55 | app.main() 56 | -------------------------------------------------------------------------------- /examples/persistence.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This example stores the state of the scheduler in a variety of backends supported by Pyncette 4 | 5 | By having a persistent backend, you can run multiple multiple processes and they will coordinate 6 | execution among them, making sure that tasks are only executed by one of them on schedule. 7 | 8 | """ 9 | 10 | import logging 11 | 12 | from pyncette import Context 13 | from pyncette import Pyncette 14 | from pyncette.dynamodb import dynamodb_repository 15 | from pyncette.mysql import mysql_repository 16 | from pyncette.postgres import postgres_repository 17 | from pyncette.redis import redis_repository 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | sqlite_app = Pyncette(sqlite_database="pyncette.db") 22 | 23 | postgres_app = Pyncette( 24 | repository_factory=postgres_repository, 25 | # PostgreSQL connection string 26 | postgres_url="postgres://postgres@localhost/pyncette", 27 | # The table name 28 | postgres_table_name="example123", 29 | # If set to true, Pyncette will assume the table exists and will not try to create it 30 | postgres_skip_table_create=False, 31 | # Batch size for querying dynamic tasks 32 | batch_size=10, 33 | ) 34 | 35 | dynamodb_app = Pyncette( 36 | repository_factory=dynamodb_repository, 37 | # Optional endpoint URL (if e.g. using Localstack instead of actual DynamoDB) 38 | dynamodb_endpoint=None, 39 | # AWS region name 40 | dynamodb_region_name="eu-west-1", 41 | # The name of the DynamoDB table. 42 | dynamodb_table_name="pyncette", 43 | # Optional partition key prefix allowing multiple independent Pyncette instances 44 | # to use the same table. 
45 | dynamodb_partition_prefix="example123", 46 | # If set to true, Pyncette will assume the table exists and will not try to create it 47 | dynamodb_skip_table_create=False, 48 | # Batch size for querying dynamic tasks 49 | batch_size=10, 50 | ) 51 | 52 | redis_app = Pyncette( 53 | repository_factory=redis_repository, 54 | # Redis URL 55 | redis_url="redis://localhost", 56 | # Key prefix in Redis, allowing multiple Pyncette apps to share the same 57 | # Redis instance 58 | redis_namespace="example123", 59 | # Timeout in seconds for Redis operations 60 | redis_timeout=10, 61 | # Batch size for querying dynamic tasks 62 | batch_size=10, 63 | ) 64 | 65 | 66 | mysql_app = Pyncette( 67 | repository_factory=mysql_repository, # type: ignore 68 | # MySQL host 69 | mysql_host="localhost", 70 | # MySQL database name 71 | mysql_database="pyncette", 72 | # MySQL username 73 | mysql_user="pyncette", 74 | # Optional MySQL password 75 | mysql_password="password", # noqa: S106 76 | # The table name 77 | mysql_table_name="example123", 78 | # Optional MySQL port 79 | mysql_port=3306, 80 | # If set to true, Pyncette will assume the table exists and will not try to create it 81 | mysql_skip_table_create=False, 82 | # Batch size for querying dynamic tasks 83 | batch_size=10, 84 | ) 85 | 86 | # Choose one of the above 87 | app = sqlite_app 88 | 89 | 90 | @app.task(schedule="* * * * * */2") 91 | async def hello_world(context: Context) -> None: 92 | logger.info("Hello, world!") 93 | 94 | 95 | if __name__ == "__main__": 96 | app.main() 97 | -------------------------------------------------------------------------------- /examples/prometheus_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Pyncette ships with an optional Prometheus instrumentation based on the official prometheus_client 4 | Python package. It includes the following metrics: 5 | 6 | - Tick duration [Histogram] 7 | - Tick volume [Counter] 8 | - Tick failures [Counter] 9 | - Number of currently executing ticks [Gauge] 10 | - Task duration [Histogram] 11 | - Task volume [Counter] 12 | - Task failures [Counter] 13 | - Number of currently executing tasks [Gauge] 14 | - Task run staleness (i.e. how far behind the scheduled time the actual executions are) [Histogram] 15 | - Repository operation duration [Histogram] 16 | - Repository operation volume [Counter] 17 | - Repository operation volume [Failures] 18 | - Number of currently repository operations [Gauge] 19 | 20 | It pushes the metrics to default registry (prometheus_client.REGISTRY), so it can be combined with other 21 | code alongside it. 
22 | 23 | To see the exported metrics while running this example, use something like 24 | 25 | curl localhost:9699/metrics 26 | 27 | """ 28 | 29 | import asyncio 30 | import datetime 31 | import logging 32 | import random 33 | import uuid 34 | 35 | from prometheus_client import start_http_server 36 | 37 | from pyncette import Context 38 | from pyncette import FailureMode 39 | from pyncette import Pyncette 40 | from pyncette.prometheus import use_prometheus 41 | 42 | logger = logging.getLogger(__name__) 43 | 44 | app = Pyncette() 45 | use_prometheus(app) 46 | 47 | 48 | @app.task(schedule="* * * * * */2") 49 | async def hello_world(context: Context) -> None: 50 | logger.info("Hello, world!") 51 | 52 | 53 | @app.task(schedule="* * * * * */2") 54 | async def sleepy_time(context: Context) -> None: 55 | logger.info("Hello, bed!") 56 | await asyncio.sleep(random.random() * 5) 57 | 58 | 59 | @app.task(schedule="* * * * * */2", failure_mode=FailureMode.UNLOCK) 60 | async def oopsie_daisy(context: Context) -> None: 61 | if random.choice([True, False]): 62 | raise Exception("Something went wrong :(") 63 | 64 | 65 | @app.dynamic_task() 66 | async def execute_once(context: Context) -> None: 67 | logger.info(f"Hello, world from {context.task}") 68 | await context.app_context.unschedule_task(context.task) 69 | 70 | 71 | @app.task(interval=datetime.timedelta(seconds=1)) 72 | async def schedule_execute_once(context: Context) -> None: 73 | await context.app_context.schedule_task(execute_once, str(uuid.uuid4()), interval=datetime.timedelta(seconds=1)) 74 | 75 | 76 | if __name__ == "__main__": 77 | start_http_server(port=9699, addr="0.0.0.0") # noqa: S104 78 | app.main() 79 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Pyncette 2 | site_description: A reliable distributed scheduler with pluggable storage backends for Async Python 3 | site_url: https://pyncette.readthedocs.io 4 | repo_url: https://github.com/tibordp/pyncette 5 | repo_name: tibordp/pyncette 6 | edit_uri: edit/master/docs/ 7 | 8 | theme: 9 | name: material 10 | palette: 11 | # Light mode 12 | - media: "(prefers-color-scheme: light)" 13 | scheme: default 14 | primary: indigo 15 | accent: indigo 16 | toggle: 17 | icon: material/brightness-7 18 | name: Switch to dark mode 19 | # Dark mode 20 | - media: "(prefers-color-scheme: dark)" 21 | scheme: slate 22 | primary: indigo 23 | accent: indigo 24 | toggle: 25 | icon: material/brightness-4 26 | name: Switch to light mode 27 | features: 28 | - navigation.instant 29 | - navigation.tracking 30 | - navigation.tabs 31 | - navigation.sections 32 | - navigation.expand 33 | - navigation.top 34 | - search.suggest 35 | - search.highlight 36 | - content.code.copy 37 | - content.code.annotate 38 | 39 | plugins: 40 | - search 41 | - mkdocstrings: 42 | handlers: 43 | python: 44 | paths: [src] 45 | inventories: 46 | - https://docs.python.org/3/objects.inv 47 | - https://docs.aiohttp.org/en/stable/objects.inv 48 | options: 49 | docstring_style: numpy 50 | docstring_section_style: table 51 | show_source: false 52 | show_root_heading: true 53 | show_root_full_path: false 54 | show_symbol_type_heading: true 55 | show_symbol_type_toc: true 56 | signature_crossrefs: true 57 | separate_signature: true 58 | line_length: 80 59 | members_order: source 60 | group_by_category: true 61 | show_if_no_docstring: true 62 | show_docstring_attributes: true 63 | show_docstring_functions: true 
64 | show_docstring_classes: true 65 | show_docstring_modules: true 66 | show_signature_annotations: true 67 | annotations_path: brief 68 | inherited_members: false 69 | filters: 70 | - "!^_" 71 | merge_init_into_class: true 72 | docstring_options: 73 | ignore_init_summary: true 74 | 75 | markdown_extensions: 76 | - admonition 77 | - attr_list 78 | - def_list 79 | - footnotes 80 | - meta 81 | - md_in_html 82 | - toc: 83 | permalink: true 84 | - pymdownx.arithmatex: 85 | generic: true 86 | - pymdownx.betterem: 87 | smart_enable: all 88 | - pymdownx.caret 89 | - pymdownx.details 90 | - pymdownx.highlight: 91 | anchor_linenums: true 92 | - pymdownx.inlinehilite 93 | - pymdownx.keys 94 | - pymdownx.mark 95 | - pymdownx.smartsymbols 96 | - pymdownx.superfences 97 | - pymdownx.tabbed: 98 | alternate_style: true 99 | - pymdownx.tasklist: 100 | custom_checkbox: true 101 | - pymdownx.tilde 102 | 103 | nav: 104 | - Home: 105 | - Overview: index.md 106 | - Changelog: changelog.md 107 | - Usage: 108 | - Installation: installation.md 109 | - Usage: usage.md 110 | - Backends: backends.md 111 | - Advanced Usage: advanced_usage.md 112 | - API Reference: api-reference.md 113 | - Contributing: contributing.md 114 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pyncette" 7 | version = "1.0.0" 8 | description = "A reliable distributed scheduler with pluggable storage backends" 9 | readme = "README.md" 10 | license = {text = "MIT"} 11 | authors = [ 12 | {name = "Tibor Djurica Potpara", email = "tibor.djurica@ojdip.net"}, 13 | ] 14 | keywords = ["scheduler", "cron", "async", "distributed", "task-queue", "asyncio"] 15 | classifiers = [ 16 | "Development Status :: 5 - Production/Stable", 17 | "Intended Audience :: Developers", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: Unix", 20 | "Operating System :: POSIX", 21 | "Operating System :: Microsoft :: Windows", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | "Programming Language :: Python :: 3.13", 28 | "Programming Language :: Python :: 3.14", 29 | "Programming Language :: Python :: Implementation :: CPython", 30 | "Topic :: Utilities", 31 | ] 32 | requires-python = ">=3.9" 33 | dependencies = [ 34 | "croniter>=1.3.14", 35 | "aiosqlite>=0.19.0", 36 | "aiohttp>=3.8.4", 37 | "python-dateutil>=2.8.2", 38 | "coloredlogs", 39 | ] 40 | 41 | [project.optional-dependencies] 42 | redis = ["redis>=4.5.4"] 43 | prometheus = ["prometheus_client>=0.16.0"] 44 | postgres = ["asyncpg>=0.27.0"] 45 | dynamodb = ["aioboto3>=11.1.0"] 46 | mysql = ["aiomysql>=0.1.1", "cryptography>=40.0.2"] 47 | uvloop = ["uvloop>=0.16.0"] 48 | all = [ 49 | "redis>=4.5.4", 50 | "prometheus_client>=0.16.0", 51 | "asyncpg>=0.27.0", 52 | "aioboto3>=11.1.0", 53 | "aiomysql>=0.1.1", 54 | "cryptography>=40.0.2", 55 | "uvloop>=0.22.1", 56 | ] 57 | dev = [ 58 | "pytest", 59 | "pytest-asyncio", 60 | "pytest-cov", 61 | "pre-commit", 62 | "ty", 63 | "ruff", 64 | "mkdocs-material", 65 | "mkdocstrings[python]", 66 | ] 67 | 68 | [project.urls] 69 | Documentation = "https://tibordp.github.io/pyncette/" 70 | Changelog = 
"https://tibordp.github.io/pyncette/changelog/" 71 | "Issue Tracker" = "https://github.com/tibordp/pyncette/issues" 72 | Homepage = "https://github.com/tibordp/pyncette" 73 | 74 | [tool.hatch.build.targets.wheel] 75 | packages = ["src/pyncette"] 76 | 77 | [tool.hatch.build.targets.sdist] 78 | include = [ 79 | "/src", 80 | "/tests", 81 | "/docs", 82 | "/examples", 83 | "*.md", 84 | "*.cfg", 85 | "*.yml", 86 | "*.yaml", 87 | "*.toml", 88 | "*.txt", 89 | ] 90 | 91 | [tool.ruff] 92 | extend-exclude = ["static", "ci/templates"] 93 | line-length = 140 94 | src = ["src", "tests"] 95 | target-version = "py39" 96 | 97 | [tool.ruff.format] 98 | docstring-code-format = true 99 | docstring-code-line-length = 80 100 | 101 | [tool.ruff.lint] 102 | ignore = [ 103 | "RUF001", # ruff-specific rules ambiguous-unicode-character-string 104 | "PLC0415", # import not at top of file 105 | "S608", # SQL injection - we specifically inject table name 106 | "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes 107 | ] 108 | 109 | [tool.ty.rules] 110 | unresolved-attribute = "ignore" 111 | 112 | [tool.pytest.ini_options] 113 | testpaths = ["tests"] 114 | python_files = ["test_*.py"] 115 | python_classes = ["Test*"] 116 | python_functions = ["test_*"] 117 | markers = [ 118 | "asyncio: mark test as async", 119 | "integration: mark test as integration test requiring external services", 120 | ] 121 | asyncio_mode = "auto" 122 | asyncio_default_fixture_loop_scope = "function" 123 | 124 | [tool.coverage.paths] 125 | source = ["src", "*/site-packages"] 126 | 127 | [tool.coverage.run] 128 | branch = true 129 | source = ["pyncette", "tests"] 130 | omit = ["tests/utils/*"] 131 | parallel = true 132 | 133 | [tool.coverage.report] 134 | show_missing = true 135 | precision = 2 136 | exclude_lines = [ 137 | "if TYPE_CHECKING:", 138 | "assert False", 139 | "pragma: no cover", 140 | ] 141 | omit = ["*migrations*"] 142 | -------------------------------------------------------------------------------- /src/pyncette/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.0" 2 | 3 | from .model import Context 4 | from .model import ExecutionMode 5 | from .model import FailureMode 6 | from .pyncette import Pyncette 7 | from .pyncette import PyncetteContext 8 | 9 | __all__ = ["Context", "ExecutionMode", "FailureMode", "Pyncette", "PyncetteContext"] 10 | -------------------------------------------------------------------------------- /src/pyncette/errors.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | 6 | class PyncetteException(Exception): 7 | """Base exception for Pyncette""" 8 | 9 | 10 | class LeaseLostException(PyncetteException): 11 | """Signals that the lease on the task was lost""" 12 | 13 | task: Task 14 | 15 | def __init__(self, task: Task): 16 | super().__init__(f"Lease on the task {task.canonical_name} was lost.") 17 | self.task = task 18 | 19 | 20 | if TYPE_CHECKING: 21 | from pyncette.task import Task 22 | -------------------------------------------------------------------------------- /src/pyncette/executor.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import contextlib 5 | import logging 6 | from typing import Any 7 | from collections.abc import Awaitable 8 | 9 | logger = logging.getLogger(__name__) 10 | 
11 | 12 | class SynchronousExecutor(contextlib.AbstractAsyncContextManager): 13 | def __init__(self, **kwargs: dict[str, Any]): 14 | pass 15 | 16 | async def __aenter__(self) -> SynchronousExecutor: 17 | return self 18 | 19 | async def __aexit__( 20 | self, 21 | exc_type: type[BaseException] | None, 22 | exc_value: BaseException | None, 23 | traceback: Any | None, 24 | ) -> None: 25 | pass 26 | 27 | async def spawn_task(self, task: Awaitable) -> None: 28 | await task 29 | 30 | 31 | class DefaultExecutor(contextlib.AbstractAsyncContextManager): 32 | """Manages the spawned tasks running in background""" 33 | 34 | _tasks: dict[object, asyncio.Task] 35 | _semaphore: asyncio.Semaphore 36 | 37 | def __init__(self, **kwargs: Any) -> None: 38 | self._tasks = {} 39 | concurrency_limit = kwargs.get("concurrency_limit", 100) 40 | self._semaphore = asyncio.Semaphore(concurrency_limit) 41 | 42 | async def __aenter__(self) -> DefaultExecutor: 43 | return self 44 | 45 | async def __aexit__( 46 | self, 47 | exc_type: type[BaseException] | None, 48 | exc_value: BaseException | None, 49 | traceback: Any | None, 50 | ) -> None: 51 | if self._tasks: 52 | logging.debug(f"{exc_type}, {exc_value}, {traceback}") 53 | if exc_type == asyncio.CancelledError: 54 | logger.warning("Cancelling remaining tasks.") 55 | for task in self._tasks.values(): 56 | task.cancel() 57 | 58 | logger.info("Waiting for remaining tasks to finish.") 59 | await asyncio.wait(self._tasks.values()) 60 | 61 | async def spawn_task(self, task: Awaitable) -> None: 62 | identity = object() 63 | 64 | async def _task_wrapper(awaitable: Awaitable) -> None: 65 | try: 66 | await awaitable 67 | finally: 68 | self._tasks.pop(identity) 69 | self._semaphore.release() 70 | 71 | await self._semaphore.acquire() 72 | self._tasks[identity] = asyncio.create_task(_task_wrapper(task)) 73 | -------------------------------------------------------------------------------- /src/pyncette/healthcheck.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from collections.abc import AsyncIterator 4 | from collections.abc import Awaitable 5 | from typing import Callable 6 | from typing import Optional 7 | 8 | from aiohttp import web 9 | 10 | from pyncette import pyncette 11 | from pyncette.pyncette import Pyncette 12 | from pyncette.pyncette import PyncetteContext 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | async def default_healthcheck(app_context: PyncetteContext) -> bool: 18 | utcnow = pyncette._current_time() 19 | last_tick = app_context.last_tick 20 | grace_period = app_context._app._poll_interval * 2 21 | 22 | return last_tick is not None and (utcnow - last_tick < grace_period) 23 | 24 | 25 | def use_healthcheck_server( 26 | app: Pyncette, 27 | port: int = 8080, 28 | bind_address: Optional[str] = None, 29 | healthcheck_handler: Callable[[PyncetteContext], Awaitable[bool]] = default_healthcheck, 30 | ) -> None: 31 | """ 32 | Decorate Pyncette app with a healthcheck endpoint served as a HTTP endpoint. 
33 | 34 | :param app: Pyncette app 35 | :param port: The local port to bind to 36 | :param bind_address: The local address to bind to 37 | :healthcheck_handler: A coroutine that determines health status 38 | """ 39 | 40 | async def healthcheck_fixture( 41 | app_context: PyncetteContext, 42 | ) -> AsyncIterator[asyncio.AbstractServer]: 43 | async def handler(request: web.BaseRequest) -> web.Response: 44 | if request.method != "GET": 45 | return web.Response(status=405, text="Method not allowed") 46 | try: 47 | is_healthy = await healthcheck_handler(app_context) 48 | except asyncio.CancelledError: 49 | raise 50 | except Exception as e: 51 | logger.warning("Exception raised in healthcheck handler", exc_info=e) 52 | is_healthy = False 53 | 54 | if is_healthy: 55 | return web.Response(status=200, text="OK") 56 | else: 57 | return web.Response(status=500, text="Not OK") 58 | 59 | loop = asyncio.get_event_loop() 60 | server = await loop.create_server(web.Server(handler), bind_address, port) 61 | logger.info(f"Healthcheck listening on {port}") 62 | 63 | try: 64 | yield server 65 | finally: 66 | server.close() 67 | await server.wait_closed() 68 | 69 | app.use_fixture("_healthcheck", healthcheck_fixture) 70 | -------------------------------------------------------------------------------- /src/pyncette/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime 4 | from dataclasses import dataclass 5 | from enum import Enum 6 | from typing import TYPE_CHECKING 7 | from typing import Any 8 | from collections.abc import AsyncIterator 9 | from collections.abc import Awaitable 10 | from typing import Callable 11 | from typing import NewType 12 | from typing import Protocol 13 | from typing import TypeVar 14 | 15 | T = TypeVar("T") 16 | Decorator = Callable[[T], T] 17 | Lease = NewType("Lease", object) 18 | ContinuationToken = NewType("ContinuationToken", object) 19 | 20 | # https://github.com/python/mypy/issues/708 21 | 22 | 23 | class Heartbeater(Protocol): 24 | def __call__(self) -> Awaitable[None]: 25 | "Heartbeats on the message" 26 | 27 | 28 | class Context: 29 | """Task execution context. This class can have dynamic attributes.""" 30 | 31 | app_context: pyncette.PyncetteContext 32 | task: pyncette.task.Task 33 | scheduled_at: datetime.datetime 34 | _lease: Lease | None 35 | heartbeat: Heartbeater 36 | args: dict[str, Any] 37 | 38 | if TYPE_CHECKING: 39 | 40 | def __getattr__(self, name: str) -> Any: ... 41 | 42 | def __setattr__(self, name: str, value: Any) -> Any: ... 
43 | 44 | 45 | class TaskFunc(Protocol): 46 | def __call__(self, context: Context) -> Awaitable[None]: 47 | "Executes the task" 48 | 49 | 50 | class PartitionSelector(Protocol): 51 | def __call__(self, partition_count: int, task_id: str) -> int: 52 | "Gets the partition number for a given task id" 53 | 54 | 55 | class NextFunc(Protocol): 56 | def __call__(self) -> Awaitable[None]: 57 | "Enter the next middleware or the task body" 58 | 59 | 60 | class MiddlewareFunc(Protocol): 61 | def __call__(self, context: Context, next: NextFunc) -> Awaitable[None]: 62 | "Executes the middleware" 63 | 64 | 65 | class FixtureFunc(Protocol): 66 | def __call__(self, app_context: pyncette.PyncetteContext) -> AsyncIterator[Any]: 67 | "Executes the fixture" 68 | 69 | 70 | class ResultType(Enum): 71 | """Status returned by polling the task""" 72 | 73 | MISSING = 0 74 | PENDING = 1 75 | READY = 2 76 | LOCKED = 3 77 | LEASE_MISMATCH = 4 78 | 79 | 80 | class ExecutionMode(Enum): 81 | """The execution mode for a Pyncette task.""" 82 | 83 | AT_LEAST_ONCE = 0 84 | AT_MOST_ONCE = 1 85 | 86 | 87 | class FailureMode(Enum): 88 | """What should happen when a task fails.""" 89 | 90 | NONE = 0 91 | UNLOCK = 1 92 | COMMIT = 2 93 | 94 | 95 | @dataclass 96 | class PollResponse: 97 | """The result of a task poll""" 98 | 99 | result: ResultType 100 | scheduled_at: datetime.datetime 101 | lease: Lease | None 102 | 103 | 104 | @dataclass 105 | class QueryResponse: 106 | """The result of a task query""" 107 | 108 | tasks: list[tuple[pyncette.task.Task, Lease]] 109 | continuation_token: ContinuationToken | None 110 | 111 | 112 | if TYPE_CHECKING: 113 | import pyncette 114 | import pyncette.task 115 | -------------------------------------------------------------------------------- /src/pyncette/mysql.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import datetime 4 | import json 5 | import logging 6 | import re 7 | import uuid 8 | from contextlib import asynccontextmanager 9 | from typing import Any 10 | from collections.abc import AsyncIterator 11 | from typing import Optional 12 | 13 | import aiomysql 14 | import dateutil.tz 15 | import pymysql 16 | 17 | from pyncette.errors import PyncetteException 18 | from pyncette.model import ContinuationToken 19 | from pyncette.model import ExecutionMode 20 | from pyncette.model import Lease 21 | from pyncette.model import PollResponse 22 | from pyncette.model import QueryResponse 23 | from pyncette.model import ResultType 24 | from pyncette.repository import Repository 25 | from pyncette.task import Task 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | def _from_timestamp(timestamp: Optional[float]) -> Optional[datetime.datetime]: 31 | if timestamp is None: 32 | return None 33 | else: 34 | return datetime.datetime.fromtimestamp(timestamp, dateutil.tz.UTC) 35 | 36 | 37 | def _to_timestamp(date: Optional[datetime.datetime]) -> Optional[float]: 38 | if date is None: 39 | return None 40 | else: 41 | return date.timestamp() 42 | 43 | 44 | _CONTINUATION_TOKEN = ContinuationToken(object()) 45 | 46 | 47 | class MySQLRepository(Repository): 48 | _pool: aiomysql.Pool 49 | _batch_size: int 50 | _table_name: str 51 | 52 | def __init__( 53 | self, 54 | pool: aiomysql.Pool, 55 | **kwargs: Any, 56 | ): 57 | self._pool = pool 58 | self._table_name = kwargs.get("mysql_table_name", "pyncette_tasks") 59 | self._batch_size = kwargs.get("batch_size", 100) 60 | 61 | if self._batch_size < 1: 62 | raise ValueError("Batch 
size must be greater than 0") 63 | if not re.match(r"^[a-z_]+$", self._table_name): 64 | raise ValueError("Table name can only contain lower-case letters and underscores") 65 | 66 | async def initialize(self) -> None: 67 | async with self._transaction() as cursor: 68 | await cursor.execute( 69 | f""" 70 | CREATE TABLE IF NOT EXISTS {self._table_name} ( 71 | name VARCHAR(256) PRIMARY KEY, 72 | parent_name VARCHAR(256), 73 | locked_until DOUBLE, 74 | locked_by VARCHAR(256), 75 | execute_after DOUBLE, 76 | task_spec TEXT 77 | ); 78 | """ 79 | ) 80 | 81 | try: 82 | await cursor.execute( 83 | f""" 84 | CREATE INDEX due_tasks_{self._table_name} 85 | ON {self._table_name} (parent_name, (GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)))); 86 | """ 87 | ) 88 | except pymysql.err.OperationalError as e: 89 | code, _msg = e.args 90 | # Index already exists 91 | if code != 1061: 92 | raise 93 | 94 | async def poll_dynamic_task( 95 | self, 96 | utc_now: datetime.datetime, 97 | task: Task, 98 | continuation_token: Optional[ContinuationToken] = None, 99 | ) -> QueryResponse: 100 | async with self._transaction() as cursor: 101 | locked_by = str(uuid.uuid4()) 102 | locked_until = utc_now + task.lease_duration 103 | 104 | await cursor.execute( 105 | f""" 106 | SELECT name, task_spec FROM {self._table_name} 107 | WHERE parent_name = %s AND GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= %s 108 | ORDER BY GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC 109 | LIMIT %s 110 | FOR UPDATE SKIP LOCKED 111 | """, 112 | ( 113 | task.canonical_name, 114 | _to_timestamp(utc_now), 115 | self._batch_size, 116 | ), 117 | ) 118 | ready_tasks = await cursor.fetchall() 119 | 120 | await cursor.executemany( 121 | f""" 122 | UPDATE {self._table_name} 123 | SET 124 | locked_until = %s, 125 | locked_by = %s 126 | WHERE name = %s 127 | """, 128 | [(_to_timestamp(locked_until), locked_by, record["name"]) for record in ready_tasks], 129 | ) 130 | 131 | logger.debug(f"poll_dynamic_task returned {ready_tasks}") 132 | 133 | return QueryResponse( 134 | tasks=[ 135 | ( 136 | task.instantiate_from_spec(json.loads(record["task_spec"])), 137 | Lease(locked_by), 138 | ) 139 | for record in ready_tasks 140 | ], 141 | # May result in an extra round-trip if there were exactly 142 | # batch_size tasks available, but we deem this an acceptable 143 | # tradeoff. 
144 | continuation_token=_CONTINUATION_TOKEN if len(ready_tasks) == self._batch_size else None, 145 | ) 146 | 147 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 148 | assert task.parent_task is not None 149 | 150 | async with self._transaction() as cursor: 151 | execute_at = _to_timestamp(task.get_next_execution(utc_now, None)) 152 | task_spec = json.dumps(task.as_spec()) 153 | 154 | await cursor.execute( 155 | f""" 156 | INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after) 157 | VALUES (%s, %s, %s, %s) 158 | ON DUPLICATE KEY UPDATE 159 | task_spec = %s, 160 | execute_after = %s, 161 | locked_by = NULL, 162 | locked_until = NULL 163 | """, 164 | ( 165 | task.canonical_name, 166 | task.parent_task.canonical_name, 167 | task_spec, 168 | execute_at, 169 | task_spec, 170 | execute_at, 171 | ), 172 | ) 173 | 174 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 175 | async with self._transaction() as cursor: 176 | await cursor.execute( 177 | f"DELETE FROM {self._table_name} WHERE name = %s", 178 | (task.canonical_name,), 179 | ) 180 | 181 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 182 | async with self._transaction() as cursor: 183 | await cursor.execute( 184 | f"SELECT * FROM {self._table_name} WHERE name = %s FOR UPDATE", 185 | (task.canonical_name,), 186 | ) 187 | record = await cursor.fetchone() 188 | logger.debug(f"poll_task returned {record}") 189 | 190 | update = False 191 | if record is None: 192 | # Regular (non-dynamic) tasks will be implicitly created on first poll, 193 | # but dynamic task instances must be explicitely created to prevent spurious 194 | # poll from re-creating them after being deleted. 
195 | if task.parent_task is not None: 196 | raise PyncetteException("Task not found") 197 | 198 | execute_after = task.get_next_execution(utc_now, None) 199 | locked_until = None 200 | locked_by = None 201 | update = True 202 | else: 203 | execute_after = _from_timestamp(record["execute_after"]) 204 | locked_until = _from_timestamp(record["locked_until"]) 205 | locked_by = record["locked_by"] 206 | 207 | assert execute_after is not None 208 | scheduled_at = execute_after 209 | 210 | if locked_until is not None and locked_until > utc_now and (lease != locked_by): 211 | result = ResultType.LOCKED 212 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_MOST_ONCE: 213 | execute_after = task.get_next_execution(utc_now, execute_after) 214 | result = ResultType.READY 215 | locked_until = None 216 | locked_by = None 217 | update = True 218 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_LEAST_ONCE: 219 | locked_until = utc_now + task.lease_duration 220 | locked_by = str(uuid.uuid4()) 221 | result = ResultType.READY 222 | update = True 223 | else: 224 | result = ResultType.PENDING 225 | 226 | if update: 227 | await self._update_record( 228 | cursor, 229 | task, 230 | locked_until, 231 | locked_by, 232 | execute_after, 233 | ) 234 | 235 | return PollResponse(result=result, scheduled_at=scheduled_at, lease=locked_by) 236 | 237 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 238 | async with self._transaction() as cursor: 239 | await cursor.execute( 240 | f"SELECT * FROM {self._table_name} WHERE name = %s FOR UPDATE", 241 | (task.canonical_name,), 242 | ) 243 | 244 | record = await cursor.fetchone() 245 | logger.debug(f"commit_task returned {record}") 246 | 247 | if not record: 248 | logger.warning(f"Task {task} not found, skipping.") 249 | return 250 | 251 | if record["locked_by"] != lease: 252 | logger.warning(f"Lease lost on task {task}, skipping.") 253 | return 254 | 255 | await self._update_record( 256 | cursor, 257 | task, 258 | None, 259 | None, 260 | task.get_next_execution(utc_now, _from_timestamp(record["execute_after"])), 261 | ) 262 | 263 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 264 | async with self._transaction() as cursor: 265 | locked_until = utc_now + task.lease_duration 266 | await cursor.execute( 267 | f""" 268 | UPDATE {self._table_name} 269 | SET 270 | locked_until = %s 271 | WHERE name = %s AND locked_by = %s 272 | """, 273 | ( 274 | _to_timestamp(locked_until), 275 | task.canonical_name, 276 | lease, 277 | ), 278 | ) 279 | if cursor.rowcount == 1: 280 | return lease 281 | else: 282 | return None 283 | 284 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 285 | async with self._transaction() as cursor: 286 | await cursor.execute( 287 | f""" 288 | UPDATE {self._table_name} 289 | SET 290 | locked_by = NULL, 291 | locked_until = NULL 292 | WHERE name = %s AND locked_by = %s 293 | """, 294 | ( 295 | task.canonical_name, 296 | lease, 297 | ), 298 | ) 299 | 300 | @asynccontextmanager 301 | async def _transaction(self) -> AsyncIterator[aiomysql.Cursor]: 302 | async with self._pool.acquire() as connection: 303 | try: 304 | async with connection.cursor(aiomysql.DictCursor) as cursor: 305 | yield cursor 306 | except Exception: 307 | await connection.rollback() 308 | raise 309 | else: 310 | await connection.commit() 311 | 312 | async def _update_record( 313 | self, 314 | cursor: 
aiomysql.Cursor, 315 | task: Task, 316 | locked_until: Optional[datetime.datetime], 317 | locked_by: Optional[str], 318 | execute_after: Optional[datetime.datetime], 319 | ) -> None: 320 | if execute_after is None: 321 | await cursor.execute( 322 | f"DELETE FROM {self._table_name} WHERE name = %s", 323 | (task.canonical_name,), 324 | ) 325 | else: 326 | await cursor.execute( 327 | f""" 328 | INSERT INTO {self._table_name} (name, locked_until, locked_by, execute_after) 329 | VALUES (%s, %s, %s, %s) 330 | ON DUPLICATE KEY UPDATE 331 | locked_until = %s, 332 | locked_by = %s, 333 | execute_after = %s 334 | """, 335 | ( 336 | task.canonical_name, 337 | _to_timestamp(locked_until), 338 | locked_by, 339 | _to_timestamp(execute_after), 340 | _to_timestamp(locked_until), 341 | locked_by, 342 | _to_timestamp(execute_after), 343 | ), 344 | ) 345 | 346 | 347 | @contextlib.asynccontextmanager 348 | async def mysql_repository( 349 | *, 350 | mysql_host: str, 351 | mysql_user: str, 352 | mysql_database: str, 353 | mysql_password: Optional[str] = None, 354 | mysql_port: int = 3306, 355 | **kwargs: Any, 356 | ) -> AsyncIterator[MySQLRepository]: 357 | """Factory context manager that initializes the connection to MySQL""" 358 | mysql_pool = await aiomysql.create_pool( 359 | host=mysql_host, 360 | port=mysql_port, 361 | user=mysql_user, 362 | password=mysql_password, 363 | db=mysql_database, 364 | loop=asyncio.get_running_loop(), 365 | ) 366 | try: 367 | repository = MySQLRepository(mysql_pool, **kwargs) 368 | if not kwargs.get("mysql_skip_table_create", False): 369 | await repository.initialize() 370 | 371 | yield repository 372 | finally: 373 | mysql_pool.close() 374 | await mysql_pool.wait_closed() 375 | -------------------------------------------------------------------------------- /src/pyncette/postgres.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import datetime 3 | import json 4 | import logging 5 | import re 6 | import uuid 7 | from contextlib import asynccontextmanager 8 | from typing import Any 9 | from collections.abc import AsyncIterator 10 | from typing import Optional 11 | 12 | import asyncpg 13 | 14 | from pyncette.errors import PyncetteException 15 | from pyncette.model import ContinuationToken 16 | from pyncette.model import ExecutionMode 17 | from pyncette.model import Lease 18 | from pyncette.model import PollResponse 19 | from pyncette.model import QueryResponse 20 | from pyncette.model import ResultType 21 | from pyncette.repository import Repository 22 | from pyncette.task import Task 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | _CONTINUATION_TOKEN = ContinuationToken(object()) 28 | 29 | 30 | class PostgresRepository(Repository): 31 | _pool: asyncpg.pool.Pool 32 | _batch_size: int 33 | _table_name: str 34 | 35 | def __init__( 36 | self, 37 | pool: asyncpg.pool.Pool, 38 | **kwargs: Any, 39 | ): 40 | self._pool = pool 41 | self._table_name = kwargs.get("postgres_table_name", "pyncette_tasks") 42 | self._batch_size = kwargs.get("batch_size", 100) 43 | 44 | if self._batch_size < 1: 45 | raise ValueError("Batch size must be greater than 0") 46 | if not re.match(r"^[a-z_]+$", self._table_name): 47 | raise ValueError("Table name can only contain lower-case letters and underscores") 48 | 49 | async def initialize(self) -> None: 50 | async with self._transaction() as connection: 51 | await connection.execute( 52 | f""" 53 | CREATE TABLE IF NOT EXISTS {self._table_name} ( 54 | name text PRIMARY KEY, 55 | 
parent_name text, 56 | locked_until timestamptz, 57 | locked_by uuid, 58 | execute_after timestamptz, 59 | task_spec json 60 | ); 61 | CREATE INDEX IF NOT EXISTS due_tasks_{self._table_name} 62 | ON {self._table_name} (parent_name, GREATEST(locked_until, execute_after)); 63 | """ 64 | ) 65 | 66 | async def poll_dynamic_task( 67 | self, 68 | utc_now: datetime.datetime, 69 | task: Task, 70 | continuation_token: Optional[ContinuationToken] = None, 71 | ) -> QueryResponse: 72 | async with self._transaction() as connection: 73 | locked_by = uuid.uuid4() 74 | locked_until = utc_now + task.lease_duration 75 | 76 | ready_tasks = await connection.fetch( 77 | f""" 78 | UPDATE {self._table_name} a 79 | SET 80 | locked_until = $4, 81 | locked_by = $5 82 | FROM ( 83 | SELECT name FROM {self._table_name} 84 | WHERE parent_name = $1 AND GREATEST(locked_until, execute_after) <= $2 85 | ORDER BY GREATEST(locked_until, execute_after) ASC 86 | LIMIT $3 87 | FOR UPDATE SKIP LOCKED 88 | ) b 89 | WHERE a.name = b.name 90 | RETURNING * 91 | """, 92 | task.canonical_name, 93 | utc_now, 94 | self._batch_size, 95 | locked_until, 96 | locked_by, 97 | ) 98 | logger.debug(f"poll_dynamic_task returned {ready_tasks}") 99 | 100 | return QueryResponse( 101 | tasks=[ 102 | ( 103 | task.instantiate_from_spec(json.loads(record["task_spec"])), 104 | Lease(locked_by), 105 | ) 106 | for record in ready_tasks 107 | ], 108 | # May result in an extra round-trip if there were exactly 109 | # batch_size tasks available, but we deem this an acceptable 110 | # tradeoff. 111 | continuation_token=_CONTINUATION_TOKEN if len(ready_tasks) == self._batch_size else None, 112 | ) 113 | 114 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 115 | assert task.parent_task is not None 116 | 117 | async with self._transaction() as connection: 118 | result = await connection.execute( 119 | f""" 120 | INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after) 121 | VALUES ($1, $2, $3, $4) 122 | ON CONFLICT (name) DO UPDATE 123 | SET 124 | task_spec = $3, 125 | execute_after = $4, 126 | locked_by = NULL, 127 | locked_until = NULL 128 | """, 129 | task.canonical_name, 130 | task.parent_task.canonical_name, 131 | json.dumps(task.as_spec()), 132 | task.get_next_execution(utc_now, None), 133 | ) 134 | logger.debug(f"register_task returned {result}") 135 | 136 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 137 | async with self._transaction() as connection: 138 | await connection.execute(f"DELETE FROM {self._table_name} WHERE name = $1", task.canonical_name) 139 | 140 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 141 | async with self._transaction() as connection: 142 | record = await connection.fetchrow( 143 | f"SELECT * FROM {self._table_name} WHERE name = $1 FOR UPDATE", 144 | task.canonical_name, 145 | ) 146 | logger.debug(f"poll_task returned {record}") 147 | 148 | update = False 149 | if record is None: 150 | # Regular (non-dynamic) tasks will be implicitly created on first poll, 151 | # but dynamic task instances must be explicitly created to prevent spurious 152 | # polls from re-creating them after being deleted. 
153 | if task.parent_task is not None: 154 | raise PyncetteException("Task not found") 155 | 156 | execute_after = task.get_next_execution(utc_now, None) 157 | locked_until = None 158 | locked_by = None 159 | update = True 160 | else: 161 | execute_after = record["execute_after"] 162 | locked_until = record["locked_until"] 163 | locked_by = record["locked_by"] 164 | 165 | assert execute_after is not None 166 | scheduled_at = execute_after 167 | 168 | if locked_until is not None and locked_until > utc_now and (lease != locked_by): 169 | result = ResultType.LOCKED 170 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_MOST_ONCE: 171 | execute_after = task.get_next_execution(utc_now, execute_after) 172 | result = ResultType.READY 173 | locked_until = None 174 | locked_by = None 175 | update = True 176 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_LEAST_ONCE: 177 | locked_until = utc_now + task.lease_duration 178 | locked_by = uuid.uuid4() 179 | result = ResultType.READY 180 | update = True 181 | else: 182 | result = ResultType.PENDING 183 | 184 | if update: 185 | await self._update_record( 186 | connection, 187 | task, 188 | locked_until, 189 | locked_by, 190 | execute_after, 191 | ) 192 | 193 | return PollResponse(result=result, scheduled_at=scheduled_at, lease=locked_by) 194 | 195 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 196 | async with self._transaction() as connection: 197 | record = await connection.fetchrow( 198 | f"SELECT * FROM {self._table_name} WHERE name = $1 FOR UPDATE", 199 | task.canonical_name, 200 | ) 201 | logger.debug(f"commit_task returned {record}") 202 | 203 | if not record: 204 | logger.warning(f"Task {task} not found, skipping.") 205 | return 206 | 207 | if record["locked_by"] != lease: 208 | logger.warning(f"Lease lost on task {task}, skipping.") 209 | return 210 | 211 | await self._update_record( 212 | connection, 213 | task, 214 | None, 215 | None, 216 | task.get_next_execution(utc_now, record["execute_after"]), 217 | ) 218 | 219 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 220 | async with self._transaction() as connection: 221 | locked_until = utc_now + task.lease_duration 222 | result = await connection.execute( 223 | f""" 224 | UPDATE {self._table_name} 225 | SET 226 | locked_until = $1 227 | WHERE name = $2 AND locked_by = $3 228 | """, 229 | locked_until, 230 | task.canonical_name, 231 | lease, 232 | ) 233 | logger.debug(f"extend_lease returned {result}") 234 | if result == "UPDATE 1": 235 | return lease 236 | else: 237 | return None 238 | 239 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 240 | async with self._transaction() as connection: 241 | result = await connection.execute( 242 | f""" 243 | UPDATE {self._table_name} 244 | SET 245 | locked_by = NULL, 246 | locked_until = NULL 247 | WHERE name = $1 AND locked_by = $2 248 | """, 249 | task.canonical_name, 250 | lease, 251 | ) 252 | logger.debug(f"unlock_task returned {result}") 253 | 254 | @asynccontextmanager 255 | async def _transaction(self) -> AsyncIterator[asyncpg.Connection]: 256 | async with self._pool.acquire() as connection: 257 | async with connection.transaction(): 258 | yield connection 259 | 260 | async def _update_record( 261 | self, 262 | connection: asyncpg.Connection, 263 | task: Task, 264 | locked_until: Optional[datetime.datetime], 265 | locked_by: Optional[uuid.UUID], 266 | 
execute_after: Optional[datetime.datetime], 267 | ) -> None: 268 | if execute_after is None: 269 | result = await connection.execute(f"DELETE FROM {self._table_name} WHERE name = $1", task.canonical_name) 270 | else: 271 | result = await connection.execute( 272 | f""" 273 | INSERT INTO {self._table_name} (name, locked_until, locked_by, execute_after) 274 | VALUES ($1, $2, $3, $4) 275 | ON CONFLICT (name) DO UPDATE 276 | SET 277 | locked_until = $2, 278 | locked_by = $3, 279 | execute_after = $4 280 | """, 281 | task.canonical_name, 282 | locked_until, 283 | locked_by, 284 | execute_after, 285 | ) 286 | logger.debug(f"update_record returned {result}") 287 | 288 | 289 | @contextlib.asynccontextmanager 290 | async def postgres_repository(**kwargs: Any) -> AsyncIterator[PostgresRepository]: 291 | """Factory context manager for repository that initializes the connection to Postgres""" 292 | postgres_pool = await asyncpg.create_pool(kwargs["postgres_url"]) 293 | try: 294 | repository = PostgresRepository(postgres_pool, **kwargs) 295 | if not kwargs.get("postgres_skip_table_create", False): 296 | await repository.initialize() 297 | 298 | yield repository 299 | finally: 300 | await postgres_pool.close() 301 | -------------------------------------------------------------------------------- /src/pyncette/prometheus.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import datetime 3 | import math 4 | import time 5 | from typing import Any 6 | from collections.abc import AsyncIterator 7 | from collections.abc import Awaitable 8 | from typing import Callable 9 | from typing import Optional 10 | 11 | from prometheus_client import Counter 12 | from prometheus_client import Gauge 13 | from prometheus_client import Histogram 14 | 15 | from . 
import pyncette 16 | from .model import Context 17 | from .model import ContinuationToken 18 | from .model import Lease 19 | from .model import PollResponse 20 | from .model import QueryResponse 21 | from .pyncette import Pyncette 22 | from .pyncette import PyncetteContext 23 | from .repository import Repository 24 | from .repository import RepositoryFactory 25 | from .task import Task 26 | 27 | TASK_LABELS = ["task_name"] 28 | 29 | 30 | def _get_task_labels(task: Task) -> dict[str, str]: 31 | # Instances of dynamic tasks can have high cardinality, so we choose the task template name 32 | return {"task_name": task.parent_task.name if task.parent_task else task.name} 33 | 34 | 35 | class OperationMetricSet: 36 | """Collection of Prometheus metrics representing a logical operation""" 37 | 38 | requests: Counter 39 | requests_duration: Histogram 40 | exceptions: Counter 41 | requests_in_progress: Gauge 42 | 43 | def __init__(self, operation_name: str, labels: list[str]): 44 | self.requests = Counter( 45 | f"pyncette_{operation_name}_total", 46 | f"Total count of {operation_name} operations", 47 | labels, 48 | ) 49 | self.requests_duration = Histogram( 50 | f"pyncette_{operation_name}_duration_seconds", 51 | f"Histogram of {operation_name} processing time", 52 | labels, 53 | ) 54 | self.exceptions = Counter( 55 | f"pyncette_{operation_name}_failures_total", 56 | f"Total count of failed {operation_name} operations", 57 | [*labels, "exception_type"], 58 | ) 59 | self.requests_in_progress = Gauge( 60 | f"pyncette_{operation_name}_in_progress", 61 | f"Gauge of {operation_name} operations currently being processed", 62 | labels, 63 | ) 64 | 65 | @contextlib.asynccontextmanager 66 | async def measure(self, **labels: str) -> AsyncIterator[None]: 67 | """An async context manager that measures the execution of the wrapped code""" 68 | if labels: 69 | self.requests_in_progress.labels(**labels).inc() 70 | self.requests.labels(**labels).inc() 71 | else: 72 | self.requests_in_progress.inc() 73 | self.requests.inc() 74 | 75 | before_time = time.perf_counter() 76 | try: 77 | yield 78 | except Exception as e: 79 | self.exceptions.labels(**labels, exception_type=type(e).__name__).inc() 80 | raise e from None 81 | finally: 82 | if labels: 83 | self.requests_duration.labels(**labels).observe(time.perf_counter() - before_time) 84 | self.requests_in_progress.labels(**labels).dec() 85 | else: 86 | self.requests_duration.observe(time.perf_counter() - before_time) 87 | self.requests_in_progress.dec() 88 | 89 | 90 | class MeteredRepository(Repository): 91 | """A wrapper for repository that exposes metrics to Prometheus""" 92 | 93 | def __init__(self, metric_set: OperationMetricSet, inner_repository: Repository): 94 | self._metric_set = metric_set 95 | self._inner = inner_repository 96 | 97 | async def poll_dynamic_task( 98 | self, 99 | utc_now: datetime.datetime, 100 | task: Task, 101 | continuation_token: Optional[ContinuationToken] = None, 102 | ) -> QueryResponse: 103 | """Queries the dynamic tasks for execution""" 104 | async with self._metric_set.measure(operation="poll_dynamic_task", **_get_task_labels(task)): 105 | return await self._inner.poll_dynamic_task(utc_now, task, continuation_token) 106 | 107 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 108 | """Registers a dynamic task""" 109 | async with self._metric_set.measure(operation="register_task", **_get_task_labels(task)): 110 | return await self._inner.register_task(utc_now, task) 111 | 112 | async def unregister_task(self, 
utc_now: datetime.datetime, task: Task) -> None: 113 | """Deregisters a dynamic task implementation""" 114 | async with self._metric_set.measure(operation="unregister_task", **_get_task_labels(task)): 115 | return await self._inner.unregister_task(utc_now, task) 116 | 117 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 118 | """Polls the task to determine whether it is ready for execution""" 119 | async with self._metric_set.measure(operation="poll_task", **_get_task_labels(task)): 120 | return await self._inner.poll_task(utc_now, task, lease) 121 | 122 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 123 | """Commits the task, which signals a successful run.""" 124 | async with self._metric_set.measure(operation="commit_task", **_get_task_labels(task)): 125 | return await self._inner.commit_task(utc_now, task, lease) 126 | 127 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 128 | """Extends the lease on the task. Returns the new lease if lease was still valid.""" 129 | async with self._metric_set.measure(operation="extend_lease", **_get_task_labels(task)): 130 | return await self._inner.extend_lease(utc_now, task, lease) 131 | 132 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 133 | """Unlocks the task, making it eligible for retries in case execution failed.""" 134 | async with self._metric_set.measure(operation="unlock_task", **_get_task_labels(task)): 135 | return await self._inner.unlock_task(utc_now, task, lease) 136 | 137 | 138 | _task_metric_set = OperationMetricSet("tasks", TASK_LABELS) 139 | _task_staleness = Histogram( 140 | "pyncette_tasks_staleness_seconds", 141 | "Histogram of staleness of task executions (difference between scheduled and actual time)", 142 | TASK_LABELS, 143 | buckets=( 144 | 0.05, 145 | 0.1, 146 | 0.25, 147 | 0.5, 148 | 0.75, 149 | 1.0, 150 | 2.5, 151 | 5.0, 152 | 7.5, 153 | 10.0, 154 | 25.0, 155 | 50.0, 156 | 75.0, 157 | 100.0, 158 | 250.0, 159 | 500.0, 160 | 750.0, 161 | 1000.0, 162 | math.inf, 163 | ), 164 | ) 165 | 166 | 167 | async def prometheus_middleware(context: Context, next: Callable[[], Awaitable[None]]) -> None: 168 | """Middleware that exposes task execution metrics to Prometheus""" 169 | labels = _get_task_labels(context.task) 170 | staleness = pyncette._current_time() - context.scheduled_at 171 | _task_staleness.labels(**labels).observe(staleness.total_seconds()) 172 | async with _task_metric_set.measure(**labels): 173 | await next() 174 | 175 | 176 | _repository_metric_set = OperationMetricSet("repository_ops", ["operation", *TASK_LABELS]) 177 | 178 | _ticks_metric_set = OperationMetricSet("ticks", []) 179 | 180 | 181 | def with_prometheus_repository( 182 | repository_factory: RepositoryFactory, 183 | ) -> RepositoryFactory: 184 | """Wraps the repository factory into one that exposes the metrics via Prometheus""" 185 | 186 | @contextlib.asynccontextmanager 187 | async def _repository_factory(**kwargs: Any) -> AsyncIterator[MeteredRepository]: 188 | async with repository_factory(**kwargs) as inner_repository: 189 | yield MeteredRepository(_repository_metric_set, inner_repository) 190 | 191 | return _repository_factory 192 | 193 | 194 | async def prometheus_fixture(app_context: PyncetteContext) -> AsyncIterator[None]: 195 | tick_func = app_context._tick 196 | 197 | async def _metered_tick(*args: Any, **kwargs: Any) -> Any: 198 | 
async with _ticks_metric_set.measure(): 199 | return await tick_func(*args, **kwargs) 200 | 201 | app_context._tick = _metered_tick # type: ignore 202 | yield 203 | 204 | 205 | def use_prometheus( 206 | app: Pyncette, 207 | measure_repository: bool = True, 208 | measure_ticks: bool = True, 209 | measure_tasks: bool = True, 210 | ) -> None: 211 | """ 212 | Decorate the Pyncette app with the Prometheus metric exporter. 213 | 214 | :param measure_repository: Whether to measure repository operations 215 | :param measure_ticks: Whether to measure ticks 216 | :param measure_tasks: Whether to measure individual task executions 217 | """ 218 | if measure_repository: 219 | app._repository_factory = with_prometheus_repository(app._repository_factory) 220 | if measure_ticks: 221 | app.use_fixture("_prometheus", prometheus_fixture) 222 | if measure_tasks: 223 | app.use_middleware(prometheus_middleware) 224 | -------------------------------------------------------------------------------- /src/pyncette/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tibordp/pyncette/053bf10747b9301b5b4993c0c25c2a6eb63ab5d2/src/pyncette/py.typed -------------------------------------------------------------------------------- /src/pyncette/redis/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import contextlib 4 | import datetime 5 | import json 6 | import logging 7 | import uuid 8 | from dataclasses import dataclass 9 | from importlib.resources import read_text 10 | from typing import Any 11 | from collections.abc import AsyncIterator 12 | 13 | import redis 14 | from redis import asyncio as aioredis 15 | 16 | from pyncette.errors import PyncetteException 17 | from pyncette.model import ContinuationToken 18 | from pyncette.model import Lease 19 | from pyncette.model import PollResponse 20 | from pyncette.model import QueryResponse 21 | from pyncette.model import ResultType 22 | from pyncette.repository import Repository 23 | from pyncette.task import Task 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | _CONTINUATION_TOKEN = ContinuationToken(object()) 29 | 30 | 31 | class _LuaScript: 32 | """A wrapper for a Redis Lua script that automatically reloads it if e.g. 
SCRIPT FLUSH is invoked""" 33 | 34 | _script: str 35 | _sha: str | None 36 | 37 | def __init__(self, script_path: str): 38 | self._script = read_text(__name__, script_path) 39 | self._sha = None 40 | 41 | async def register(self, client: aioredis.Redis) -> None: 42 | self._sha = await client.script_load(self._script) 43 | 44 | async def execute( 45 | self, 46 | client: aioredis.Redis, 47 | keys: list[Any] | None = None, 48 | args: list[Any] | None = None, 49 | ) -> Any: 50 | if self._sha is None: 51 | await self.register(client) 52 | 53 | keys = keys or [] 54 | args = args or [] 55 | 56 | for _ in range(3): 57 | try: 58 | return await client.evalsha(self._sha, len(keys), *keys, *args) # ty: ignore[invalid-await] 59 | except redis.exceptions.NoScriptError: 60 | logger.warning("We seem to have lost the LUA script, reloading...") 61 | await self.register(client) 62 | 63 | raise PyncetteException("Could not reload the Lua script.") 64 | 65 | 66 | @dataclass 67 | class _ManageScriptResponse: 68 | result: ResultType 69 | version: int 70 | execute_after: datetime.datetime | None 71 | locked_until: datetime.datetime | None 72 | task_spec: dict[str, Any] | None 73 | locked_by: str | None 74 | 75 | @classmethod 76 | def from_response(cls, response: list[bytes]) -> _ManageScriptResponse: 77 | return cls( 78 | result=ResultType[response[0].decode()], 79 | version=int(response[1] or 0), 80 | execute_after=None if response[2] is None else datetime.datetime.fromisoformat(response[2].decode()), 81 | locked_until=None if response[3] is None else datetime.datetime.fromisoformat(response[3].decode()), 82 | locked_by=None if response[4] is None else response[4].decode(), 83 | task_spec=None if response[5] is None else json.loads(response[5]), 84 | ) 85 | 86 | 87 | def _create_dynamic_task(task: Task, response_data: list[bytes]) -> tuple[Task, Lease]: 88 | task_data = _ManageScriptResponse.from_response(response_data) 89 | assert task_data.task_spec is not None 90 | 91 | return (task.instantiate_from_spec(task_data.task_spec), Lease(task_data)) 92 | 93 | 94 | class RedisRepository(Repository): 95 | """Redis-backed store for Pyncette task execution data""" 96 | 97 | _redis_client: aioredis.Redis 98 | _namespace: str 99 | _manage_script: _LuaScript 100 | _poll_dynamic_script: _LuaScript 101 | 102 | def __init__(self, redis_client: aioredis.Redis, **kwargs: Any): 103 | self._redis_client = redis_client 104 | self._namespace = kwargs.get("redis_namespace", "") 105 | self._batch_size = kwargs.get("batch_size", 100) 106 | self._poll_dynamic_script = _LuaScript("poll_dynamic.lua") 107 | self._manage_script = _LuaScript("manage.lua") 108 | 109 | if self._batch_size < 1: 110 | raise ValueError("Batch size must be greater than 0") 111 | 112 | async def register_scripts(self) -> None: 113 | """Registers the Lua scripts used by the implementation ahead of time""" 114 | await self._poll_dynamic_script.register(self._redis_client) 115 | await self._manage_script.register(self._redis_client) 116 | 117 | async def poll_dynamic_task( 118 | self, 119 | utc_now: datetime.datetime, 120 | task: Task, 121 | continuation_token: ContinuationToken | None = None, 122 | ) -> QueryResponse: 123 | new_locked_until = utc_now + task.lease_duration 124 | response = await self._poll_dynamic_script.execute( 125 | self._redis_client, 126 | keys=[self._get_task_index_key(task)], 127 | args=[ 128 | utc_now.isoformat(), 129 | self._batch_size, 130 | new_locked_until.isoformat(), 131 | str(uuid.uuid4()), 132 | ], 133 | ) 134 | 
logger.debug(f"query_lua script returned [{self._batch_size}] {response}") 135 | 136 | return QueryResponse( 137 | tasks=[_create_dynamic_task(task, response_data) for response_data in response[1:]], 138 | continuation_token=_CONTINUATION_TOKEN if response[0] == b"HAS_MORE" else None, 139 | ) 140 | 141 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 142 | execute_after = task.get_next_execution(utc_now, None) 143 | assert execute_after is not None 144 | 145 | await self._manage_record( 146 | task, 147 | "REGISTER", 148 | execute_after.isoformat(), 149 | json.dumps(task.as_spec()), 150 | ) 151 | 152 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 153 | await self._manage_record(task, "UNREGISTER") 154 | 155 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Lease | None = None) -> PollResponse: 156 | # Nominally, we need at least two round-trips to Redis since the next execute_after is calculated 157 | # in Python code due to extra flexibility. This is why we have optimistic locking below to ensure that 158 | # the next execution time was calculated using a correct base if another process modified it in between. 159 | # In most cases, however, we can assume that the base time has not changed since the last invocation, 160 | # so by caching it, we can poll a task using a single round-trip (if we are wrong, the loop below will still 161 | # ensure correctness as the version will not match). 162 | last_lease: _ManageScriptResponse | None = getattr(task, "_last_lease", None) 163 | if isinstance(lease, _ManageScriptResponse): 164 | version, execute_after, locked_by = ( 165 | lease.version, 166 | lease.execute_after, 167 | lease.locked_by, 168 | ) 169 | elif last_lease is not None: 170 | logger.debug("Using cached values for execute_after") 171 | version, execute_after, locked_by = ( 172 | last_lease.version, 173 | last_lease.execute_after, 174 | str(uuid.uuid4()), 175 | ) 176 | else: 177 | # By default we assume that the task is brand new 178 | version, execute_after, locked_by = ( 179 | 0, 180 | None, 181 | str(uuid.uuid4()), 182 | ) 183 | 184 | new_locked_until = utc_now + task.lease_duration 185 | for _ in range(5): 186 | next_execution = task.get_next_execution(utc_now, execute_after) 187 | response = await self._manage_record( 188 | task, 189 | "POLL", 190 | task.execution_mode.name, 191 | "REGULAR" if task.parent_task is None else "DYNAMIC", 192 | utc_now.isoformat(), 193 | version, 194 | next_execution.isoformat() if next_execution is not None else "", 195 | new_locked_until.isoformat(), 196 | locked_by, 197 | ) 198 | task._last_lease = response # type: ignore 199 | 200 | if response.result == ResultType.LEASE_MISMATCH: 201 | logger.debug("Lease mismatch, retrying.") 202 | execute_after = response.execute_after 203 | version = response.version 204 | elif response.result == ResultType.MISSING: 205 | raise PyncetteException("Task not found") 206 | else: 207 | return PollResponse( 208 | result=response.result, 209 | scheduled_at=execute_after, # ty: ignore[invalid-argument-type] 210 | lease=Lease(response), 211 | ) 212 | 213 | raise PyncetteException("Unable to acquire the lock on the task due to contention") 214 | 215 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 216 | assert isinstance(lease, _ManageScriptResponse) 217 | next_execution = task.get_next_execution(utc_now, lease.execute_after) 218 | response = await self._manage_record( 219 | task, 220 | 
"COMMIT", 221 | lease.version, 222 | lease.locked_by, 223 | next_execution.isoformat() if next_execution is not None else "", 224 | ) 225 | task._last_lease = response # type: ignore 226 | if response.result == ResultType.LEASE_MISMATCH: 227 | logger.info("Not commiting, as we have lost the lease") 228 | 229 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 230 | assert isinstance(lease, _ManageScriptResponse) 231 | response = await self._manage_record(task, "UNLOCK", lease.version, lease.locked_by) 232 | task._last_lease = response # type: ignore 233 | if response.result == ResultType.LEASE_MISMATCH: 234 | logger.info("Not unlocking, as we have lost the lease") 235 | 236 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Lease | None: 237 | assert isinstance(lease, _ManageScriptResponse) 238 | new_locked_until = utc_now + task.lease_duration 239 | response = await self._manage_record(task, "EXTEND", lease.version, lease.locked_by, new_locked_until.isoformat()) 240 | task._last_lease = response # type: ignore 241 | 242 | if response.result == ResultType.READY: 243 | return Lease(response) 244 | else: 245 | return None 246 | 247 | async def _manage_record(self, task: Task, *args: Any) -> _ManageScriptResponse: 248 | response = await self._manage_script.execute( 249 | self._redis_client, 250 | keys=[ 251 | self._get_task_record_key(task), 252 | self._get_task_index_key(task.parent_task), 253 | ], 254 | args=list(args), 255 | ) 256 | logger.debug(f"manage_lua script returned {response}") 257 | return _ManageScriptResponse.from_response(response) 258 | 259 | def _get_task_record_key(self, task: Task) -> str: 260 | return f"pyncette:{self._namespace}:task:{task.canonical_name}" 261 | 262 | def _get_task_index_key(self, task: Task | None) -> str: 263 | # A prefix-coded index key, so there are no restrictions on task names. 264 | index_name = f"index:{task.canonical_name}" if task else "index" 265 | return f"pyncette:{self._namespace}:{index_name}" 266 | 267 | 268 | @contextlib.asynccontextmanager 269 | async def redis_repository(**kwargs: Any) -> AsyncIterator[RedisRepository]: 270 | """Factory context manager for Redis repository that initializes the connection to Redis""" 271 | if not isinstance(kwargs["redis_url"], str): 272 | raise PyncetteException("Redis URL is required") 273 | 274 | async with aioredis.from_url(kwargs["redis_url"]) as redis_pool: 275 | repository = RedisRepository(redis_pool, **kwargs) 276 | await repository.register_scripts() 277 | yield repository 278 | -------------------------------------------------------------------------------- /src/pyncette/redis/manage.lua: -------------------------------------------------------------------------------- 1 | local version, execute_after, locked_until, locked_by, task_spec = unpack(redis.call('hmget', KEYS[1], 'version', 'execute_after', 'locked_until', 'locked_by', 'task_spec')) 2 | local key_exists = version ~= false 3 | local result 4 | 5 | local function getIndexKey() 6 | if not locked_until or locked_until < execute_after then 7 | return execute_after .. '_' .. KEYS[1] 8 | else 9 | return locked_until .. '_' .. 
KEYS[1] 10 | end 11 | end 12 | 13 | local function setKey(attr, val) 14 | if val == false then 15 | redis.call('hdel', KEYS[1], attr) 16 | else 17 | redis.call('hset', KEYS[1], attr, val) 18 | end 19 | end 20 | 21 | -- Update the task data while also updating the index 22 | local function updateRecord(new_execute_after, new_locked_until, new_locked_by, new_task_spec) 23 | redis.call('zrem', KEYS[2], getIndexKey()) 24 | version, execute_after, locked_until, locked_by, task_spec = version + 1, new_execute_after, new_locked_until, new_locked_by, new_task_spec 25 | setKey('version', version) 26 | setKey('execute_after', execute_after) 27 | setKey('locked_until', locked_until) 28 | setKey('locked_by', locked_by) 29 | setKey('task_spec', task_spec) 30 | redis.call('zadd', KEYS[2], 0, getIndexKey()) 31 | end 32 | 33 | local function deleteRecord() 34 | redis.call('zrem', KEYS[2], getIndexKey()) 35 | version, execute_after, locked_until, locked_by, task_spec = false, false, false, false, false 36 | redis.call('del', KEYS[1]) 37 | end 38 | 39 | 40 | if ARGV[1] == 'POLL' then 41 | local _, mode, task_type, utc_now, incoming_version, incoming_execute_after, incoming_locked_until, incoming_locked_by = unpack(ARGV) 42 | 43 | if not key_exists and task_type == "REGULAR" then 44 | version, execute_after = incoming_version, incoming_execute_after 45 | redis.call('hmset', KEYS[1], 'version', version, 'execute_after', execute_after) 46 | redis.call('zadd', KEYS[2], 0, getIndexKey()) 47 | end 48 | 49 | if not key_exists and task_type == "DYNAMIC" then 50 | result = "MISSING" 51 | elseif locked_until and utc_now < locked_until and not (version == incoming_version and locked_by == incoming_locked_by) then 52 | result = "LOCKED" 53 | elseif execute_after <= utc_now and version ~= incoming_version then 54 | result = "LEASE_MISMATCH" 55 | elseif execute_after <= utc_now and mode == 'AT_MOST_ONCE' and incoming_execute_after == '' then 56 | deleteRecord() 57 | result = "READY" 58 | elseif execute_after <= utc_now and mode == 'AT_MOST_ONCE' then 59 | updateRecord(incoming_execute_after, false, false, task_spec) 60 | result = "READY" 61 | elseif execute_after <= utc_now and mode == 'AT_LEAST_ONCE' then 62 | updateRecord(execute_after, incoming_locked_until, incoming_locked_by, task_spec) 63 | result = "READY" 64 | else 65 | result = "PENDING" 66 | end 67 | elseif ARGV[1] == 'COMMIT' then 68 | local _, incoming_version, incoming_locked_by, incoming_execute_after = unpack(ARGV) 69 | 70 | if not (version == incoming_version and locked_by == incoming_locked_by) then 71 | result = "LEASE_MISMATCH" 72 | elseif incoming_execute_after == '' then 73 | deleteRecord() 74 | result = "READY" 75 | else 76 | updateRecord(incoming_execute_after, false, false, task_spec) 77 | result = "READY" 78 | end 79 | elseif ARGV[1] == 'UNLOCK' then 80 | local _, incoming_version, incoming_locked_by = unpack(ARGV) 81 | 82 | if version == incoming_version and locked_by == incoming_locked_by then 83 | updateRecord(execute_after, false, false, task_spec) 84 | result = "READY" 85 | else 86 | result = "LEASE_MISMATCH" 87 | end 88 | elseif ARGV[1] == 'EXTEND' then 89 | local _, incoming_version, incoming_locked_by, incoming_locked_until = unpack(ARGV) 90 | 91 | if version == incoming_version and locked_by == incoming_locked_by then 92 | updateRecord(execute_after, incoming_locked_until, incoming_locked_by, task_spec) 93 | result = "READY" 94 | else 95 | result = "LEASE_MISMATCH" 96 | end 97 | elseif ARGV[1] == 'REGISTER' then 98 | local _, 
incoming_execute_after, incoming_task_spec = unpack(ARGV) 99 | 100 | if not key_exists then 101 | version, execute_after, task_spec = 0, incoming_execute_after, incoming_task_spec 102 | redis.call('hmset', KEYS[1], 'version', version, 'execute_after', execute_after, 'task_spec', task_spec) 103 | redis.call('zadd', KEYS[2], 0, getIndexKey()) 104 | else 105 | updateRecord(incoming_execute_after, false, false, incoming_task_spec) 106 | end 107 | 108 | result = "READY" 109 | elseif ARGV[1] == 'UNREGISTER' then 110 | if key_exists then 111 | deleteRecord() 112 | end 113 | 114 | result = "READY" 115 | end 116 | 117 | return { result, version, execute_after, locked_until, locked_by, task_spec} 118 | -------------------------------------------------------------------------------- /src/pyncette/redis/poll_dynamic.lua: -------------------------------------------------------------------------------- 1 | local utc_now, limit, incoming_locked_until, incoming_locked_by = unpack(ARGV) 2 | limit = tonumber(limit) 3 | 4 | local tasksets = redis.call('zrangebylex', KEYS[1], '-', '(' .. utc_now .. '`', 'LIMIT', 0, limit + 1) 5 | local results = { "READY" } 6 | 7 | for key,value in pairs(tasksets) do 8 | local task_name = value:gmatch('_(.*)')() 9 | local version, execute_after, locked_until, locked_by, task_spec = unpack(redis.call('hmget', task_name, 'version', 'execute_after', 'locked_until', 'locked_by', 'task_spec')) 10 | 11 | local function getIndexKey() 12 | if not locked_until or locked_until < execute_after then 13 | return execute_after .. '_' .. task_name 14 | else 15 | return locked_until .. '_' .. task_name 16 | end 17 | end 18 | 19 | redis.call('zrem', KEYS[1], getIndexKey()) 20 | version, locked_until, locked_by = version + 1, incoming_locked_until, incoming_locked_by 21 | redis.call('hmset', task_name, 'version', version, 'locked_until', locked_until, 'locked_by', locked_by) 22 | redis.call('zadd', KEYS[1], 0, getIndexKey()) 23 | 24 | results[key + 1] = { "READY", version, execute_after, locked_until, locked_by, task_spec } 25 | if key == limit then 26 | results[1] = "HAS_MORE" 27 | break 28 | end 29 | end 30 | 31 | return results 32 | -------------------------------------------------------------------------------- /src/pyncette/repository.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import datetime 3 | import logging 4 | from typing import Any 5 | from typing import AsyncContextManager 6 | from typing import Optional 7 | from typing import Protocol 8 | 9 | from .model import ContinuationToken 10 | from .model import Lease 11 | from .model import PollResponse 12 | from .model import QueryResponse 13 | from .task import Task 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class Repository(abc.ABC): 19 | """Abstract base class representing a store for Pyncette tasks""" 20 | 21 | @abc.abstractmethod 22 | async def poll_dynamic_task( 23 | self, 24 | utc_now: datetime.datetime, 25 | task: Task, 26 | continuation_token: Optional[ContinuationToken] = None, 27 | ) -> QueryResponse: 28 | """Queries the dynamic tasks for execution""" 29 | 30 | @abc.abstractmethod 31 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 32 | """Registers a dynamic task""" 33 | 34 | @abc.abstractmethod 35 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 36 | """Deregisters a dynamic task implementation""" 37 | 38 | @abc.abstractmethod 39 | async def poll_task(self, utc_now: 
datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 40 | """Polls the task to determine whether it is ready for execution""" 41 | 42 | @abc.abstractmethod 43 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 44 | """Commits the task, which signals a successful run.""" 45 | 46 | @abc.abstractmethod 47 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 48 | """Extends the lease on the task. Returns the new lease if lease was still valid.""" 49 | 50 | @abc.abstractmethod 51 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 52 | """Unlocks the task, making it eligible for retries in case execution failed.""" 53 | 54 | 55 | class RepositoryFactory(Protocol): 56 | """A factory context manager for creating a repository""" 57 | 58 | def __call__(self, **kwargs: Any) -> AsyncContextManager[Repository]: 59 | """Creates a context manager managing the lifecycle of the repository.""" 60 | -------------------------------------------------------------------------------- /src/pyncette/sqlite.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import datetime 4 | import json 5 | import logging 6 | import re 7 | import uuid 8 | from typing import Any 9 | from collections.abc import AsyncIterator 10 | from typing import Optional 11 | from typing import cast 12 | 13 | import aiosqlite 14 | import dateutil.tz 15 | 16 | from pyncette.errors import PyncetteException 17 | from pyncette.model import ContinuationToken 18 | from pyncette.model import ExecutionMode 19 | from pyncette.model import Lease 20 | from pyncette.model import PollResponse 21 | from pyncette.model import QueryResponse 22 | from pyncette.model import ResultType 23 | from pyncette.repository import Repository 24 | from pyncette.task import Task 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | def _from_timestamp(timestamp: Optional[float]) -> Optional[datetime.datetime]: 30 | if timestamp is None: 31 | return None 32 | else: 33 | return datetime.datetime.fromtimestamp(timestamp, dateutil.tz.UTC) 34 | 35 | 36 | def _to_timestamp(date: Optional[datetime.datetime]) -> Optional[float]: 37 | if date is None: 38 | return None 39 | else: 40 | return date.timestamp() 41 | 42 | 43 | _CONTINUATION_TOKEN = ContinuationToken(object()) 44 | 45 | 46 | class SqliteRepository(Repository): 47 | _connection: aiosqlite.Connection 48 | _batch_size: int 49 | _table_name: str 50 | _lock: asyncio.Lock 51 | 52 | def __init__( 53 | self, 54 | connection: aiosqlite.Connection, 55 | **kwargs: Any, 56 | ): 57 | self._connection = connection 58 | self._table_name = kwargs.get("sqlite_table_name", "pyncette_tasks") 59 | self._batch_size = kwargs.get("batch_size", 100) 60 | self._lock = asyncio.Lock() 61 | 62 | if self._batch_size < 1: 63 | raise ValueError("Batch size must be greater than 0") 64 | if not re.match(r"^[a-z_]+$", self._table_name): 65 | raise ValueError("Table name can only contain lower-case letters and underscores") 66 | 67 | async def initialize(self) -> None: 68 | async with self._transaction(): 69 | await self._connection.executescript( 70 | f""" 71 | CREATE TABLE IF NOT EXISTS {self._table_name} ( 72 | name PRIMARY KEY, 73 | parent_name, 74 | locked_until timestamp, 75 | locked_by, 76 | execute_after timestamp, 77 | task_spec 78 | ); 79 | CREATE INDEX IF NOT EXISTS due_tasks_{self._table_name} 80 | ON 
{self._table_name} (parent_name, MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0))); 81 | """ 82 | ) 83 | 84 | async def poll_dynamic_task( 85 | self, 86 | utc_now: datetime.datetime, 87 | task: Task, 88 | continuation_token: Optional[ContinuationToken] = None, 89 | ) -> QueryResponse: 90 | async with self._transaction(explicit_begin=True): 91 | locked_by = uuid.uuid4() 92 | locked_until = utc_now + task.lease_duration 93 | 94 | ready_tasks = await self._connection.execute_fetchall( 95 | f"""SELECT * FROM {self._table_name} 96 | WHERE parent_name = :parent_name AND MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= :utc_now 97 | ORDER BY MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC 98 | LIMIT :batch_size 99 | """, 100 | { 101 | "parent_name": task.canonical_name, 102 | "utc_now": _to_timestamp(utc_now), 103 | "batch_size": self._batch_size, 104 | }, 105 | ) 106 | 107 | concrete_tasks = [task.instantiate_from_spec(json.loads(record["task_spec"])) for record in ready_tasks] 108 | await self._connection.executemany( 109 | f""" 110 | UPDATE {self._table_name} 111 | SET 112 | locked_until = :locked_until, 113 | locked_by = :locked_by 114 | WHERE name = :name 115 | """, 116 | [ 117 | { 118 | "name": concrete_task.canonical_name, 119 | "locked_until": _to_timestamp(locked_until), 120 | "locked_by": str(locked_by), 121 | } 122 | for concrete_task in concrete_tasks 123 | ], 124 | ) 125 | 126 | return QueryResponse( 127 | tasks=[(concrete_task, Lease(locked_by)) for concrete_task in concrete_tasks], 128 | continuation_token=_CONTINUATION_TOKEN if len(concrete_tasks) == self._batch_size else None, 129 | ) 130 | 131 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 132 | async with self._transaction(explicit_begin=True): 133 | assert task.parent_task is not None 134 | record = await self._connection.execute_fetchall( 135 | f"SELECT 1 FROM {self._table_name} WHERE name = :name", 136 | {"name": task.canonical_name}, 137 | ) 138 | 139 | if record: 140 | await self._connection.execute_fetchall( 141 | f""" 142 | UPDATE {self._table_name} 143 | SET 144 | task_spec = :task_spec, 145 | execute_after = :execute_after, 146 | locked_until = NULL, 147 | locked_by = NULL 148 | WHERE 149 | name = :name 150 | """, 151 | { 152 | "name": task.canonical_name, 153 | "task_spec": json.dumps(task.as_spec()), 154 | "execute_after": _to_timestamp(task.get_next_execution(utc_now, None)), 155 | }, 156 | ) 157 | else: 158 | await self._connection.execute_fetchall( 159 | f""" 160 | INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after) 161 | VALUES (:name, :parent_name, :task_spec, :execute_after) 162 | """, 163 | { 164 | "name": task.canonical_name, 165 | "parent_name": task.parent_task.canonical_name, 166 | "task_spec": json.dumps(task.as_spec()), 167 | "execute_after": _to_timestamp(task.get_next_execution(utc_now, None)), 168 | }, 169 | ) 170 | 171 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 172 | async with self._transaction(): 173 | await self._connection.execute_fetchall( 174 | f"DELETE FROM {self._table_name} WHERE name = :name", 175 | {"name": task.canonical_name}, 176 | ) 177 | 178 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Optional[Lease] = None) -> PollResponse: 179 | async with self._transaction(explicit_begin=True): 180 | records = await self._connection.execute_fetchall( 181 | f"SELECT * FROM {self._table_name} WHERE name = :name", 182 | {"name": 
task.canonical_name}, 183 | ) 184 | 185 | if not records: 186 | # Regular (non-dynamic) tasks will be implicitly created on first poll, 187 | # but dynamic task instances must be explicitly created to prevent spurious 188 | # polls from re-creating them after being deleted. 189 | if task.parent_task is not None: 190 | raise PyncetteException("Task not found") 191 | 192 | locked_until = None 193 | locked_by = None 194 | execute_after = task.get_next_execution(utc_now, None) 195 | await self._connection.execute_fetchall( 196 | f""" 197 | INSERT INTO {self._table_name} (name, execute_after) 198 | VALUES (:name, :execute_after) 199 | """, 200 | { 201 | "name": task.canonical_name, 202 | "execute_after": _to_timestamp(execute_after), 203 | }, 204 | ) 205 | else: 206 | record = next(iter(records)) 207 | execute_after = cast(datetime.datetime, _from_timestamp(record["execute_after"])) 208 | locked_until = _from_timestamp(record["locked_until"]) 209 | locked_by = record["locked_by"] 210 | 211 | assert execute_after is not None 212 | scheduled_at = execute_after 213 | 214 | if locked_until is not None and locked_until > utc_now and (str(lease) != locked_by): 215 | result = ResultType.LOCKED 216 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_MOST_ONCE: 217 | execute_after = task.get_next_execution(utc_now, execute_after) 218 | result = ResultType.READY 219 | locked_until = None 220 | locked_by = None 221 | await self._update_record( 222 | task, 223 | locked_until, 224 | locked_by, 225 | execute_after, 226 | ) 227 | elif execute_after <= utc_now and task.execution_mode == ExecutionMode.AT_LEAST_ONCE: 228 | locked_until = utc_now + task.lease_duration 229 | locked_by = uuid.uuid4() 230 | result = ResultType.READY 231 | await self._update_record( 232 | task, 233 | locked_until, 234 | locked_by, 235 | execute_after, 236 | ) 237 | else: 238 | result = ResultType.PENDING 239 | 240 | return PollResponse(result=result, scheduled_at=scheduled_at, lease=locked_by) 241 | 242 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 243 | async with self._transaction(explicit_begin=True): 244 | records = await self._connection.execute_fetchall( 245 | f"SELECT * FROM {self._table_name} WHERE name = :name", 246 | {"name": task.canonical_name}, 247 | ) 248 | 249 | if not records: 250 | logger.warning(f"Task {task} not found, skipping.") 251 | return 252 | 253 | record = next(iter(records)) 254 | if record["locked_by"] != str(lease): 255 | logger.warning(f"Lease lost on task {task}, skipping.") 256 | return 257 | 258 | execute_after = datetime.datetime.fromtimestamp(record["execute_after"], dateutil.tz.UTC) if record["execute_after"] else None 259 | await self._update_record( 260 | task, 261 | None, 262 | None, 263 | task.get_next_execution(utc_now, execute_after), 264 | ) 265 | 266 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Optional[Lease]: 267 | async with self._transaction(): 268 | locked_until = utc_now + task.lease_duration 269 | async with await self._connection.execute( 270 | f""" 271 | UPDATE {self._table_name} 272 | SET 273 | locked_until = :locked_until 274 | WHERE name = :name AND locked_by = :locked_by 275 | """, 276 | { 277 | "locked_until": _to_timestamp(locked_until), 278 | "name": task.canonical_name, 279 | "locked_by": str(lease), 280 | }, 281 | ) as cursor: 282 | if cursor.rowcount == 1: 283 | return lease 284 | else: 285 | return None 286 | 287 | async def unlock_task(self, utc_now: 
datetime.datetime, task: Task, lease: Lease) -> None: 288 | async with self._transaction(): 289 | await self._connection.execute_fetchall( 290 | f""" 291 | UPDATE {self._table_name} 292 | SET 293 | locked_by = NULL, 294 | locked_until = NULL 295 | WHERE name = :name AND locked_by = :locked_by 296 | """, 297 | { 298 | "name": task.canonical_name, 299 | "locked_by": str(lease), 300 | }, 301 | ) 302 | 303 | async def _update_record( 304 | self, 305 | task: Task, 306 | locked_until: Optional[datetime.datetime], 307 | locked_by: Optional[uuid.UUID], 308 | execute_after: Optional[datetime.datetime], 309 | ) -> None: 310 | if execute_after is None: 311 | await self._connection.execute_fetchall( 312 | f"DELETE FROM {self._table_name} WHERE name = :name", 313 | {"name": task.canonical_name}, 314 | ) 315 | else: 316 | await self._connection.execute_fetchall( 317 | f""" 318 | UPDATE {self._table_name} 319 | SET 320 | locked_until = :locked_until, 321 | locked_by = :locked_by, 322 | execute_after = :execute_after 323 | WHERE name = :name 324 | """, 325 | { 326 | "name": task.canonical_name, 327 | "locked_until": _to_timestamp(locked_until), 328 | "locked_by": str(locked_by), 329 | "execute_after": _to_timestamp(execute_after), 330 | }, 331 | ) 332 | 333 | @contextlib.asynccontextmanager 334 | async def _transaction(self, explicit_begin: bool = False) -> AsyncIterator[None]: 335 | async with self._lock: 336 | # If we only execute a single DML statement, the transaction will be implicitly open, 337 | # but if we start with a SELECT, we need to be in a transaction explicitly. 338 | await self._connection.execute_fetchall("BEGIN") 339 | try: 340 | yield 341 | except Exception: 342 | await self._connection.rollback() 343 | raise 344 | else: 345 | await self._connection.commit() 346 | 347 | 348 | @contextlib.asynccontextmanager 349 | async def sqlite_repository(**kwargs: Any) -> AsyncIterator[SqliteRepository]: 350 | """Factory context manager for Sqlite repository that initializes the connection to Sqlite""" 351 | 352 | async with aiosqlite.connect(kwargs.get("sqlite_database", ":memory:")) as connection: 353 | connection.row_factory = aiosqlite.Row 354 | repository = SqliteRepository(connection, **kwargs) 355 | await repository.initialize() 356 | yield repository 357 | -------------------------------------------------------------------------------- /src/pyncette/task.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime 4 | import hashlib 5 | import json 6 | import logging 7 | from typing import Any 8 | from collections.abc import Awaitable 9 | 10 | import dateutil.tz 11 | from croniter import croniter 12 | 13 | from .model import Context 14 | from .model import ExecutionMode 15 | from .model import FailureMode 16 | from .model import PartitionSelector 17 | from .model import TaskFunc 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class Task: 23 | """The base unit of execution""" 24 | 25 | name: str 26 | task_func: TaskFunc 27 | schedule: str | None 28 | interval: datetime.timedelta | None 29 | execute_at: datetime.datetime | None 30 | timezone: str | None 31 | fast_forward: bool 32 | failure_mode: FailureMode 33 | execution_mode: ExecutionMode 34 | lease_duration: datetime.timedelta 35 | parent_task: Task | None 36 | extra_args: dict[str, Any] 37 | _enabled: bool 38 | 39 | def __init__( 40 | self, 41 | *, 42 | name: str, 43 | func: TaskFunc, 44 | enabled: bool = True, 45 | dynamic: bool = False, 
46 | parent_task: Task | None = None, 47 | schedule: str | None = None, 48 | interval: datetime.timedelta | None = None, 49 | execute_at: datetime.datetime | None = None, 50 | timezone: str | None = None, 51 | fast_forward: bool = False, 52 | failure_mode: FailureMode = FailureMode.NONE, 53 | execution_mode: ExecutionMode = ExecutionMode.AT_LEAST_ONCE, 54 | lease_duration: datetime.timedelta = datetime.timedelta(seconds=60), 55 | **kwargs: Any, 56 | ): 57 | self._enabled = enabled 58 | self.name = name 59 | self.task_func = func 60 | 61 | self.dynamic = dynamic 62 | self.parent_task = parent_task 63 | 64 | self.schedule = schedule 65 | self.interval = interval 66 | self.timezone = timezone 67 | self.fast_forward = fast_forward 68 | self.failure_mode = failure_mode 69 | self.execute_at = execute_at 70 | self.execution_mode = execution_mode 71 | self.lease_duration = lease_duration 72 | self.extra_args = kwargs 73 | 74 | self._validate() 75 | 76 | def _validate(self) -> None: 77 | if self.execution_mode == ExecutionMode.AT_MOST_ONCE and self.failure_mode != FailureMode.NONE: 78 | raise ValueError("failure_mode is not applicable when execution_mode is AT_MOST_ONCE") 79 | 80 | if not self.dynamic: 81 | schedule_specs = [spec for spec in [self.schedule, self.interval, self.execute_at] if spec is not None] 82 | if len(schedule_specs) != 1: 83 | raise ValueError("Exactly one of the following must be specified: schedule, interval, execute_at") 84 | if self.schedule is None and self.timezone is not None: 85 | raise ValueError("Timezone may only be specified when cron schedule is used") 86 | if self.schedule is not None: 87 | croniter.expand(self.schedule) 88 | 89 | if self.parent_task is None and self.execute_at is not None: 90 | raise ValueError("execute_at is only supported for dynamic tasks") 91 | 92 | if dateutil.tz.gettz(self.timezone) is None: 93 | raise ValueError(f"Invalid timezone specifier '{self.timezone}'.") 94 | 95 | try: 96 | json.dumps(self.extra_args) 97 | except Exception as e: 98 | raise ValueError(f"Extra parameters must be JSON serializable ({e})") from None 99 | 100 | def get_next_execution( 101 | self, 102 | utc_now: datetime.datetime, 103 | last_execution: datetime.datetime | None, 104 | ) -> datetime.datetime | None: 105 | if self.execute_at is not None: 106 | return self.execute_at.astimezone(dateutil.tz.UTC) if last_execution is None else None 107 | 108 | current_time = last_execution if last_execution is not None else utc_now 109 | 110 | if self.interval is not None: 111 | if not last_execution or not self.fast_forward: 112 | return current_time + self.interval 113 | else: 114 | count = (utc_now - last_execution) // self.interval + 1 115 | return last_execution + (self.interval * count) 116 | 117 | if self.schedule is not None: 118 | if self.timezone: 119 | current_time = current_time.astimezone(dateutil.tz.gettz(self.timezone)) 120 | 121 | cron = croniter(self.schedule, start_time=current_time, ret_type=datetime.datetime) 122 | 123 | while True: 124 | next_execution = cron.get_next() 125 | if not next_execution: 126 | return None 127 | if not self.fast_forward or next_execution >= utc_now: 128 | return next_execution.astimezone(dateutil.tz.UTC) 129 | 130 | raise AssertionError 131 | 132 | def instantiate(self, name: str, **kwargs: Any) -> Task: 133 | """Creates a concrete instance of a dynamic task""" 134 | 135 | if not self.dynamic: 136 | raise ValueError("Cannot instantiate a non-dynamic task") 137 | 138 | extra_args: dict[str, Any] = { 139 | "schedule": self.schedule, 
140 | "interval": self.interval, 141 | "timezone": self.timezone, 142 | "execute_at": self.execute_at, 143 | **self.extra_args, 144 | **kwargs, 145 | } 146 | 147 | return Task( 148 | name=name, 149 | func=self.task_func, 150 | fast_forward=self.fast_forward, 151 | failure_mode=self.failure_mode, 152 | execution_mode=self.execution_mode, 153 | lease_duration=self.lease_duration, 154 | parent_task=self, 155 | **extra_args, 156 | ) 157 | 158 | @property 159 | def enabled(self) -> bool: 160 | return self._enabled 161 | 162 | @enabled.setter 163 | def enabled(self, value: bool) -> None: 164 | self._enabled = value 165 | 166 | @property 167 | def canonical_name(self) -> str: 168 | """A unique identifier for a task instance""" 169 | if self.parent_task is not None: 170 | return "{}:{}".format( 171 | self.parent_task.canonical_name, 172 | self.name.replace(":", "::"), 173 | ) 174 | else: 175 | return self.name.replace(":", "::") 176 | 177 | def as_spec(self) -> dict[str, Any]: 178 | """Serializes all the attributes to task spec""" 179 | return { 180 | "name": self.name, 181 | "schedule": self.schedule, 182 | "interval": self.interval.total_seconds() if self.interval is not None else None, 183 | "execute_at": self.execute_at.isoformat() if self.execute_at is not None else None, 184 | "timezone": self.timezone, 185 | "extra_args": self.extra_args, 186 | } 187 | 188 | def instantiate_from_spec(self, task_spec: dict[str, Any]) -> Task: 189 | """Deserializes all the attributes from task spec""" 190 | return self.instantiate( 191 | name=task_spec["name"], 192 | schedule=task_spec["schedule"], 193 | interval=datetime.timedelta(seconds=task_spec["interval"]) if task_spec["interval"] is not None else None, 194 | timezone=task_spec["timezone"], 195 | execute_at=datetime.datetime.fromisoformat(task_spec["execute_at"]) if task_spec["execute_at"] is not None else None, 196 | **task_spec["extra_args"], 197 | ) 198 | 199 | def __call__(self, context: Context) -> Awaitable[None]: 200 | return self.task_func(context) 201 | 202 | def __str__(self) -> str: 203 | return self.canonical_name 204 | 205 | 206 | def _default_partition_selector(partition_count: int, task_id: str) -> int: 207 | algo = hashlib.sha1() # noqa: S324 208 | algo.update(task_id.encode("utf-8")) 209 | max_value = int.from_bytes(b"\xff" * algo.digest_size, "big") + 1 210 | digest = int.from_bytes(algo.digest(), "big") 211 | 212 | return (digest * partition_count) // max_value 213 | 214 | 215 | class _TaskPartition(Task): 216 | partition_id: int 217 | _parent: PartitionedTask 218 | 219 | def __init__(self, parent: PartitionedTask, partition_id: int, **kwargs: Any): 220 | super().__init__(dynamic=True, **kwargs) 221 | self._parent = parent 222 | self.partition_id = partition_id 223 | 224 | @property 225 | def enabled(self) -> bool: 226 | return self._parent.enabled and (self._parent.enabled_partitions is None or self.partition_id in self._parent.enabled_partitions) 227 | 228 | @enabled.setter 229 | def enabled(self, value: bool) -> None: 230 | raise ValueError("Use enabled_partitions to disable polling a partition.") 231 | 232 | @property 233 | def canonical_name(self) -> str: 234 | """A unique identifier for a task instance""" 235 | 236 | assert self.parent_task is None 237 | return "{}:{}".format(self.name.replace(":", "::"), self.partition_id) 238 | 239 | 240 | class PartitionedTask(Task): 241 | _kwargs: Any 242 | partition_count: int 243 | partition_selector: PartitionSelector 244 | enabled_partitions: list[int] | None 245 | 246 | def __init__( 
247 | self, 248 | *, 249 | partition_count: int, 250 | partition_selector: PartitionSelector = _default_partition_selector, 251 | enabled_partitions: list[int] | None = None, 252 | **kwargs: Any, 253 | ): 254 | if partition_count < 1: 255 | raise ValueError("Partition count must be greater than or equal to 1") 256 | 257 | super().__init__(dynamic=True, **kwargs) 258 | 259 | self.partition_count = partition_count 260 | self.partition_selector = partition_selector 261 | self.enabled_partitions = enabled_partitions 262 | self._kwargs = kwargs 263 | 264 | def get_partitions(self) -> list[Task]: 265 | return [_TaskPartition(self, partition_id=partition_id, **self._kwargs) for partition_id in range(self.partition_count)] 266 | 267 | def instantiate(self, name: str, **kwargs: Any) -> Task: 268 | """Creates a concrete instance of a dynamic task""" 269 | 270 | partition_id = self.partition_selector(self.partition_count, name) 271 | shard = _TaskPartition(self, partition_id=partition_id, **self._kwargs) 272 | 273 | return shard.instantiate(name, **kwargs) 274 | -------------------------------------------------------------------------------- /src/pyncette/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from functools import wraps 4 | 5 | from .errors import LeaseLostException 6 | from .model import Context 7 | from .model import Decorator 8 | from .model import TaskFunc 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | DEFAULT_LEASE_REMAINING_RATIO = 0.5 13 | 14 | 15 | def with_heartbeat( 16 | lease_remaining_ratio: float = DEFAULT_LEASE_REMAINING_RATIO, 17 | cancel_on_lease_lost: bool = False, 18 | ) -> Decorator[TaskFunc]: 19 | """ 20 | Decorate the task to use automatic heartbeating in background. 21 | 22 | :param lease_remaining_ratio: Number between 0 and 1. The ratio between elapsed time and the lease duration when heartbeating will be performed. Default is 0.5. 23 | :param cancel_on_lease_lost: Whether the task should be cancelled if lease expires. Default is False. 24 | """ 25 | if lease_remaining_ratio <= 0 or lease_remaining_ratio >= 1: 26 | raise ValueError("Lease remaining ratio must be in (0, 1)") 27 | 28 | def decorator(func: TaskFunc) -> TaskFunc: 29 | @wraps(func) 30 | async def _func(context: Context) -> None: 31 | body = asyncio.ensure_future(func(context)) 32 | 33 | async def _heartbeater() -> None: 34 | delay_duration = context.task.lease_duration.total_seconds() * lease_remaining_ratio 35 | while True: 36 | await asyncio.sleep(delay_duration) 37 | try: 38 | await asyncio.shield(context.heartbeat()) 39 | except LeaseLostException: 40 | if cancel_on_lease_lost: 41 | body.cancel() 42 | # Regardless of whether we want the task body to continue 43 | # executing, it makes no sense to continue heartbeating 44 | # since the lease has already been lost. 45 | return 46 | except Exception as e: 47 | # There may be transient errors while heartbeating. In this case 48 | # ignore them until the next heartbeat interval. 
49 | logger.warning(f"Heartbeating on {context.task} failed", exc_info=e) 50 | 51 | heartbeater = asyncio.create_task(_heartbeater()) 52 | try: 53 | await body 54 | finally: 55 | heartbeater.cancel() 56 | try: 57 | await heartbeater 58 | except asyncio.CancelledError: 59 | pass 60 | 61 | return _func 62 | 63 | return decorator 64 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | import os 4 | import random 5 | import time 6 | from contextlib import asynccontextmanager 7 | 8 | import dateutil.tz 9 | import pytest 10 | 11 | import pyncette 12 | from pyncette.dynamodb import dynamodb_repository 13 | from pyncette.mysql import mysql_repository 14 | from pyncette.postgres import postgres_repository 15 | from pyncette.redis import redis_repository 16 | from pyncette.sqlite import sqlite_repository 17 | from utils.timemachine import TimeMachine 18 | 19 | 20 | @pytest.fixture 21 | def timemachine(monkeypatch): 22 | timemachine = TimeMachine(datetime.datetime(2019, 1, 1, 0, 0, 0, tzinfo=dateutil.tz.UTC)) 23 | monkeypatch.setattr(pyncette.pyncette, "_current_time", timemachine.utcnow) 24 | monkeypatch.setattr(asyncio, "sleep", timemachine.sleep) 25 | monkeypatch.setattr(asyncio, "wait_for", timemachine.wait_for) 26 | monkeypatch.setattr(time, "perf_counter", timemachine.perf_counter) 27 | return timemachine 28 | 29 | 30 | def wrap_factory(factory, timemachine): 31 | @asynccontextmanager 32 | async def wrapped_factory(*args, **kwargs): 33 | async with factory(*args, **kwargs) as repo: 34 | yield timemachine.decorate_io(repo) 35 | 36 | return timemachine.decorate_io(wrapped_factory) 37 | 38 | 39 | def random_table_name(): 40 | return "pyncette_{}".format("".join([chr(random.randint(ord("a"), ord("z"))) for _ in range(10)])) 41 | 42 | 43 | # Define new configurations here 44 | 45 | 46 | class PostgresBackend: 47 | __name__ = "postgres" 48 | is_persistent = True 49 | 50 | def get_args(self, timemachine): 51 | return { 52 | "repository_factory": wrap_factory(postgres_repository, timemachine), 53 | "postgres_table_name": random_table_name(), 54 | "postgres_url": os.environ.get("POSTGRES_URL", "postgres://postgres:postgres@localhost/pyncette"), 55 | } 56 | 57 | 58 | class RedisBackend: 59 | __name__ = "redis" 60 | is_persistent = True 61 | 62 | def get_args(self, timemachine): 63 | return { 64 | "repository_factory": wrap_factory(redis_repository, timemachine), 65 | "redis_namespace": random_table_name(), 66 | "redis_timeout": 10, 67 | "redis_url": os.environ.get("REDIS_URL", "redis://localhost"), 68 | } 69 | 70 | 71 | class SqlitePersistedBackend: 72 | __name__ = "sqlite_persisted" 73 | is_persistent = True 74 | 75 | def get_args(self, timemachine): 76 | return { 77 | "repository_factory": wrap_factory(sqlite_repository, timemachine), 78 | "sqlite_database": os.environ.get("SQLITE_DATABASE", "pyncette.db"), 79 | "sqlite_table_name": random_table_name(), 80 | } 81 | 82 | 83 | class DynamoDBBackend: 84 | __name__ = "dynamodb" 85 | is_persistent = True 86 | 87 | def get_args(self, timemachine): 88 | return { 89 | "repository_factory": wrap_factory(dynamodb_repository, timemachine), 90 | "dynamodb_table_name": random_table_name(), 91 | "dynamodb_region_name": "eu-west-1", 92 | "dynamodb_endpoint": os.environ.get("DYNAMODB_ENDPOINT", "http://localhost:4566"), 93 | } 94 | 95 | 96 | class MySQLBackend: 97 | __name__ = "mysql" 98 | 
is_persistent = True 99 | 100 | def get_args(self, timemachine): 101 | return { 102 | "repository_factory": wrap_factory(mysql_repository, timemachine), 103 | "mysql_host": os.environ.get("MYSQL_HOST", "localhost"), 104 | "mysql_database": os.environ.get("MYSQL_DATABASE", "pyncette"), 105 | "mysql_user": os.environ.get("MYSQL_USER", "pyncette"), 106 | "mysql_password": os.environ.get("MYSQL_PASSWORD", "password"), 107 | "mysql_table_name": random_table_name(), 108 | } 109 | 110 | 111 | class DefaultBackend: 112 | __name__ = "default" 113 | is_persistent = False 114 | 115 | def get_args(self, timemachine): 116 | return {"repository_factory": wrap_factory(sqlite_repository, timemachine)} 117 | 118 | 119 | all_backends = [ 120 | PostgresBackend(), 121 | MySQLBackend(), 122 | RedisBackend(), 123 | DynamoDBBackend(), 124 | DefaultBackend(), 125 | SqlitePersistedBackend(), 126 | ] 127 | 128 | 129 | def pytest_addoption(parser): 130 | parser.addoption( 131 | "--backend", 132 | action="append", 133 | default=[], 134 | help="list of repositories to test with", 135 | ) 136 | 137 | 138 | def pytest_generate_tests(metafunc): 139 | if "backend" in metafunc.fixturenames: 140 | metafunc.parametrize( 141 | "backend", 142 | [repository for repository in all_backends if repository.__name__ in metafunc.config.getoption("backend")] or all_backends, 143 | ) 144 | -------------------------------------------------------------------------------- /tests/test_dynamodb.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from botocore.exceptions import ClientError 5 | 6 | from pyncette import dynamodb 7 | 8 | from conftest import random_table_name 9 | 10 | DYNAMODB_ENDPOINT = os.environ.get("DYNAMODB_ENDPOINT", "http://localhost:4566") 11 | 12 | 13 | @pytest.mark.asyncio 14 | @pytest.mark.integration 15 | async def test_dynamodb_create(): 16 | async with dynamodb.dynamodb_repository( 17 | dynamodb_table_name=random_table_name(), 18 | dynamodb_endpoint=DYNAMODB_ENDPOINT, 19 | dynamodb_region_name="eu-west-1", 20 | ) as repository: 21 | table_status = await repository._table.table_status 22 | assert table_status == "ACTIVE" 23 | 24 | 25 | @pytest.mark.asyncio 26 | @pytest.mark.integration 27 | async def test_dynamodb_skip_table_create(): 28 | async with dynamodb.dynamodb_repository( 29 | dynamodb_table_name=random_table_name(), 30 | dynamodb_endpoint=DYNAMODB_ENDPOINT, 31 | dynamodb_region_name="eu-west-1", 32 | dynamodb_skip_table_create=True, 33 | ) as repository: 34 | with pytest.raises(ClientError) as e: 35 | await repository._table.table_status 36 | 37 | assert e.value.response["Error"]["Code"] == "ResourceNotFoundException" 38 | -------------------------------------------------------------------------------- /tests/test_mysql.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import pymysql 5 | import pytest 6 | 7 | from pyncette import mysql, Context 8 | from pyncette.task import Task 9 | 10 | from conftest import random_table_name 11 | 12 | 13 | async def dummy_task(context: Context): 14 | pass # pragma: no cover 15 | 16 | 17 | DUMMY_TASK = Task(name="foo", func=dummy_task, schedule="* * * * *") 18 | 19 | 20 | @pytest.mark.asyncio 21 | @pytest.mark.integration 22 | async def test_invalid_table_name(): 23 | with pytest.raises(ValueError): 24 | await mysql.mysql_repository( 25 | mysql_host=os.environ.get("MYSQL_HOST", "localhost"), 26 | 
mysql_database=os.environ.get("MYSQL_DATABASE", "pyncette"), 27 | mysql_user=os.environ.get("MYSQL_USER", "pyncette"), 28 | mysql_password=os.environ.get("MYSQL_PASSWORD", "password"), 29 | mysql_table_name="spaces in table name", 30 | ).__aenter__() 31 | 32 | 33 | @pytest.mark.asyncio 34 | @pytest.mark.integration 35 | async def test_skip_table_create(): 36 | with pytest.raises(pymysql.err.ProgrammingError): 37 | async with mysql.mysql_repository( 38 | mysql_host=os.environ.get("MYSQL_HOST", "localhost"), 39 | mysql_database=os.environ.get("MYSQL_DATABASE", "pyncette"), 40 | mysql_user=os.environ.get("MYSQL_USER", "pyncette"), 41 | mysql_password=os.environ.get("MYSQL_PASSWORD", "password"), 42 | mysql_table_name=random_table_name(), 43 | mysql_skip_table_create=True, 44 | ) as repository: 45 | await repository.poll_task( 46 | datetime.datetime.now(tz=datetime.timezone.utc), 47 | DUMMY_TASK, 48 | ) 49 | -------------------------------------------------------------------------------- /tests/test_postgres.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import asyncpg 5 | import pytest 6 | 7 | from pyncette import postgres, Context 8 | from pyncette.task import Task 9 | 10 | from conftest import random_table_name 11 | 12 | 13 | async def dummy_task(context: Context): 14 | pass # pragma: no cover 15 | 16 | 17 | DUMMY_TASK = Task(name="foo", func=dummy_task, schedule="* * * * *") 18 | 19 | 20 | @pytest.mark.asyncio 21 | @pytest.mark.integration 22 | async def test_invalid_table_name(): 23 | with pytest.raises(ValueError, match="Table name"): 24 | await postgres.postgres_repository( 25 | postgres_url=os.environ.get("POSTGRES_URL", "postgres://postgres:postgres@localhost/pyncette"), 26 | postgres_table_name="spaces in table name", 27 | ).__aenter__() 28 | 29 | 30 | @pytest.mark.asyncio 31 | @pytest.mark.integration 32 | async def test_skip_table_create(): 33 | with pytest.raises(asyncpg.exceptions.UndefinedTableError): 34 | async with postgres.postgres_repository( 35 | postgres_url=os.environ.get("POSTGRES_URL", "postgres://postgres:postgres@localhost/pyncette"), 36 | postgres_table_name=random_table_name(), 37 | postgres_skip_table_create=True, 38 | ) as repository: 39 | await repository.poll_task( 40 | datetime.datetime.now(tz=datetime.timezone.utc), 41 | DUMMY_TASK, 42 | ) 43 | -------------------------------------------------------------------------------- /tests/test_pyncette.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | from unittest.mock import MagicMock 4 | 5 | import pytest 6 | from croniter.croniter import CroniterBadCronError 7 | 8 | from pyncette import Context 9 | from pyncette import ExecutionMode 10 | from pyncette import FailureMode 11 | from pyncette import Pyncette 12 | from pyncette import PyncetteContext 13 | from pyncette.errors import LeaseLostException 14 | from pyncette.task import _default_partition_selector 15 | from pyncette.utils import with_heartbeat 16 | 17 | from conftest import DefaultBackend 18 | 19 | 20 | def test_invalid_configuration(): 21 | async def dummy(context: Context): 22 | pass # pragma: no cover 23 | 24 | # Exactly one of the following must be specified: schedule, interval, execute_at 25 | with pytest.raises(ValueError): 26 | app = Pyncette() 27 | app.task()(dummy) 28 | 29 | with pytest.raises(ValueError): 30 | app = Pyncette() 31 | 
app.task(execute_at=datetime.datetime.now(tz=datetime.timezone.utc))(dummy) 32 | 33 | with pytest.raises(ValueError): 34 | app = Pyncette() 35 | app.task(interval=datetime.timedelta(seconds=2), schedule="* * * * *")(dummy) 36 | 37 | with pytest.raises(ValueError, match="Duplicate task name"): 38 | app = Pyncette() 39 | app.task(interval=datetime.timedelta(seconds=2), name="task1")(dummy) 40 | app.task(interval=datetime.timedelta(seconds=2), name="task1")(dummy) 41 | 42 | with pytest.raises(CroniterBadCronError): 43 | app = Pyncette() 44 | app.task(schedule="abracadabra")(dummy) 45 | 46 | with pytest.raises( 47 | ValueError, 48 | match="failure_mode is not applicable when execution_mode is AT_MOST_ONCE", 49 | ): 50 | app = Pyncette() 51 | app.task(execution_mode=ExecutionMode.AT_MOST_ONCE, failure_mode=FailureMode.UNLOCK)(dummy) 52 | 53 | with pytest.raises(ValueError, match=r"Invalid timezone specifier 'Gondwana/Atlantis'"): 54 | app = Pyncette() 55 | app.task(schedule="* * * * *", timezone="Gondwana/Atlantis")(dummy) 56 | 57 | with pytest.raises(ValueError): 58 | app = Pyncette() 59 | app.task(interval=datetime.timedelta(seconds=2), timezone="Europe/Dublin")(dummy) 60 | 61 | with pytest.raises(ValueError, match="Extra parameters must be JSON serializable"): 62 | app = Pyncette() 63 | app.task(schedule="* * * * *", extra_arg=object())(dummy) 64 | 65 | with pytest.raises(ValueError, match="Unable to determine name for the task"): 66 | app = Pyncette() 67 | app.task(schedule="* * * * *")(object()) # ty: ignore[invalid-argument-type] 68 | 69 | with pytest.raises(ValueError, match="Unable to determine name for the fixture"): 70 | app = Pyncette() 71 | app.fixture()(object()) 72 | 73 | 74 | def test_instantiate_non_dynamic_task(): 75 | async def dummy(context: Context): 76 | pass # pragma: no cover 77 | 78 | with pytest.raises(ValueError): 79 | app = Pyncette() 80 | app.task(schedule="* * * * *")(dummy).instantiate(name="foo") 81 | 82 | 83 | def test_heartbeat_invalid_configuration(): 84 | async def dummy(context: Context): 85 | pass # pragma: no cover 86 | 87 | with pytest.raises(ValueError): 88 | with_heartbeat(lease_remaining_ratio=-1) 89 | 90 | with pytest.raises(ValueError): 91 | with_heartbeat(lease_remaining_ratio=2) 92 | 93 | 94 | @pytest.mark.asyncio 95 | async def test_dynamic_successful_task_interval(): 96 | app = Pyncette() 97 | 98 | @app.dynamic_task() 99 | async def hello(context: Context) -> None: 100 | pass # pragma: no cover 101 | 102 | with pytest.raises(ValueError, match="instance name must be provided"): 103 | async with app.create() as ctx: 104 | await ctx.unschedule_task(hello) 105 | 106 | 107 | @pytest.mark.asyncio 108 | async def test_continues_heartbeating_after_exception(timemachine): 109 | context = MagicMock() 110 | counter = MagicMock() 111 | 112 | async def _heartbeat(): 113 | counter.heartbeat() 114 | raise Exception("Fail") 115 | 116 | context.heartbeat = _heartbeat 117 | context.task.lease_duration = datetime.timedelta(seconds=2) 118 | 119 | @with_heartbeat() 120 | async def hello(context: Context) -> None: 121 | await asyncio.sleep(10) 122 | 123 | task = asyncio.create_task(hello(context)) 124 | await timemachine.step(datetime.timedelta(seconds=10)) 125 | await task 126 | await timemachine.unwind() 127 | 128 | assert counter.heartbeat.call_count == 9 129 | 130 | 131 | @pytest.mark.asyncio 132 | async def test_stops_heartbeating_if_lease_lost(timemachine): 133 | context = MagicMock() 134 | counter = MagicMock() 135 | 136 | async def _heartbeat(): 137 | 
counter.heartbeat() 138 | raise LeaseLostException(context.task) 139 | 140 | context.heartbeat = _heartbeat 141 | context.task.lease_duration = datetime.timedelta(seconds=2) 142 | 143 | @with_heartbeat() 144 | async def hello(context: Context) -> None: 145 | await asyncio.sleep(10) 146 | 147 | task = asyncio.create_task(hello(context)) 148 | await timemachine.step(datetime.timedelta(seconds=10)) 149 | await task 150 | await timemachine.unwind() 151 | 152 | assert counter.heartbeat.call_count == 1 153 | 154 | 155 | def test_fixture_name_invalid(): 156 | app = Pyncette() 157 | 158 | async def dummy(app_context: PyncetteContext): 159 | pass # pragma: no cover 160 | 161 | with pytest.raises(ValueError): 162 | app.use_fixture("scheduled_at", dummy) 163 | 164 | app.use_fixture("duplicate", dummy) 165 | with pytest.raises(ValueError): 166 | app.use_fixture("duplicate", dummy) 167 | 168 | 169 | @pytest.mark.asyncio 170 | async def test_add_to_context(timemachine): 171 | app = Pyncette(**DefaultBackend().get_args(timemachine)) 172 | 173 | counter = MagicMock() 174 | 175 | @app.task(interval=datetime.timedelta(seconds=2)) 176 | async def successful_task(context: Context) -> None: 177 | context.hello() 178 | 179 | async with app.create() as ctx: 180 | ctx.add_to_context("hello", counter) 181 | task = asyncio.create_task(ctx.run()) 182 | await timemachine.step(datetime.timedelta(seconds=10)) 183 | ctx.shutdown() 184 | await task 185 | await timemachine.unwind() 186 | 187 | assert counter.call_count == 5 188 | 189 | 190 | @pytest.mark.asyncio 191 | async def test_add_to_context_invalid_name(): 192 | app = Pyncette() 193 | 194 | @app.fixture() 195 | async def fixture(app_context: PyncetteContext): 196 | yield None 197 | 198 | counter = MagicMock() 199 | 200 | @app.task(interval=datetime.timedelta(seconds=2)) 201 | async def successful_task(context: Context) -> None: 202 | context.hello() 203 | 204 | async with app.create() as ctx: 205 | ctx.add_to_context("duplicate", counter) 206 | with pytest.raises(ValueError): 207 | ctx.add_to_context("duplicate", counter) 208 | 209 | with pytest.raises(ValueError): 210 | ctx.add_to_context("fixture", counter) 211 | 212 | with pytest.raises(ValueError): 213 | ctx.add_to_context("scheduled_at", counter) 214 | 215 | 216 | def test_partition_count_invalid(): 217 | app = Pyncette() 218 | 219 | with pytest.raises(ValueError, match="Partition count must be greater than or equal to 1"): 220 | 221 | @app.partitioned_task(partition_count=0) 222 | async def hello(context: Context) -> None: 223 | pass # pragma: no cover 224 | 225 | 226 | def test_default_partition_selector_does_not_change(): 227 | # BE CAREFUL IF THIS TEST BREAKS. 228 | # This is a regression test that ensures that the default 229 | # partition key is not changed, as that could lead to all users' 230 | # partitions being remapped. 
231 | assert _default_partition_selector(1000000000000, "Lorem ipsum dolor sit amet") == 222413034928 232 | -------------------------------------------------------------------------------- /tests/test_pyncette_healthcheck.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | 4 | import aiohttp 5 | import pytest 6 | 7 | from pyncette import Pyncette 8 | from pyncette.healthcheck import default_healthcheck 9 | from pyncette.healthcheck import use_healthcheck_server 10 | from pyncette.sqlite import sqlite_repository 11 | 12 | from conftest import wrap_factory 13 | 14 | 15 | def get_healthcheck_port(app_context): 16 | return app_context._root_context._healthcheck.sockets[0].getsockname()[1] 17 | 18 | 19 | @pytest.mark.asyncio 20 | async def test_default_healthcheck_handler_healthy(timemachine): 21 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 22 | 23 | async with app.create() as ctx: 24 | task = asyncio.create_task(ctx.run()) 25 | await timemachine.step(datetime.timedelta(seconds=1.5)) 26 | is_healthy = await default_healthcheck(ctx) 27 | ctx.shutdown() 28 | await task 29 | await timemachine.unwind() 30 | 31 | assert is_healthy 32 | 33 | 34 | @pytest.mark.asyncio 35 | async def test_default_healthcheck_handler_unhealthy(timemachine): 36 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 37 | 38 | async with app.create() as ctx: 39 | task = asyncio.create_task(ctx.run()) 40 | # Advance time without executing callbacks 41 | timemachine._update_offset(timemachine.offset + datetime.timedelta(hours=1)) 42 | is_healthy = await default_healthcheck(ctx) 43 | ctx.shutdown() 44 | await task 45 | await timemachine.unwind() 46 | 47 | assert not is_healthy 48 | 49 | 50 | @pytest.mark.asyncio 51 | async def test_healthcheck_server_success(timemachine): 52 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 53 | 54 | async def healthcheck_handler(app_context): 55 | return True 56 | 57 | # Bind on random port to avoid conflict 58 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 59 | 60 | async with app.create() as ctx, aiohttp.ClientSession() as session: 61 | task = asyncio.create_task(ctx.run()) 62 | async with session.get(f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health") as resp: 63 | assert resp.status == 200 64 | ctx.shutdown() 65 | await task 66 | await timemachine.unwind() 67 | 68 | 69 | @pytest.mark.asyncio 70 | async def test_healthcheck_server_failure(timemachine): 71 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 72 | 73 | async def healthcheck_handler(app_context): 74 | return False 75 | 76 | # Bind on random port to avoid conflict 77 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 78 | 79 | async with app.create() as ctx, aiohttp.ClientSession() as session: 80 | task = asyncio.create_task(ctx.run()) 81 | async with session.get(f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health") as resp: 82 | assert resp.status == 500 83 | ctx.shutdown() 84 | await task 85 | await timemachine.unwind() 86 | 87 | 88 | @pytest.mark.asyncio 89 | async def test_healthcheck_server_exception(timemachine): 90 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 91 | 92 | async def healthcheck_handler(app_context): 93 | raise Exception("oops") 94 | 95 | # Bind on random
port to avoid conflict 96 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 97 | 98 | async with app.create() as ctx, aiohttp.ClientSession() as session: 99 | task = asyncio.create_task(ctx.run()) 100 | async with session.get(f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health") as resp: 101 | assert resp.status == 500 102 | ctx.shutdown() 103 | await task 104 | await timemachine.unwind() 105 | 106 | 107 | @pytest.mark.asyncio 108 | async def test_healthcheck_server_invalid_verb(timemachine): 109 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 110 | 111 | async def healthcheck_handler(app_context): 112 | pass # pragma: no cover 113 | 114 | # Bind on random port to avoid conflict 115 | use_healthcheck_server(app, port=0, bind_address="127.0.0.1", healthcheck_handler=healthcheck_handler) 116 | 117 | async with app.create() as ctx, aiohttp.ClientSession() as session: 118 | task = asyncio.create_task(ctx.run()) 119 | async with session.post( 120 | f"http://127.0.0.1:{get_healthcheck_port(ctx)}/health", 121 | json={"test": "object"}, 122 | ) as resp: 123 | assert resp.status == 405 124 | ctx.shutdown() 125 | await task 126 | await timemachine.unwind() 127 | -------------------------------------------------------------------------------- /tests/test_pyncette_process.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import signal 4 | import subprocess 5 | import time 6 | 7 | import pytest 8 | 9 | 10 | @pytest.mark.integration 11 | def test_signal_handling(): 12 | with subprocess.Popen( 13 | ["coverage", "run", "-m", "tests.test_pyncette_process"], # noqa: S607 14 | env={**os.environ, "LOG_LEVEL": "DEBUG"}, 15 | ) as proc: 16 | time.sleep(2) 17 | proc.send_signal(signal.SIGINT) 18 | ret_code = proc.wait() 19 | 20 | assert ret_code == 0 21 | 22 | 23 | @pytest.mark.integration 24 | def test_signal_handling_uvloop(): 25 | with subprocess.Popen( 26 | ["coverage", "run", "-m", "tests.test_pyncette_process"], # noqa: S607 27 | env={**os.environ, "LOG_LEVEL": "DEBUG", "USE_UVLOOP": "1"}, 28 | ) as proc: 29 | time.sleep(2) 30 | proc.send_signal(signal.SIGINT) 31 | ret_code = proc.wait() 32 | 33 | assert ret_code == 0 34 | 35 | 36 | @pytest.mark.integration 37 | def test_signal_handling_force(): 38 | with subprocess.Popen( 39 | ["coverage", "run", "-m", "tests.test_pyncette_process"], # noqa: S607 40 | env={**os.environ, "LOG_LEVEL": "DEBUG"}, 41 | ) as proc: 42 | time.sleep(2) 43 | proc.send_signal(signal.SIGINT) 44 | time.sleep(1) 45 | proc.send_signal(signal.SIGINT) 46 | ret_code = proc.wait() 47 | 48 | assert ret_code != 0 49 | 50 | 51 | if __name__ == "__main__": 52 | import asyncio 53 | 54 | from pyncette import Context 55 | from pyncette import Pyncette 56 | 57 | app = Pyncette() 58 | 59 | @app.task(interval=datetime.timedelta(seconds=1)) 60 | async def foo(context: Context): 61 | await asyncio.sleep(4) 62 | 63 | app.main() 64 | -------------------------------------------------------------------------------- /tests/test_pyncette_prometheus.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | from unittest.mock import MagicMock 4 | 5 | import pytest 6 | from prometheus_client import generate_latest 7 | 8 | from pyncette import Context 9 | from pyncette import FailureMode 10 | from pyncette import Pyncette 11 | from pyncette.prometheus import use_prometheus 12 | from 
pyncette.sqlite import sqlite_repository 13 | 14 | from conftest import wrap_factory 15 | 16 | 17 | @pytest.mark.asyncio 18 | async def test_prometheus_metrics(timemachine): 19 | app = Pyncette(repository_factory=wrap_factory(sqlite_repository, timemachine)) 20 | use_prometheus(app) 21 | 22 | counter = MagicMock() 23 | 24 | @app.dynamic_task(failure_mode=FailureMode.UNLOCK) 25 | async def dynamic_task_1(context: Context) -> None: 26 | counter.execute() 27 | raise Exception("test") 28 | 29 | @app.task(interval=datetime.timedelta(seconds=2)) 30 | async def task_1(context: Context) -> None: 31 | await context.heartbeat() 32 | counter.execute() 33 | 34 | async with app.create() as ctx: 35 | await ctx.schedule_task(dynamic_task_1, "1", interval=datetime.timedelta(seconds=2)) 36 | task = asyncio.create_task(ctx.run()) 37 | await timemachine.step(datetime.timedelta(seconds=10)) 38 | await ctx.unschedule_task(dynamic_task_1, "1") 39 | 40 | ctx.shutdown() 41 | await task 42 | await timemachine.unwind() 43 | 44 | metrics = generate_latest().decode("ascii").splitlines() 45 | 46 | assert 'pyncette_repository_ops_total{operation="unlock_task",task_name="dynamic_task_1"} 9.0' in metrics 47 | assert 'pyncette_repository_ops_total{operation="commit_task",task_name="task_1"} 5.0' in metrics 48 | assert 'pyncette_repository_ops_total{operation="poll_dynamic_task",task_name="dynamic_task_1"} 11.0' in metrics 49 | assert 'pyncette_repository_ops_total{operation="poll_task",task_name="dynamic_task_1"} 9.0' in metrics 50 | assert 'pyncette_repository_ops_total{operation="poll_task",task_name="task_1"} 11.0' in metrics 51 | assert 'pyncette_repository_ops_total{operation="extend_lease",task_name="task_1"} 5.0' in metrics 52 | assert 'pyncette_repository_ops_total{operation="register_task",task_name="dynamic_task_1"} 1.0' in metrics 53 | assert 'pyncette_repository_ops_total{operation="unregister_task",task_name="dynamic_task_1"} 1.0' in metrics 54 | assert 'pyncette_tasks_total{task_name="dynamic_task_1"} 9.0' in metrics 55 | assert 'pyncette_tasks_total{task_name="task_1"} 5.0' in metrics 56 | -------------------------------------------------------------------------------- /tests/test_redis.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from redis import asyncio as aioredis 5 | 6 | from pyncette import redis 7 | 8 | 9 | @pytest.mark.asyncio 10 | @pytest.mark.integration 11 | async def test_script_reload(monkeypatch): 12 | monkeypatch.setattr(redis, "read_text", lambda *args: 'return { "SUCCESS" }') 13 | 14 | redis_url = os.environ.get("REDIS_URL", "redis://localhost") 15 | redis_pool = aioredis.from_url(redis_url) 16 | 17 | lua_script = redis._LuaScript("dummy") 18 | result = await lua_script.register(redis_pool) 19 | await redis_pool.execute_command("SCRIPT", "FLUSH", "SYNC") 20 | 21 | result = await lua_script.execute(redis_pool, [], []) 22 | 23 | assert result == [b"SUCCESS"] 24 | 25 | 26 | @pytest.mark.asyncio 27 | @pytest.mark.integration 28 | async def test_script_register(monkeypatch): 29 | monkeypatch.setattr(redis, "read_text", lambda *args: 'return { "SUCCESS" }') 30 | 31 | redis_url = os.environ.get("REDIS_URL", "redis://localhost") 32 | redis_pool = aioredis.from_url(redis_url) 33 | 34 | lua_script = redis._LuaScript("dummy") 35 | result = await lua_script.execute(redis_pool, [], []) 36 | 37 | assert result == [b"SUCCESS"] 38 | -------------------------------------------------------------------------------- 
/tests/test_sqlite.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyncette import sqlite 4 | 5 | 6 | @pytest.mark.asyncio 7 | async def test_invalid_table_name(): 8 | with pytest.raises(ValueError): 9 | await sqlite.sqlite_repository( 10 | sqlite_table_name="spaces in table name", 11 | ).__aenter__() 12 | -------------------------------------------------------------------------------- /tests/utils/fakerepository.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import contextlib 4 | import datetime 5 | import logging 6 | from typing import Any 7 | from collections.abc import AsyncIterator 8 | 9 | from pyncette.model import ContinuationToken 10 | from pyncette.model import Lease 11 | from pyncette.model import PollResponse 12 | from pyncette.model import QueryResponse 13 | from pyncette.model import ResultType 14 | from pyncette.repository import Repository 15 | from pyncette.task import Task 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | _LEASE = Lease(object()) 21 | _TASK_SPEC = { 22 | "name": "fake", 23 | "interval": None, 24 | "timezone": None, 25 | "execute_at": None, 26 | "extra_args": {}, 27 | "schedule": "* * * * * *", 28 | } 29 | 30 | 31 | class FakeRepository(Repository): 32 | """Fake repository that synthesizes Pyncette task execution data for testing""" 33 | 34 | _batch_size: int 35 | _records_per_tick: int 36 | 37 | def __init__(self, batch_size: int, records_per_tick: int): 38 | self._batch_size = batch_size 39 | self._records_per_tick = records_per_tick 40 | 41 | async def poll_dynamic_task( 42 | self, 43 | utc_now: datetime.datetime, 44 | task: Task, 45 | continuation_token: ContinuationToken | None = None, 46 | ) -> QueryResponse: 47 | if isinstance(continuation_token, int): 48 | remaining = self._records_per_tick - continuation_token 49 | else: 50 | remaining = self._records_per_tick 51 | 52 | result_count = min(remaining, self._batch_size) # return at most one batch per call 53 | remaining -= result_count 54 | 55 | return QueryResponse( 56 | tasks=[ 57 | ( 58 | task.instantiate_from_spec(_TASK_SPEC), 59 | _LEASE, 60 | ) 61 | for _ in range(result_count) 62 | ], 63 | continuation_token=(self._records_per_tick - remaining) if remaining else None, # number of records already returned this tick 64 | ) 65 | 66 | async def register_task(self, utc_now: datetime.datetime, task: Task) -> None: 67 | pass 68 | 69 | async def unregister_task(self, utc_now: datetime.datetime, task: Task) -> None: 70 | pass 71 | 72 | async def poll_task(self, utc_now: datetime.datetime, task: Task, lease: Lease | None = None) -> PollResponse: 73 | return PollResponse(result=ResultType.READY, scheduled_at=utc_now, lease=_LEASE) 74 | 75 | async def commit_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 76 | pass 77 | 78 | async def unlock_task(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> None: 79 | pass 80 | 81 | async def extend_lease(self, utc_now: datetime.datetime, task: Task, lease: Lease) -> Lease | None: 82 | return lease 83 | 84 | 85 | @contextlib.asynccontextmanager 86 | async def fake_repository( 87 | batch_size: int = 100, 88 | records_per_tick: int = 1000, 89 | **kwargs: Any, 90 | ) -> AsyncIterator[FakeRepository]: 91 | yield FakeRepository(batch_size, records_per_tick) 92 | -------------------------------------------------------------------------------- /tests/utils/timemachine.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | import
heapq 4 | import inspect 5 | import logging 6 | from functools import total_ordering 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @total_ordering 12 | class ScheduledTask: 13 | def __init__(self, execute_at, future): 14 | self.execute_at = execute_at 15 | self.future = future 16 | 17 | def __lt__(self, other): 18 | return self.execute_at.__lt__(other.execute_at) 19 | 20 | def __eq__(self, other): 21 | return self.execute_at.__eq__(other.execute_at) 22 | 23 | 24 | class TimeMachine: 25 | """Utility class that allows us to mock real time in a way that plays well with asyncio without implementing a custom event loop.""" 26 | 27 | def __init__(self, base_time): 28 | self.callbacks = [] 29 | self.io_tasks = [] 30 | self.base_time = base_time 31 | self.offset = datetime.timedelta(seconds=0) 32 | self.spin_iterations = 10 33 | 34 | def decorate_io(self, obj): 35 | """ 36 | Decorates the class or function so that the TimeMachine waits for decorated I/O 37 | operations to complete before proceeding with time shifting. From the application's point 38 | of view, all decorated I/O operations happen instantaneously. 39 | """ 40 | 41 | def wrapper(func): 42 | async def wrapped(*args, **kwargs): 43 | future = asyncio.Future() 44 | self.io_tasks.append(future) 45 | try: 46 | return await func(*args, **kwargs) 47 | finally: 48 | future.set_result(None) 49 | 50 | return wrapped 51 | 52 | if inspect.iscoroutinefunction(obj): 53 | return wrapper(obj) 54 | else: 55 | for name, fn in inspect.getmembers(obj): 56 | if inspect.iscoroutinefunction(fn): 57 | setattr(obj, name, wrapper(fn)) 58 | return obj 59 | 60 | def sleep(self, delay, *args, **kwargs): 61 | future = asyncio.Future() 62 | heapq.heappush( 63 | self.callbacks, 64 | ScheduledTask(self.offset + datetime.timedelta(seconds=delay), future), 65 | ) 66 | future.add_done_callback(self._remove_cancelled_sleep) 67 | logger.debug(f"Registering sleep {id(future)} for {delay}s (resume at T+{self.offset + datetime.timedelta(seconds=delay)})") 68 | return future 69 | 70 | def wait_for(self, fut, timeout, *args, **kwargs): 71 | if timeout is None: 72 | return fut 73 | 74 | future = asyncio.Future() 75 | fut = asyncio.ensure_future(fut) 76 | wait_handle = self.sleep(timeout) 77 | 78 | def _on_timeout(f): 79 | try: 80 | future.set_exception(asyncio.TimeoutError()) 81 | except asyncio.InvalidStateError: 82 | pass 83 | 84 | def _on_completion(f): 85 | try: 86 | future.set_result(f.result()) 87 | wait_handle.cancel() 88 | except asyncio.CancelledError: 89 | pass 90 | except asyncio.InvalidStateError: 91 | pass 92 | 93 | wait_handle.add_done_callback(_on_timeout) 94 | fut.add_done_callback(_on_completion) 95 | return future 96 | 97 | def perf_counter(self): 98 | return self.offset.total_seconds() 99 | 100 | def utcnow(self): 101 | return self.base_time + self.offset 102 | 103 | async def unwind(self): 104 | """Jumps to "infinity", i.e.
continues executing until no more sleeps are scheduled.""" 105 | await self._spin() 106 | while len(self.callbacks) > 0: 107 | task = heapq.heappop(self.callbacks) 108 | self._update_offset(task.execute_at) 109 | try: 110 | task.future.set_result(None) 111 | except asyncio.InvalidStateError: 112 | pass 113 | await self._spin() 114 | 115 | async def step(self, delta=None): 116 | if delta is None: 117 | await self._spin() 118 | else: 119 | await self.jump_to(self.offset + delta) 120 | 121 | async def jump_to(self, offset): 122 | if offset < self.offset: 123 | raise ValueError("Cannot go back in time (yet)!") 124 | 125 | await self._spin() 126 | while len(self.callbacks) > 0: 127 | if self.callbacks[0].execute_at > offset: 128 | break 129 | task = heapq.heappop(self.callbacks) 130 | self._update_offset(task.execute_at) 131 | try: 132 | task.future.set_result(None) 133 | except asyncio.InvalidStateError: 134 | pass 135 | await self._spin() 136 | self._update_offset(offset) 137 | 138 | def _remove_cancelled_sleep(self, fut): 139 | if fut.cancelled(): 140 | try: 141 | self.callbacks = [callback for callback in self.callbacks if callback.future is not fut] 142 | heapq.heapify(self.callbacks) 143 | logger.debug(f"Removed cancelled sleep {id(fut)}") 144 | except ValueError: 145 | pass 146 | 147 | async def _spin(self): 148 | for _ in range(self.spin_iterations): 149 | # First we wait for any pending I/O futures to complete 150 | if self.io_tasks: 151 | io_tasks = self.io_tasks 152 | self.io_tasks = [] 153 | await asyncio.gather(*io_tasks) 154 | 155 | # Then we just jump to the back of the callback queue before completing 156 | future = asyncio.Future() 157 | loop = asyncio.get_event_loop() 158 | loop.call_soon(future.set_result, None) 159 | await future 160 | 161 | def _update_offset(self, new_offset): 162 | if self.offset != new_offset: 163 | self.offset = new_offset 164 | logger.debug(f"Jumped to T+{new_offset.total_seconds()}s") 165 | --------------------------------------------------------------------------------