├── .github ├── actions │ └── publish │ │ └── action.yaml └── workflows │ └── release.yaml ├── .gitignore ├── LICENSE ├── README.md ├── docs └── worker.png ├── pdm.lock ├── pyproject.toml ├── src └── pytask │ ├── __init__.py │ ├── __main__.py │ ├── flags │ ├── __init__.py │ └── flags.py │ ├── job │ ├── __init__.py │ ├── job.py │ └── types.py │ ├── task_queue │ ├── __init__.py │ ├── constants.py │ ├── task_queue.py │ └── types.py │ └── worker │ ├── __init__.py │ ├── async_worker.py │ ├── base_worker.py │ ├── concurrent_worker.py │ └── worker.py └── tests └── __init__.py /.github/actions/publish/action.yaml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | description: Publish a package to PyPI 4 | 5 | inputs: 6 | version: 7 | description: The version to release 8 | required: true 9 | 10 | pypi_token: 11 | description: The PyPI token to use 12 | required: true 13 | 14 | gpat_token: 15 | description: The Github token to use 16 | required: true 17 | 18 | runs: 19 | using: composite 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v4 23 | with: 24 | token: ${{ inputs.gpat_token }} 25 | 26 | - name: Set up PDM 27 | uses: pdm-project/setup-pdm@v4 28 | with: 29 | python-version: 3.10.5 30 | 31 | - name: Update package versions 32 | run: | 33 | sed -i "s/^version = \".*/version = \"${{ inputs.version }}\"/" pyproject.toml 34 | shell: bash 35 | 36 | - name: Commit version changes to repo 37 | run: | 38 | git config user.name "GitHub Actions" 39 | git config user.email "github-actions@github.com" 40 | git add pyproject.toml 41 | 42 | git commit -m "Update version to ${{ inputs.version }}" 43 | git push origin main 44 | shell: bash 45 | 46 | - name: Publish 47 | run: pdm publish -u __token__ -P ${{ inputs.pypi_token }} -r pypi -p . 48 | shell: bash 49 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | version: 7 | type: string 8 | description: The version to release 9 | 10 | jobs: 11 | release: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v4 16 | 17 | - name: Publish to pypi 18 | uses: ./.github/actions/publish 19 | with: 20 | version: ${{ inputs.version }} 21 | pypi_token: ${{ secrets.PYPI_TOKEN }} 22 | gpat_token: ${{ secrets.GPAT_TOKEN }} 23 | 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm-project.org/#use-with-ide 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/
163 | 
164 | data
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 Jayden Pyles
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![Logo Picture](https://github.com/jaypyles/pytask/blob/main/docs/worker.png)
2 | 
3 | # pytask
4 | 
5 | A simple sqlite3-based job queue with a worker. Its main purpose is to run jobs from a queue. Jobs are not popped from the queue by default, which means the queue can also act as a history.
6 | 
7 | ## Installation
8 | 
9 | `pip install pytask-queue`
10 | 
11 | ## Usage
12 | 
13 | The worker runs the function `func` for each job, passing it a `Job` object. This means you can alter the job object inside the function, and the updated job will be saved back to the queue.
14 | 
15 | ```python
16 | # python process 1
17 | from pytask import Queue, Job, SQLDataType, SQLColumnConditions
18 | 
19 | queue = Queue(schema=[
20 |     ("foo", SQLDataType.INTEGER, [SQLColumnConditions.NOT_NULL]),
21 |     ("bar", SQLDataType.TEXT, [SQLColumnConditions.NOT_NULL]),
22 |     ("baz", SQLDataType.JSON, [SQLColumnConditions.NOT_NULL])
23 | ])
24 | queue.insert(Job(data={"foo": 1, "bar": "test", "baz": {"foo": "bar"}}))
25 | ```
26 | 
27 | ```python
28 | # python process 2
29 | from pytask import Queue, Worker, Job
30 | 
31 | # A Queue constructed with the same path connects to the same underlying queue
32 | queue = Queue()
33 | 
34 | def func(job: Job):
35 |     # Do something with job
36 |     job.data["foo"] += 1
37 | 
38 | worker = Worker(queue, func)
39 | worker.run()
40 | ```
41 | 
42 | Creating multiple queues or multiple workers is possible. Constructing a new queue object won't actually create a new queue; it just opens a new connection to the same underlying database. This means you can have multiple queue objects pointing to the same queue, or use the same queue object for multiple workers, as in the sketch below.
43 | 
44 | Be careful to avoid race conditions when using the same queue object for multiple workers.
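45 | 
46 | For example, here is a minimal sketch of two workers sharing one queue object, each in its own thread since `Worker.run()` blocks (this assumes the schema from the usage example above already exists in the database):
47 | 
48 | ```python
49 | import threading
50 | 
51 | from pytask import Queue, Worker, Job
52 | 
53 | queue = Queue()  # the same default path connects to the same underlying queue
54 | 
55 | def func(job: Job):
56 |     job.data["foo"] += 1
57 | 
58 | # Two workers polling the same queue. Note: get_oldest_pending() does a
59 | # select-then-update, which is not atomic across workers, so duplicate
60 | # pickups are possible -- the race condition warned about above.
61 | workers = [Worker(queue, func) for _ in range(2)]
62 | threads = [threading.Thread(target=worker.run, daemon=True) for worker in workers]
63 | 
64 | for thread in threads:
65 |     thread.start()
66 | 
67 | for thread in threads:
68 |     thread.join()
69 | ```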
70 | 
71 | ## Flags
72 | 
73 | Flags are used to configure the behavior of the queue and worker.
74 | 
75 | Current flags:
76 | 
77 | - `auto_convert_json_keys`: If True, the queue automatically serializes the values of `JSON` columns to JSON strings when writing, and parses them back when reading. Useful for retrieving and manipulating JSON data.
78 | - `pop_after_processing`: If True, the job will be popped from the queue after processing.
79 | 
80 | ```python
81 | import logging
82 | 
83 | from pytask import Queue, Worker, Job, SQLDataType, SQLColumnConditions, Flags
84 | 
85 | logger = logging.getLogger(__name__)
86 | 
87 | flags = Flags(auto_convert_json_keys=True, pop_after_processing=True)
88 | queue = Queue(schema=[("foo", SQLDataType.INTEGER, [SQLColumnConditions.NOT_NULL])], flags=flags)
89 | 
90 | worker = Worker(queue, func, logger=logger)  # `func` as defined above
91 | worker.run()
92 | ```
93 | 
94 | ## Concurrent Worker
95 | 
96 | The concurrent worker runs jobs in parallel, using a thread pool (`concurrent.futures.ThreadPoolExecutor`) to process them.
97 | 
98 | ```python
99 | from pytask import Queue, ConcurrentWorker, Job, SQLDataType, SQLColumnConditions
100 | 
101 | # `queue`, `func`, and `logger` as defined above
102 | worker = ConcurrentWorker(queue, func, logger=logger, interval=1, max_workers=16)
103 | worker.run()
104 | ```
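105 | 
106 | ## Async Worker
107 | 
108 | The async worker (`AsyncWorker`) is the asyncio counterpart to `Worker`: `func` must be a coroutine function, which the worker awaits for each job. A minimal sketch, assuming the same queue as in the examples above:
109 | 
110 | ```python
111 | import asyncio
112 | 
113 | from pytask import Queue, AsyncWorker, Job
114 | 
115 | queue = Queue()
116 | 
117 | async def func(job: Job):
118 |     job.data["foo"] += 1
119 | 
120 | worker = AsyncWorker(queue, func, interval=1)
121 | asyncio.run(worker.run())
122 | ```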
123 | 
--------------------------------------------------------------------------------
/docs/worker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaypyles/pytask/57e66446a3204983311792aae5af280ca5f644a7/docs/worker.png
--------------------------------------------------------------------------------
/pdm.lock:
--------------------------------------------------------------------------------
1 | # This file is @generated by PDM.
2 | # It is not intended for manual editing.
3 | 
4 | [metadata]
5 | groups = ["default"]
6 | strategy = ["inherit_metadata"]
7 | lock_version = "4.5.0"
8 | content_hash = "sha256:59a09000a5e4eba2ada1683c0c853099add325e77a24fd046c5694a3273cf7ed"
9 | 
10 | [[metadata.targets]]
11 | requires_python = "==3.10.*"
12 | 
13 | [[package]]
14 | name = "typing-extensions"
15 | version = "4.12.2"
16 | requires_python = ">=3.8"
17 | summary = "Backported and Experimental Type Hints for Python 3.8+"
18 | groups = ["default"]
19 | marker = "python_version == \"3.10\""
20 | files = [
21 |     {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
22 |     {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
23 | ]
24 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "pytask-queue"
3 | version = "1.0.3"
4 | description = "A simple queue for Python tasks"
5 | authors = [{ name = "Jayden Pyles", email = "jpylesbusiness@gmail.com" }]
6 | dependencies = ["typing-extensions>=4.12.2"]
7 | requires-python = ">=3.10"
8 | readme = "README.md"
9 | license = { text = "MIT" }
10 | 
11 | [project.urls]
12 | repository = "https://github.com/jaypyles/pytask"
13 | 
14 | [tool.pdm]
15 | distribution = true
16 | 
17 | [tool.pdm.dev-dependencies]
18 | dev = ["ipython>=8.26.0"]
19 | 
20 | [tool.pyright]
21 | include = ["./src/"]
22 | exclude = ["**/__pycache__"]
23 | ignore = []
24 | defineConstant = { DEBUG = true }
25 | stubPath = ""
26 | 
27 | reportMissingImports = true
28 | reportMissingTypeStubs = false
29 | reportAny = false
30 | reportUnknownVariableType = false
31 | reportUnknownMemberType = false
32 | reportExplicitAny = false
33 | reportCallInDefaultInitializer = false
34 | 
35 | pythonVersion = "3.10"
36 | pythonPlatform = "Linux"
37 | 
38 | [tool.isort]
39 | length_sort = "1"
40 | profile = "black"
41 | sections = "STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER"
42 | import_heading_stdlib = "STL"
43 | import_heading_thirdparty = "PDM"
44 | import_heading_firstparty = "LOCAL"
45 | import_heading_localfolder = "LOCAL"
46 | 
47 | [tool.pdm.resolution]
48 | respect-source-order = true
49 | 
50 | [[tool.pdm.source]]
51 | name = "pypi"
52 | url = "https://pypi.org/simple"
53 | 
--------------------------------------------------------------------------------
/src/pytask/__init__.py:
--------------------------------------------------------------------------------
1 | from .task_queue import Queue
2 | from .worker import Worker, ConcurrentWorker, AsyncWorker
3 | from .job import Job
4 | from .flags import Flags
5 | from .task_queue.types import SQLDataType, SQLColumnConditions
6 | 
7 | __all__ = [
8 |     "Queue",
9 |     "Worker",
10 |     "ConcurrentWorker",
11 |     "AsyncWorker",
12 |     "Job",
13 |     "Flags",
14 |     "SQLDataType",
15 |     "SQLColumnConditions",
16 | ]
17 | 
--------------------------------------------------------------------------------
/src/pytask/__main__.py:
--------------------------------------------------------------------------------
1 | from pytask.task_queue.task_queue import Queue
2 | from pytask.job.job import Job
3 | from pytask.worker.concurrent_worker import ConcurrentWorker
4 | from pytask.task_queue.types import SQLDataType, SQLColumnConditions
5 | import logging
6 | 
7 | logger = logging.getLogger(__name__)
8 | logger.setLevel(logging.INFO)
9 | 
10 | if not logger.hasHandlers():
11 |     handler = logging.StreamHandler()
12 |     formatter = logging.Formatter(
13 |         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
14 |     )
15 |     handler.setFormatter(formatter)
16 |     logger.addHandler(handler)
17 | 
18 | 
19 | def func(job: Job):
20 |     logger.info(f"Processing job: {job.task_id}, {job.data}")
21 |     job.data["foo"] += 2
22 | 
23 | 
24 | def insert_jobs(queue: Queue):
25 |     for i in range(1, 201):
26 |         queue.insert(Job(data={"foo": i, "bar": f"test{i}", "baz": {"foo": "bar"}}))
27 | 
28 | 
29 | def main():
30 |     queue = Queue(
31 |         schema=[
32 |             ("foo", SQLDataType.INTEGER, [SQLColumnConditions.NOT_NULL]),
33 |             ("bar", SQLDataType.TEXT, [SQLColumnConditions.NOT_NULL]),
34 |             ("baz", SQLDataType.JSON, [SQLColumnConditions.NOT_NULL]),
35 |         ],
36 |     )
37 | 
38 |     worker = ConcurrentWorker(queue, func, logger=logger, interval=1, max_workers=16)
39 |     insert_jobs(queue)
40 |     print("Requested Jobs are: ", queue.get_all(search_conditions={"foo": 1}))
41 |     worker.run()
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     main()
46 | 
--------------------------------------------------------------------------------
/src/pytask/flags/__init__.py:
--------------------------------------------------------------------------------
1 | from .flags import Flags
2 | 
3 | __all__ = ["Flags"]
4 | 
--------------------------------------------------------------------------------
/src/pytask/flags/flags.py:
--------------------------------------------------------------------------------
1 | class Flags:
2 |     """
3 |     Flags are used to configure the behavior of the queue and worker.
4 | 
5 |     Parameters
6 |     ----------
7 |     auto_convert_json_keys : bool
8 |         If True, the queue automatically serializes the values of JSON-typed columns to JSON strings when writing, and parses them back when reading. Useful for retrieving and manipulating JSON data.
9 | 
10 |     pop_after_processing : bool
11 |         If True, the job will be popped from the queue after processing.
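12 | 
13 |     Examples
14 |     --------
15 |     A sketch: with auto_convert_json_keys=True (the default), the value of a
16 |     JSON column round-trips as a dict:
17 | 
18 |     >>> queue = Queue(schema=[("baz", SQLDataType.JSON, [])], flags=Flags())
19 |     >>> queue.insert(Job(data={"baz": {"foo": "bar"}}))
20 |     >>> queue.get_all()[0].data["baz"]
21 |     {'foo': 'bar'}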
12 | """ 13 | 14 | auto_convert_json_keys: bool = True 15 | pop_after_processing: bool = False 16 | 17 | def __init__( 18 | self, auto_convert_json_keys: bool = True, pop_after_processing: bool = False 19 | ): 20 | self.auto_convert_json_keys = auto_convert_json_keys 21 | self.pop_after_processing = pop_after_processing 22 | -------------------------------------------------------------------------------- /src/pytask/job/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["Job"] 2 | 3 | from .job import Job 4 | -------------------------------------------------------------------------------- /src/pytask/job/job.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | from typing_extensions import override 3 | from typing import Any 4 | from datetime import datetime 5 | import sqlite3 6 | from pytask.job.types import JobStatus 7 | 8 | 9 | class Job: 10 | def __init__( 11 | self, 12 | task_id: str | None = None, 13 | status: JobStatus = "pending", 14 | data: dict[str, Any] = {}, 15 | ): 16 | self.task_id: str = task_id if task_id is not None else uuid4().hex 17 | self.status: JobStatus = status 18 | self.created_at: str = datetime.now().isoformat() 19 | self.updated_at: str = datetime.now().isoformat() 20 | self.data: dict[str, Any] = data 21 | 22 | @override 23 | def __str__(self): 24 | return str(self.flat()) 25 | 26 | @override 27 | def __repr__(self): 28 | return str(self.flat()) 29 | 30 | @staticmethod 31 | def create_from_row(row: sqlite3.Row): 32 | exclude_keys = {"id", "task_id", "status", "created_at", "updated_at"} 33 | job = Job() 34 | job.task_id = row["task_id"] 35 | job.status = row["status"] 36 | job.created_at = row["created_at"] 37 | job.updated_at = row["updated_at"] 38 | job.data = {key: row[key] for key in row.keys() if key not in exclude_keys} 39 | return job 40 | 41 | def to_dict(self) -> dict[str, Any]: 42 | return { 43 | "task_id": self.task_id, 44 | "status": self.status, 45 | "created_at": self.created_at, 46 | "updated_at": self.updated_at, 47 | **self.data, 48 | } 49 | 50 | def flat(self) -> dict[str, Any]: 51 | return { 52 | "task_id": self.task_id, 53 | "status": self.status, 54 | "created_at": self.created_at, 55 | "updated_at": self.updated_at, 56 | **{ 57 | key: value 58 | for key, value in self.data.items() 59 | if not key.startswith("_") 60 | }, 61 | } 62 | -------------------------------------------------------------------------------- /src/pytask/job/types.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | JobStatus = Literal["pending", "running", "completed", "failed"] -------------------------------------------------------------------------------- /src/pytask/task_queue/__init__.py: -------------------------------------------------------------------------------- 1 | from .task_queue import Queue 2 | 3 | __all__ = ["Queue"] 4 | -------------------------------------------------------------------------------- /src/pytask/task_queue/constants.py: -------------------------------------------------------------------------------- 1 | BASE_SCHEMA = "id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, status TEXT, created_at DATETIME, updated_at DATETIME" 2 | DEFAULT_PATH = "data/queue.db" 3 | -------------------------------------------------------------------------------- /src/pytask/task_queue/task_queue.py: 
--------------------------------------------------------------------------------
/src/pytask/task_queue/task_queue.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sqlite3
3 | from typing import Any
4 | 
5 | from pytask.task_queue.constants import BASE_SCHEMA, DEFAULT_PATH
6 | from pytask.job.job import Job
7 | from pytask.task_queue.types import SQLDataType, SQLColumnConditions
8 | import json
9 | 
10 | from pytask.flags import Flags
11 | 
12 | 
13 | class Queue:
14 |     """
15 |     A queue is a collection of jobs that are waiting to be processed. Using the default path,
16 |     the queue will be stored in the current working directory, under ./data/queue.db.
17 | 
18 |     The queue is stored in a SQLite database, in a table named "job". Creating more than one
19 |     Queue object accesses the same queue, unless the path is changed.
20 | 
21 |     Parameters
22 |     ----------
23 |     schema : list[tuple[str, SQLDataType, list[SQLColumnConditions]]]
24 |         The schema of the queue.
25 |     path : str
26 |         The path to the SQLite database. Defaults to ./data/queue.db.
27 |     flags : Flags
28 |         The flags to configure the behavior of the queue.
29 |     """
30 | 
31 |     def __init__(
32 |         self,
33 |         schema: list[tuple[str, SQLDataType, list[SQLColumnConditions]]] | None = None,
34 |         path: str = DEFAULT_PATH,
35 |         flags: Flags | None = None,
36 |     ):
37 |         # None defaults avoid mutable default arguments shared between instances
38 |         self.schema: list[tuple[str, SQLDataType, list[SQLColumnConditions]]] = (
39 |             schema if schema is not None else []
40 |         )
41 |         self.path: str = path
42 |         self.base_schema: str = BASE_SCHEMA
43 |         self.sql_schema: str = self.__create_sql_schema()
44 |         self.insert_schema: str = self.__create_insert_schema()
45 |         self.json_keys: list[str] = self.__get_json_keys()
46 |         self.flags: Flags = flags if flags is not None else Flags()
47 |         _ = self.__create_table()
48 | 
49 |     def insert(self, job: Job):
50 |         with sqlite3.connect(self.path) as conn:
51 |             if self.flags.auto_convert_json_keys:
52 |                 self.__dump_json_keys(job)
53 | 
54 |             _ = conn.execute(self.insert_schema, job.to_dict())
55 | 
56 |     def update(self, job: Job):
57 |         update_schema = self.__create_update_schema(job.data)
58 | 
59 |         if self.flags.auto_convert_json_keys:
60 |             self.__dump_json_keys(job)
61 | 
62 |         with sqlite3.connect(self.path) as conn:
63 |             _ = conn.execute(update_schema, job.to_dict())
64 | 
65 |     def delete(self, task_id: str):
66 |         with sqlite3.connect(self.path) as conn:
67 |             _ = conn.execute("DELETE FROM job WHERE task_id = ?", (task_id,))
68 | 
69 |         return True
70 | 
71 |     def get(self, task_id: str) -> Job | None:
72 |         with self.__connect() as conn:
73 |             cursor = conn.execute("SELECT * FROM job WHERE task_id = ?", (task_id,))
74 |             row = cursor.fetchone()
75 | 
76 |             if row:
77 |                 job = Job.create_from_row(row)
78 | 
79 |                 # Parse JSON columns, consistent with get_all and get_oldest_pending
80 |                 if self.flags.auto_convert_json_keys:
81 |                     self.__load_json_keys(job)
82 | 
83 |                 return job
84 | 
85 |             return None
86 | 
87 |     def get_all(self, search_conditions: dict[str, Any] | None = None) -> list[Job]:
88 |         """
89 |         Get all jobs that match the search conditions.
90 | 
91 |         Parameters
92 |         ----------
93 |         search_conditions : dict[str, Any]
94 |             The conditions to search for. Currently only supports equality: foo = 1 is expressed as search_conditions = {"foo": 1}.
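95 | 
96 |         Examples
97 |         --------
98 |         queue.get_all(search_conditions={"foo": 1}) returns every job whose foo column equals 1; queue.get_all() returns all jobs.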
85 | """ 86 | conditions_str = " AND ".join([f"{key} = :{key}" for key in search_conditions]) 87 | where_clause = f"WHERE {conditions_str}" if conditions_str else "" 88 | 89 | with self.__connect() as conn: 90 | cursor = conn.execute( 91 | f"SELECT * FROM job {where_clause}", 92 | search_conditions, 93 | ) 94 | 95 | rows = cursor.fetchall() 96 | jobs = [Job.create_from_row(row) for row in rows] 97 | 98 | if self.flags.auto_convert_json_keys: 99 | for job in jobs: 100 | self.__load_json_keys(job) 101 | 102 | return jobs 103 | 104 | def get_oldest_pending(self) -> Job | None: 105 | with self.__connect() as conn: 106 | cursor = conn.execute( 107 | "SELECT * FROM job WHERE status = 'pending' ORDER BY created_at ASC LIMIT 1" 108 | ) 109 | row = cursor.fetchone() 110 | 111 | if row: 112 | job = Job.create_from_row(row) 113 | 114 | job.status = "running" 115 | self.update(job) 116 | 117 | if self.flags.auto_convert_json_keys: 118 | self.__load_json_keys(job) 119 | 120 | return job 121 | 122 | return None 123 | 124 | def __connect(self) -> sqlite3.Connection: 125 | connection = sqlite3.connect(self.path) 126 | connection.row_factory = sqlite3.Row 127 | return connection 128 | 129 | def __create_sql_schema(self) -> str: 130 | schema_parts: list[str] = [] 131 | for column in self.schema: 132 | column_name, column_type, column_conditions = column 133 | schema_parts.append( 134 | f"{column_name} {column_type.value} {' '.join([condition.value for condition in column_conditions])}" 135 | ) 136 | 137 | schema = ", ".join(schema_parts) 138 | job_schema = self.base_schema 139 | 140 | if schema: 141 | job_schema += f", {schema}" 142 | 143 | return f"""CREATE TABLE IF NOT EXISTS job ( 144 | {job_schema} 145 | )""" 146 | 147 | def __create_table(self): 148 | if not os.path.exists(self.path): 149 | os.makedirs(os.path.dirname(self.path), exist_ok=True) 150 | 151 | try: 152 | with sqlite3.connect(self.path) as conn: 153 | _ = conn.execute(self.sql_schema) 154 | except Exception: 155 | return False 156 | 157 | return True 158 | 159 | def __create_insert_schema(self) -> str: 160 | other_columns: list[str] = [] 161 | 162 | for column in self.schema: 163 | column_name, _, _ = column 164 | other_columns.append(column_name) 165 | 166 | other_columns_str = ", ".join(other_columns) 167 | other_columns_values = ", ".join([f":{col}" for col in other_columns]) 168 | 169 | return f""" 170 | INSERT INTO job (task_id, status, created_at, updated_at, {other_columns_str}) 171 | VALUES (:task_id, :status, :created_at, :updated_at, {other_columns_values}); 172 | """ 173 | 174 | def __create_update_schema(self, extra_columns: dict[str, Any] = {}) -> str: 175 | extra_columns_str = ", ".join([f"{col} = :{col}" for col in extra_columns]) 176 | 177 | return f""" 178 | UPDATE job SET 179 | status = :status, 180 | updated_at = :updated_at{f", {extra_columns_str}" if extra_columns_str else ""} 181 | WHERE task_id = :task_id; 182 | """ 183 | 184 | def __load_json_keys(self, job: Job): 185 | for key in self.json_keys: 186 | job.data[key] = json.loads(job.data[key]) 187 | 188 | def __dump_json_keys(self, job: Job): 189 | for key in self.json_keys: 190 | if not isinstance(job.data[key], str): 191 | job.data[key] = json.dumps(job.data[key]) 192 | 193 | def __get_json_keys(self) -> list[str]: 194 | json_keys: list[str] = [] 195 | 196 | for column in self.schema: 197 | column_name, column_type, _ = column 198 | 199 | if column_type == SQLDataType.JSON: 200 | json_keys.append(column_name) 201 | 202 | return json_keys 203 | 
--------------------------------------------------------------------------------
/src/pytask/task_queue/types.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | 
3 | 
4 | class SQLDataType(Enum):
5 |     INTEGER = "INTEGER"
6 |     TEXT = "TEXT"
7 |     JSON = "JSON"
8 |     BOOLEAN = "BOOLEAN"
9 |     FLOAT = "FLOAT"
10 |     DATETIME = "DATETIME"
11 | 
12 | 
13 | class SQLColumnConditions(Enum):
14 |     NOT_NULL = "NOT NULL"
15 |     NULL = "NULL"
16 |     UNIQUE = "UNIQUE"
17 |     PRIMARY_KEY = "PRIMARY KEY"
18 |     FOREIGN_KEY = "FOREIGN KEY"
19 |     CHECK = "CHECK"
20 |     DEFAULT = "DEFAULT"
21 | 
--------------------------------------------------------------------------------
/src/pytask/worker/__init__.py:
--------------------------------------------------------------------------------
1 | from .worker import Worker
2 | from .concurrent_worker import ConcurrentWorker
3 | from .async_worker import AsyncWorker
4 | 
5 | __all__ = ["Worker", "ConcurrentWorker", "AsyncWorker"]
6 | 
--------------------------------------------------------------------------------
/src/pytask/worker/async_worker.py:
--------------------------------------------------------------------------------
1 | from pytask.task_queue.task_queue import Queue
2 | from typing import Callable, Any
3 | from pytask.job import Job
4 | import asyncio
5 | from collections.abc import Coroutine
6 | 
7 | import logging
8 | 
9 | logger = logging.getLogger(__name__)
10 | 
11 | 
12 | class AsyncWorker:
13 |     def __init__(
14 |         self,
15 |         queue: Queue,
16 |         func: Callable[[Job], Coroutine[Any, Any, Any]],
17 |         logger: logging.Logger | None = None,
18 |         interval: int = 1,
19 |     ):
20 |         self.queue: Queue = queue
21 |         self.func: Callable[[Job], Coroutine[Any, Any, Any]] = func
22 |         self.interval: int = interval
23 |         self.logger: logging.Logger | None = logger
24 | 
25 |     async def run(self):
26 |         while True:
27 |             job = self.queue.get_oldest_pending()
28 | 
29 |             if job:
30 |                 if self.logger:
31 |                     self.logger.info(f"Processing job: {job}")
32 | 
33 |                 await self.do(job)
34 | 
35 |                 if self.queue.flags.pop_after_processing:
36 |                     _ = self.queue.delete(job.task_id)
37 | 
38 |                     if self.logger:
39 |                         self.logger.info(f"Job {job.task_id} removed from queue.")
40 |                 else:
41 |                     job.status = "completed"
42 |                     self.queue.update(job)
43 | 
44 |                     if self.logger:
45 |                         self.logger.info(f"Job {job.task_id} marked as completed.")
46 |             else:
47 |                 if self.logger:
48 |                     self.logger.info("No pending jobs found.")
49 | 
50 |             if self.interval > 0:
51 |                 # time.sleep would block the event loop; asyncio.sleep yields to it
52 |                 await asyncio.sleep(self.interval)
53 | 
54 |     async def do(self, job: Job):
55 |         await self.func(job)
56 | 
--------------------------------------------------------------------------------
/src/pytask/worker/base_worker.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from pytask.task_queue.task_queue import Queue
3 | from typing import Callable, Any
4 | from pytask.job.job import Job
5 | 
6 | import logging
7 | 
8 | logger = logging.getLogger(__name__)
9 | 
10 | 
11 | class BaseWorker(ABC):
12 |     def __init__(
13 |         self,
14 |         queue: Queue,
15 |         func: Callable[[Job], Any],
16 |         logger: logging.Logger | None = None,
17 |         interval: int = 1,
18 |     ):
19 |         self.queue: Queue = queue
20 |         self.func: Callable[[Job], Any] = func
21 |         self.interval: int = interval
22 |         self.logger: logging.Logger | None = logger
23 | 
24 |     @abstractmethod
25 |     def run(self) -> None:
26 |         raise NotImplementedError("Subclass must implement run method")
27 | 
28 |     def do(self, job: Job):
29 |         self.func(job)
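30 | 
31 | 
32 | # A custom worker only needs to subclass BaseWorker and implement run().
33 | # A hypothetical sketch of a worker that drains the queue once and exits:
34 | #
35 | #     class DrainWorker(BaseWorker):
36 | #         def run(self) -> None:
37 | #             while (job := self.queue.get_oldest_pending()) is not None:
38 | #                 self.do(job)
39 | #                 job.status = "completed"
40 | #                 self.queue.update(job)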
41 | 
--------------------------------------------------------------------------------
/src/pytask/worker/concurrent_worker.py:
--------------------------------------------------------------------------------
1 | import threading
2 | from concurrent.futures import ThreadPoolExecutor
3 | from pytask.job import Job
4 | from typing import Callable, Any
5 | from pytask.task_queue import Queue
6 | import logging
7 | from typing_extensions import override
8 | 
9 | from pytask.worker.base_worker import BaseWorker
10 | 
11 | 
12 | class ConcurrentWorker(BaseWorker):
13 |     def __init__(
14 |         self,
15 |         queue: Queue,
16 |         func: Callable[[Job], Any],
17 |         max_workers: int = 5,
18 |         interval: int = 1,
19 |         logger: logging.Logger | None = None,
20 |     ):
21 |         self.max_workers: int = max_workers
22 |         self.lock: threading.Lock = threading.Lock()
23 |         self._stop_event: threading.Event = threading.Event()
24 | 
25 |         super().__init__(queue, func, logger, interval)
26 | 
27 |     @override
28 |     def run(self) -> None:
29 |         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
30 |             while not self._stop_event.is_set():
31 |                 job = self.queue.get_oldest_pending()
32 | 
33 |                 if job:
34 |                     with self.lock:
35 |                         _ = executor.submit(self.process_job, job)
36 | 
37 |                     if self.logger:
38 |                         self.logger.info(f"Job {job.task_id} submitted for processing.")
39 |                 else:
40 |                     if self.logger:
41 |                         self.logger.info("No pending jobs found.")
42 | 
43 |                 # Waiting on the stop event honors the configured interval and
44 |                 # wakes up immediately when stop() is called
45 |                 _ = self._stop_event.wait(self.interval)
46 | 
47 |     def process_job(self, job: Job) -> None:
48 |         try:
49 |             if self.logger:
50 |                 self.logger.info(f"Processing job: {job.task_id}, {job.data}")
51 | 
52 |             # Process the job
53 |             self.do(job)
54 | 
55 |             # After processing, update the job status outside the lock
56 |             self.update_job(job)
57 | 
58 |         except Exception as e:
59 |             if self.logger:
60 |                 self.logger.error(f"Error processing job {job.task_id}: {e}")
61 | 
62 |     def update_job(self, job: Job) -> None:
63 |         with self.lock:
64 |             if self.queue.flags.pop_after_processing:
65 |                 _ = self.queue.delete(job.task_id)
66 | 
67 |                 if self.logger:
68 |                     self.logger.info(f"Job {job.task_id} removed from queue.")
69 |             else:
70 |                 job.status = "completed"
71 |                 self.queue.update(job)
72 | 
73 |                 if self.logger:
74 |                     self.logger.info(f"Job {job.task_id} marked as completed.")
75 | 
76 |     def stop(self) -> None:
77 |         """Stop the worker cleanly."""
78 |         self._stop_event.set()
79 | 
--------------------------------------------------------------------------------
/src/pytask/worker/worker.py:
--------------------------------------------------------------------------------
1 | from pytask.task_queue.task_queue import Queue
2 | from typing import Callable, Any
3 | from pytask.job import Job
4 | from pytask.worker.base_worker import BaseWorker
5 | from typing_extensions import override
6 | import time
7 | 
8 | import logging
9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | 
13 | class Worker(BaseWorker):
14 |     def __init__(
15 |         self,
16 |         queue: Queue,
17 |         func: Callable[[Job], Any],
18 |         logger: logging.Logger | None = None,
19 |         interval: int = 1,
20 |     ):
21 |         super().__init__(queue, func, logger, interval)
22 | 
23 |     @override
24 |     def run(self):
25 |         while True:
26 |             job = self.queue.get_oldest_pending()
27 | 
28 |             if job:
29 |                 if self.logger:
30 |                     self.logger.info(f"Processing job: {job}")
31 | 
32 |                 self.do(job)
33 | 
34 |                 if self.queue.flags.pop_after_processing:
35 |                     _ = self.queue.delete(job.task_id)
36 | 
37 |                     if self.logger:
38 |                         self.logger.info(f"Job {job.task_id} removed from queue.")
39 |                 else:
40 |                     job.status = "completed"
41 |                     self.queue.update(job)
42 | 
43 |                     if self.logger:
44 |                         self.logger.info(f"Job {job.task_id} marked as completed.")
45 |             else:
46 |                 if self.logger:
47 |                     self.logger.info("No pending jobs found.")
48 | 
49 |             if self.interval > 0:
50 |                 time.sleep(self.interval)
51 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaypyles/pytask/57e66446a3204983311792aae5af280ca5f644a7/tests/__init__.py
--------------------------------------------------------------------------------