├── .github ├── actions │ └── publish │ │ └── action.yaml └── workflows │ └── release.yaml ├── .gitignore ├── LICENSE ├── README.md ├── docs └── worker.png ├── pdm.lock ├── pyproject.toml ├── src └── pytask │ ├── __init__.py │ ├── __main__.py │ ├── flags │ ├── __init__.py │ └── flags.py │ ├── job │ ├── __init__.py │ ├── job.py │ └── types.py │ ├── task_queue │ ├── __init__.py │ ├── constants.py │ ├── task_queue.py │ └── types.py │ └── worker │ ├── __init__.py │ ├── async_worker.py │ ├── base_worker.py │ ├── concurrent_worker.py │ └── worker.py └── tests └── __init__.py /.github/actions/publish/action.yaml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | description: Publish a package to PyPI 4 | 5 | inputs: 6 | version: 7 | description: The version to release 8 | required: true 9 | 10 | pypi_token: 11 | description: The PyPI token to use 12 | required: true 13 | 14 | gpat_token: 15 | description: The Github token to use 16 | required: true 17 | 18 | runs: 19 | using: composite 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v4 23 | with: 24 | token: ${{ inputs.gpat_token }} 25 | 26 | - name: Set up PDM 27 | uses: pdm-project/setup-pdm@v4 28 | with: 29 | python-version: 3.10.5 30 | 31 | - name: Update package versions 32 | run: | 33 | sed -i "s/^version = \".*/version = \"${{ inputs.version }}\"/" pyproject.toml 34 | shell: bash 35 | 36 | - name: Commit version changes to repo 37 | run: | 38 | git config user.name "GitHub Actions" 39 | git config user.email "github-actions@github.com" 40 | git add pyproject.toml 41 | 42 | git commit -m "Update version to ${{ inputs.version }}" 43 | git push origin main 44 | shell: bash 45 | 46 | - name: Publish 47 | run: pdm publish -u __token__ -P ${{ inputs.pypi_token }} -r pypi -p . 48 | shell: bash 49 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | version: 7 | type: string 8 | description: The version to release 9 | 10 | jobs: 11 | release: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v4 16 | 17 | - name: Publish to pypi 18 | uses: ./.github/actions/publish 19 | with: 20 | version: ${{ inputs.version }} 21 | pypi_token: ${{ secrets.PYPI_TOKEN }} 22 | gpat_token: ${{ secrets.GPAT_TOKEN }} 23 | 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm-project.org/#use-with-ide 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/
163 | 
164 | data
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 Jayden Pyles
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![Logo Picture](https://github.com/jaypyles/pytask/blob/main/docs/worker.png)
2 | 
3 | # pytask
4 | 
5 | A simple sqlite3-based job queue with a worker. Its main purpose is to run jobs from a queue. Jobs are not popped from the queue by default, which means the queue can also act as a history.
6 | 
7 | ## Installation
8 | 
9 | `pip install pytask-queue`
10 | 
11 | ## Usage
12 | 
13 | The worker runs the function `func` for each job, passing it a `Job` object. This means you can alter the job object inside the function, and the updated job will be saved back to the queue.
14 | 
15 | ```python
16 | # python process 1
17 | from pytask import Queue, Job, SQLDataType, SQLColumnConditions
18 | 
19 | queue = Queue(schema=[
20 |     ("foo", SQLDataType.INTEGER, [SQLColumnConditions.NOT_NULL]),
21 |     ("bar", SQLDataType.TEXT, [SQLColumnConditions.NOT_NULL]),
22 |     ("baz", SQLDataType.JSON, [SQLColumnConditions.NOT_NULL])
23 | ])
24 | queue.insert(Job(data={"foo": 1, "bar": "test", "baz": {"foo": "bar"}}))
25 | ```
26 | 
27 | ```python
28 | # python process 2
29 | from pytask import Queue, Worker, Job
30 | 
31 | # A Queue constructed with the same path connects to the same underlying queue
32 | queue = Queue()
33 | 
34 | def func(job: Job):
35 |     # Do something with job
36 |     job.data["foo"] += 1
37 | 
38 | worker = Worker(queue, func)
39 | worker.run()
40 | ```
41 | 
42 | Creating multiple queues or multiple workers is possible. Constructing a new queue object won't actually create a new queue; it just opens a new connection to the same underlying database. This means you can have multiple queue objects pointing to the same queue, or use the same queue object for multiple workers, as in the sketch below.
43 | 
44 | Be careful to avoid race conditions when using the same queue object for multiple workers.
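45 | 
46 | For example, here is a minimal sketch of two workers sharing one queue object, each in its own thread since `Worker.run()` blocks (this assumes the schema from the usage example above already exists in the database):
47 | 
48 | ```python
49 | import threading
50 | 
51 | from pytask import Queue, Worker, Job
52 | 
53 | queue = Queue()  # the same default path connects to the same underlying queue
54 | 
55 | def func(job: Job):
56 |     job.data["foo"] += 1
57 | 
58 | # Two workers polling the same queue. Note: get_oldest_pending() does a
59 | # select-then-update, which is not atomic across workers, so duplicate
60 | # pickups are possible -- the race condition warned about above.
61 | workers = [Worker(queue, func) for _ in range(2)]
62 | threads = [threading.Thread(target=worker.run, daemon=True) for worker in workers]
63 | 
64 | for thread in threads:
65 |     thread.start()
66 | 
67 | for thread in threads:
68 |     thread.join()
69 | ```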
70 | 
71 | ## Flags
72 | 
73 | Flags are used to configure the behavior of the queue and worker.
74 | 
75 | Current flags:
76 | 
77 | - `auto_convert_json_keys`: If True, the queue automatically serializes the values of `JSON` columns to JSON strings when writing, and parses them back when reading. Useful for retrieving and manipulating JSON data.
78 | - `pop_after_processing`: If True, the job will be popped from the queue after processing.
79 | 
80 | ```python
81 | import logging
82 | 
83 | from pytask import Queue, Worker, Job, SQLDataType, SQLColumnConditions, Flags
84 | 
85 | logger = logging.getLogger(__name__)
86 | 
87 | flags = Flags(auto_convert_json_keys=True, pop_after_processing=True)
88 | queue = Queue(schema=[("foo", SQLDataType.INTEGER, [SQLColumnConditions.NOT_NULL])], flags=flags)
89 | 
90 | worker = Worker(queue, func, logger=logger)  # `func` as defined above
91 | worker.run()
92 | ```
93 | 
94 | ## Concurrent Worker
95 | 
96 | The concurrent worker runs jobs in parallel, using a thread pool (`concurrent.futures.ThreadPoolExecutor`) to process them.
97 | 
98 | ```python
99 | from pytask import Queue, ConcurrentWorker, Job, SQLDataType, SQLColumnConditions
100 | 
101 | # `queue`, `func`, and `logger` as defined above
102 | worker = ConcurrentWorker(queue, func, logger=logger, interval=1, max_workers=16)
103 | worker.run()
104 | ```
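105 | 
106 | ## Async Worker
107 | 
108 | The async worker (`AsyncWorker`) is the asyncio counterpart to `Worker`: `func` must be a coroutine function, which the worker awaits for each job. A minimal sketch, assuming the same queue as in the examples above:
109 | 
110 | ```python
111 | import asyncio
112 | 
113 | from pytask import Queue, AsyncWorker, Job
114 | 
115 | queue = Queue()
116 | 
117 | async def func(job: Job):
118 |     job.data["foo"] += 1
119 | 
120 | worker = AsyncWorker(queue, func, interval=1)
121 | asyncio.run(worker.run())
122 | ```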
123 | 
--------------------------------------------------------------------------------
/docs/worker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaypyles/pytask/57e66446a3204983311792aae5af280ca5f644a7/docs/worker.png
--------------------------------------------------------------------------------
/pdm.lock:
--------------------------------------------------------------------------------
1 | # This file is @generated by PDM.
2 | # It is not intended for manual editing.
3 | 
4 | [metadata]
5 | groups = ["default"]
6 | strategy = ["inherit_metadata"]
7 | lock_version = "4.5.0"
8 | content_hash = "sha256:59a09000a5e4eba2ada1683c0c853099add325e77a24fd046c5694a3273cf7ed"
9 | 
10 | [[metadata.targets]]
11 | requires_python = "==3.10.*"
12 | 
13 | [[package]]
14 | name = "typing-extensions"
15 | version = "4.12.2"
16 | requires_python = ">=3.8"
17 | summary = "Backported and Experimental Type Hints for Python 3.8+"
18 | groups = ["default"]
19 | marker = "python_version == \"3.10\""
20 | files = [
21 |     {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
22 |     {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
23 | ]
24 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "pytask-queue"
3 | version = "1.0.3"
4 | description = "A simple queue for Python tasks"
5 | authors = [{ name = "Jayden Pyles", email = "jpylesbusiness@gmail.com" }]
6 | dependencies = ["typing-extensions>=4.12.2"]
7 | requires-python = ">=3.10"
8 | readme = "README.md"
9 | license = { text = "MIT" }
10 | 
11 | [project.urls]
12 | repository = "https://github.com/jaypyles/pytask"
13 | 
14 | [tool.pdm]
15 | distribution = true
16 | 
17 | [tool.pdm.dev-dependencies]
18 | dev = ["ipython>=8.26.0"]
19 | 
20 | [tool.pyright]
21 | include = ["./src/"]
22 | exclude = ["**/__pycache__"]
23 | ignore = []
24 | defineConstant = { DEBUG = true }
25 | stubPath = ""
26 | 
27 | reportMissingImports = true
28 | reportMissingTypeStubs = false
29 | reportAny = false
30 | reportUnknownVariableType = false
31 | reportUnknownMemberType = false
32 | reportExplicitAny = false
33 | reportCallInDefaultInitializer = false
34 | 
35 | pythonVersion = "3.10"
36 | pythonPlatform = "Linux"
37 | 
38 | [tool.isort]
39 | length_sort = "1"
40 | profile = "black"
41 | sections = "STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER"
42 | import_heading_stdlib = "STL"
43 | import_heading_thirdparty = "PDM"
44 | import_heading_firstparty = "LOCAL"
45 | import_heading_localfolder = "LOCAL"
46 | 
47 | [tool.pdm.resolution]
48 | respect-source-order = true
49 | 
50 | [[tool.pdm.source]]
51 | name = "pypi"
52 | url = "https://pypi.org/simple"
53 | 
--------------------------------------------------------------------------------
/src/pytask/__init__.py:
--------------------------------------------------------------------------------
1 | from .task_queue import Queue
2 | from .worker import Worker, ConcurrentWorker, AsyncWorker
3 | from .job import Job
4 | from .flags import Flags
5 | from .task_queue.types import SQLDataType, SQLColumnConditions
6 | 
7 | __all__ = [
8 |     "Queue",
9 |     "Worker",
10 |     "ConcurrentWorker",
11 |     "AsyncWorker",
12 |     "Job",
13 |     "Flags",
14 |     "SQLDataType",
15 |     "SQLColumnConditions",
16 | ]
17 | 
--------------------------------------------------------------------------------
/src/pytask/__main__.py:
--------------------------------------------------------------------------------
1 | from pytask.task_queue.task_queue import Queue
2 | from pytask.job.job import Job
3 | from pytask.worker.concurrent_worker import ConcurrentWorker
4 | from pytask.task_queue.types import SQLDataType, SQLColumnConditions
5 | import logging
6 | 
7 | logger = logging.getLogger(__name__)
8 | logger.setLevel(logging.INFO)
9 | 
10 | if not logger.hasHandlers():
11 |     handler = logging.StreamHandler()
12 |     formatter = logging.Formatter(
13 |         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
14 |     )
15 |     handler.setFormatter(formatter)
16 |     logger.addHandler(handler)
17 | 
18 | 
19 | def func(job: Job):
20 |     logger.info(f"Processing job: {job.task_id}, {job.data}")
21 |     job.data["foo"] += 2
22 | 
23 | 
24 | def insert_jobs(queue: Queue):
25 |     for i in range(1, 201):
26 |         queue.insert(Job(data={"foo": i, "bar": f"test{i}", "baz": {"foo": "bar"}}))
27 | 
28 | 
29 | def main():
30 |     queue = Queue(
31 |         schema=[
32 |             ("foo", SQLDataType.INTEGER, [SQLColumnConditions.NOT_NULL]),
33 |             ("bar", SQLDataType.TEXT, [SQLColumnConditions.NOT_NULL]),
34 |             ("baz", SQLDataType.JSON, [SQLColumnConditions.NOT_NULL]),
35 |         ],
36 |     )
37 | 
38 |     worker = ConcurrentWorker(queue, func, logger=logger, interval=1, max_workers=16)
39 |     insert_jobs(queue)
40 |     print("Requested Jobs are: ", queue.get_all(search_conditions={"foo": 1}))
41 |     worker.run()
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     main()
46 | 
--------------------------------------------------------------------------------
/src/pytask/flags/__init__.py:
--------------------------------------------------------------------------------
1 | from .flags import Flags
2 | 
3 | __all__ = ["Flags"]
4 | 
--------------------------------------------------------------------------------
/src/pytask/flags/flags.py:
--------------------------------------------------------------------------------
1 | class Flags:
2 |     """
3 |     Flags are used to configure the behavior of the queue and worker.
4 | 
5 |     Parameters
6 |     ----------
7 |     auto_convert_json_keys : bool
8 |         If True, the queue automatically serializes the values of JSON-typed columns to JSON strings when writing, and parses them back when reading. Useful for retrieving and manipulating JSON data.
9 | 
10 |     pop_after_processing : bool
11 |         If True, the job will be popped from the queue after processing.
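12 | 
13 |     Examples
14 |     --------
15 |     A sketch: with auto_convert_json_keys=True (the default), the value of a
16 |     JSON column round-trips as a dict:
17 | 
18 |     >>> queue = Queue(schema=[("baz", SQLDataType.JSON, [])], flags=Flags())
19 |     >>> queue.insert(Job(data={"baz": {"foo": "bar"}}))
20 |     >>> queue.get_all()[0].data["baz"]
21 |     {'foo': 'bar'}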
12 | """ 13 | 14 | auto_convert_json_keys: bool = True 15 | pop_after_processing: bool = False 16 | 17 | def __init__( 18 | self, auto_convert_json_keys: bool = True, pop_after_processing: bool = False 19 | ): 20 | self.auto_convert_json_keys = auto_convert_json_keys 21 | self.pop_after_processing = pop_after_processing 22 | -------------------------------------------------------------------------------- /src/pytask/job/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["Job"] 2 | 3 | from .job import Job 4 | -------------------------------------------------------------------------------- /src/pytask/job/job.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | from typing_extensions import override 3 | from typing import Any 4 | from datetime import datetime 5 | import sqlite3 6 | from pytask.job.types import JobStatus 7 | 8 | 9 | class Job: 10 | def __init__( 11 | self, 12 | task_id: str | None = None, 13 | status: JobStatus = "pending", 14 | data: dict[str, Any] = {}, 15 | ): 16 | self.task_id: str = task_id if task_id is not None else uuid4().hex 17 | self.status: JobStatus = status 18 | self.created_at: str = datetime.now().isoformat() 19 | self.updated_at: str = datetime.now().isoformat() 20 | self.data: dict[str, Any] = data 21 | 22 | @override 23 | def __str__(self): 24 | return str(self.flat()) 25 | 26 | @override 27 | def __repr__(self): 28 | return str(self.flat()) 29 | 30 | @staticmethod 31 | def create_from_row(row: sqlite3.Row): 32 | exclude_keys = {"id", "task_id", "status", "created_at", "updated_at"} 33 | job = Job() 34 | job.task_id = row["task_id"] 35 | job.status = row["status"] 36 | job.created_at = row["created_at"] 37 | job.updated_at = row["updated_at"] 38 | job.data = {key: row[key] for key in row.keys() if key not in exclude_keys} 39 | return job 40 | 41 | def to_dict(self) -> dict[str, Any]: 42 | return { 43 | "task_id": self.task_id, 44 | "status": self.status, 45 | "created_at": self.created_at, 46 | "updated_at": self.updated_at, 47 | **self.data, 48 | } 49 | 50 | def flat(self) -> dict[str, Any]: 51 | return { 52 | "task_id": self.task_id, 53 | "status": self.status, 54 | "created_at": self.created_at, 55 | "updated_at": self.updated_at, 56 | **{ 57 | key: value 58 | for key, value in self.data.items() 59 | if not key.startswith("_") 60 | }, 61 | } 62 | -------------------------------------------------------------------------------- /src/pytask/job/types.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | JobStatus = Literal["pending", "running", "completed", "failed"] -------------------------------------------------------------------------------- /src/pytask/task_queue/__init__.py: -------------------------------------------------------------------------------- 1 | from .task_queue import Queue 2 | 3 | __all__ = ["Queue"] 4 | -------------------------------------------------------------------------------- /src/pytask/task_queue/constants.py: -------------------------------------------------------------------------------- 1 | BASE_SCHEMA = "id INTEGER PRIMARY KEY AUTOINCREMENT, task_id INTEGER, status TEXT, created_at DATETIME, updated_at DATETIME" 2 | DEFAULT_PATH = "data/queue.db" 3 | -------------------------------------------------------------------------------- /src/pytask/task_queue/task_queue.py: 
--------------------------------------------------------------------------------
/src/pytask/task_queue/task_queue.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sqlite3
3 | from typing import Any
4 | 
5 | from pytask.task_queue.constants import BASE_SCHEMA, DEFAULT_PATH
6 | from pytask.job.job import Job
7 | from pytask.task_queue.types import SQLDataType, SQLColumnConditions
8 | import json
9 | 
10 | from pytask.flags import Flags
11 | 
12 | 
13 | class Queue:
14 |     """
15 |     A queue is a collection of jobs that are waiting to be processed. Using the default path,
16 |     the queue will be stored in the current working directory, under ./data/queue.db.
17 | 
18 |     The queue is stored in a SQLite database, in a table named "job". Creating more than one
19 |     Queue object accesses the same queue, unless the path is changed.
20 | 
21 |     Parameters
22 |     ----------
23 |     schema : list[tuple[str, SQLDataType, list[SQLColumnConditions]]]
24 |         The schema of the queue.
25 |     path : str
26 |         The path to the SQLite database. Defaults to ./data/queue.db.
27 |     flags : Flags
28 |         The flags to configure the behavior of the queue.
29 |     """
30 | 
31 |     def __init__(
32 |         self,
33 |         schema: list[tuple[str, SQLDataType, list[SQLColumnConditions]]] | None = None,
34 |         path: str = DEFAULT_PATH,
35 |         flags: Flags | None = None,
36 |     ):
37 |         # None defaults avoid mutable default arguments shared between instances
38 |         self.schema: list[tuple[str, SQLDataType, list[SQLColumnConditions]]] = (
39 |             schema if schema is not None else []
40 |         )
41 |         self.path: str = path
42 |         self.base_schema: str = BASE_SCHEMA
43 |         self.sql_schema: str = self.__create_sql_schema()
44 |         self.insert_schema: str = self.__create_insert_schema()
45 |         self.json_keys: list[str] = self.__get_json_keys()
46 |         self.flags: Flags = flags if flags is not None else Flags()
47 |         _ = self.__create_table()
48 | 
49 |     def insert(self, job: Job):
50 |         with sqlite3.connect(self.path) as conn:
51 |             if self.flags.auto_convert_json_keys:
52 |                 self.__dump_json_keys(job)
53 | 
54 |             _ = conn.execute(self.insert_schema, job.to_dict())
55 | 
56 |     def update(self, job: Job):
57 |         update_schema = self.__create_update_schema(job.data)
58 | 
59 |         if self.flags.auto_convert_json_keys:
60 |             self.__dump_json_keys(job)
61 | 
62 |         with sqlite3.connect(self.path) as conn:
63 |             _ = conn.execute(update_schema, job.to_dict())
64 | 
65 |     def delete(self, task_id: str):
66 |         with sqlite3.connect(self.path) as conn:
67 |             _ = conn.execute("DELETE FROM job WHERE task_id = ?", (task_id,))
68 | 
69 |         return True
70 | 
71 |     def get(self, task_id: str) -> Job | None:
72 |         with self.__connect() as conn:
73 |             cursor = conn.execute("SELECT * FROM job WHERE task_id = ?", (task_id,))
74 |             row = cursor.fetchone()
75 | 
76 |             if row:
77 |                 job = Job.create_from_row(row)
78 | 
79 |                 # Parse JSON columns, consistent with get_all and get_oldest_pending
80 |                 if self.flags.auto_convert_json_keys:
81 |                     self.__load_json_keys(job)
82 | 
83 |                 return job
84 | 
85 |             return None
86 | 
87 |     def get_all(self, search_conditions: dict[str, Any] | None = None) -> list[Job]:
88 |         """
89 |         Get all jobs that match the search conditions.
90 | 
91 |         Parameters
92 |         ----------
93 |         search_conditions : dict[str, Any]
94 |             The conditions to search for. Currently only supports equality: foo = 1 is expressed as search_conditions = {"foo": 1}.
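95 | 
96 |         Examples
97 |         --------
98 |         queue.get_all(search_conditions={"foo": 1}) returns every job whose foo column equals 1; queue.get_all() returns all jobs.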
85 | """ 86 | conditions_str = " AND ".join([f"{key} = :{key}" for key in search_conditions]) 87 | where_clause = f"WHERE {conditions_str}" if conditions_str else "" 88 | 89 | with self.__connect() as conn: 90 | cursor = conn.execute( 91 | f"SELECT * FROM job {where_clause}", 92 | search_conditions, 93 | ) 94 | 95 | rows = cursor.fetchall() 96 | jobs = [Job.create_from_row(row) for row in rows] 97 | 98 | if self.flags.auto_convert_json_keys: 99 | for job in jobs: 100 | self.__load_json_keys(job) 101 | 102 | return jobs 103 | 104 | def get_oldest_pending(self) -> Job | None: 105 | with self.__connect() as conn: 106 | cursor = conn.execute( 107 | "SELECT * FROM job WHERE status = 'pending' ORDER BY created_at ASC LIMIT 1" 108 | ) 109 | row = cursor.fetchone() 110 | 111 | if row: 112 | job = Job.create_from_row(row) 113 | 114 | job.status = "running" 115 | self.update(job) 116 | 117 | if self.flags.auto_convert_json_keys: 118 | self.__load_json_keys(job) 119 | 120 | return job 121 | 122 | return None 123 | 124 | def __connect(self) -> sqlite3.Connection: 125 | connection = sqlite3.connect(self.path) 126 | connection.row_factory = sqlite3.Row 127 | return connection 128 | 129 | def __create_sql_schema(self) -> str: 130 | schema_parts: list[str] = [] 131 | for column in self.schema: 132 | column_name, column_type, column_conditions = column 133 | schema_parts.append( 134 | f"{column_name} {column_type.value} {' '.join([condition.value for condition in column_conditions])}" 135 | ) 136 | 137 | schema = ", ".join(schema_parts) 138 | job_schema = self.base_schema 139 | 140 | if schema: 141 | job_schema += f", {schema}" 142 | 143 | return f"""CREATE TABLE IF NOT EXISTS job ( 144 | {job_schema} 145 | )""" 146 | 147 | def __create_table(self): 148 | if not os.path.exists(self.path): 149 | os.makedirs(os.path.dirname(self.path), exist_ok=True) 150 | 151 | try: 152 | with sqlite3.connect(self.path) as conn: 153 | _ = conn.execute(self.sql_schema) 154 | except Exception: 155 | return False 156 | 157 | return True 158 | 159 | def __create_insert_schema(self) -> str: 160 | other_columns: list[str] = [] 161 | 162 | for column in self.schema: 163 | column_name, _, _ = column 164 | other_columns.append(column_name) 165 | 166 | other_columns_str = ", ".join(other_columns) 167 | other_columns_values = ", ".join([f":{col}" for col in other_columns]) 168 | 169 | return f""" 170 | INSERT INTO job (task_id, status, created_at, updated_at, {other_columns_str}) 171 | VALUES (:task_id, :status, :created_at, :updated_at, {other_columns_values}); 172 | """ 173 | 174 | def __create_update_schema(self, extra_columns: dict[str, Any] = {}) -> str: 175 | extra_columns_str = ", ".join([f"{col} = :{col}" for col in extra_columns]) 176 | 177 | return f""" 178 | UPDATE job SET 179 | status = :status, 180 | updated_at = :updated_at{f", {extra_columns_str}" if extra_columns_str else ""} 181 | WHERE task_id = :task_id; 182 | """ 183 | 184 | def __load_json_keys(self, job: Job): 185 | for key in self.json_keys: 186 | job.data[key] = json.loads(job.data[key]) 187 | 188 | def __dump_json_keys(self, job: Job): 189 | for key in self.json_keys: 190 | if not isinstance(job.data[key], str): 191 | job.data[key] = json.dumps(job.data[key]) 192 | 193 | def __get_json_keys(self) -> list[str]: 194 | json_keys: list[str] = [] 195 | 196 | for column in self.schema: 197 | column_name, column_type, _ = column 198 | 199 | if column_type == SQLDataType.JSON: 200 | json_keys.append(column_name) 201 | 202 | return json_keys 203 | 
--------------------------------------------------------------------------------
/src/pytask/task_queue/types.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | 
3 | 
4 | class SQLDataType(Enum):
5 |     INTEGER = "INTEGER"
6 |     TEXT = "TEXT"
7 |     JSON = "JSON"
8 |     BOOLEAN = "BOOLEAN"
9 |     FLOAT = "FLOAT"
10 |     DATETIME = "DATETIME"
11 | 
12 | 
13 | class SQLColumnConditions(Enum):
14 |     NOT_NULL = "NOT NULL"
15 |     NULL = "NULL"
16 |     UNIQUE = "UNIQUE"
17 |     PRIMARY_KEY = "PRIMARY KEY"
18 |     FOREIGN_KEY = "FOREIGN KEY"
19 |     CHECK = "CHECK"
20 |     DEFAULT = "DEFAULT"
21 | 
--------------------------------------------------------------------------------
/src/pytask/worker/__init__.py:
--------------------------------------------------------------------------------
1 | from .worker import Worker
2 | from .concurrent_worker import ConcurrentWorker
3 | from .async_worker import AsyncWorker
4 | 
5 | __all__ = ["Worker", "ConcurrentWorker", "AsyncWorker"]
6 | 
--------------------------------------------------------------------------------
/src/pytask/worker/async_worker.py:
--------------------------------------------------------------------------------
1 | from pytask.task_queue.task_queue import Queue
2 | from typing import Callable, Any
3 | from pytask.job import Job
4 | import asyncio
5 | from collections.abc import Coroutine
6 | 
7 | import logging
8 | 
9 | logger = logging.getLogger(__name__)
10 | 
11 | 
12 | class AsyncWorker:
13 |     def __init__(
14 |         self,
15 |         queue: Queue,
16 |         func: Callable[[Job], Coroutine[Any, Any, Any]],
17 |         logger: logging.Logger | None = None,
18 |         interval: int = 1,
19 |     ):
20 |         self.queue: Queue = queue
21 |         self.func: Callable[[Job], Coroutine[Any, Any, Any]] = func
22 |         self.interval: int = interval
23 |         self.logger: logging.Logger | None = logger
24 | 
25 |     async def run(self):
26 |         while True:
27 |             job = self.queue.get_oldest_pending()
28 | 
29 |             if job:
30 |                 if self.logger:
31 |                     self.logger.info(f"Processing job: {job}")
32 | 
33 |                 await self.do(job)
34 | 
35 |                 if self.queue.flags.pop_after_processing:
36 |                     _ = self.queue.delete(job.task_id)
37 | 
38 |                     if self.logger:
39 |                         self.logger.info(f"Job {job.task_id} removed from queue.")
40 |                 else:
41 |                     job.status = "completed"
42 |                     self.queue.update(job)
43 | 
44 |                     if self.logger:
45 |                         self.logger.info(f"Job {job.task_id} marked as completed.")
46 |             else:
47 |                 if self.logger:
48 |                     self.logger.info("No pending jobs found.")
49 | 
50 |             if self.interval > 0:
51 |                 # time.sleep would block the event loop; asyncio.sleep yields to it
52 |                 await asyncio.sleep(self.interval)
53 | 
54 |     async def do(self, job: Job):
55 |         await self.func(job)
56 | 
--------------------------------------------------------------------------------
/src/pytask/worker/base_worker.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from pytask.task_queue.task_queue import Queue
3 | from typing import Callable, Any
4 | from pytask.job.job import Job
5 | 
6 | import logging
7 | 
8 | logger = logging.getLogger(__name__)
9 | 
10 | 
11 | class BaseWorker(ABC):
12 |     def __init__(
13 |         self,
14 |         queue: Queue,
15 |         func: Callable[[Job], Any],
16 |         logger: logging.Logger | None = None,
17 |         interval: int = 1,
18 |     ):
19 |         self.queue: Queue = queue
20 |         self.func: Callable[[Job], Any] = func
21 |         self.interval: int = interval
22 |         self.logger: logging.Logger | None = logger
23 | 
24 |     @abstractmethod
25 |     def run(self) -> None:
26 |         raise NotImplementedError("Subclass must implement run method")
27 | 
28 |     def do(self, job: Job):
29 |         self.func(job)
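30 | 
31 | 
32 | # A custom worker only needs to subclass BaseWorker and implement run().
33 | # A hypothetical sketch of a worker that drains the queue once and exits:
34 | #
35 | #     class DrainWorker(BaseWorker):
36 | #         def run(self) -> None:
37 | #             while (job := self.queue.get_oldest_pending()) is not None:
38 | #                 self.do(job)
39 | #                 job.status = "completed"
40 | #                 self.queue.update(job)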
41 | 
--------------------------------------------------------------------------------
/src/pytask/worker/concurrent_worker.py:
--------------------------------------------------------------------------------
1 | import threading
2 | from concurrent.futures import ThreadPoolExecutor
3 | from pytask.job import Job
4 | from typing import Callable, Any
5 | from pytask.task_queue import Queue
6 | import logging
7 | from typing_extensions import override
8 | 
9 | from pytask.worker.base_worker import BaseWorker
10 | 
11 | 
12 | class ConcurrentWorker(BaseWorker):
13 |     def __init__(
14 |         self,
15 |         queue: Queue,
16 |         func: Callable[[Job], Any],
17 |         max_workers: int = 5,
18 |         interval: int = 1,
19 |         logger: logging.Logger | None = None,
20 |     ):
21 |         self.max_workers: int = max_workers
22 |         self.lock: threading.Lock = threading.Lock()
23 |         self._stop_event: threading.Event = threading.Event()
24 | 
25 |         super().__init__(queue, func, logger, interval)
26 | 
27 |     @override
28 |     def run(self) -> None:
29 |         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
30 |             while not self._stop_event.is_set():
31 |                 job = self.queue.get_oldest_pending()
32 | 
33 |                 if job:
34 |                     with self.lock:
35 |                         _ = executor.submit(self.process_job, job)
36 | 
37 |                     if self.logger:
38 |                         self.logger.info(f"Job {job.task_id} submitted for processing.")
39 |                 else:
40 |                     if self.logger:
41 |                         self.logger.info("No pending jobs found.")
42 | 
43 |                 # Waiting on the stop event honors the configured interval and
44 |                 # wakes up immediately when stop() is called
45 |                 _ = self._stop_event.wait(self.interval)
46 | 
47 |     def process_job(self, job: Job) -> None:
48 |         try:
49 |             if self.logger:
50 |                 self.logger.info(f"Processing job: {job.task_id}, {job.data}")
51 | 
52 |             # Process the job
53 |             self.do(job)
54 | 
55 |             # After processing, update the job status outside the lock
56 |             self.update_job(job)
57 | 
58 |         except Exception as e:
59 |             if self.logger:
60 |                 self.logger.error(f"Error processing job {job.task_id}: {e}")
61 | 
62 |     def update_job(self, job: Job) -> None:
63 |         with self.lock:
64 |             if self.queue.flags.pop_after_processing:
65 |                 _ = self.queue.delete(job.task_id)
66 | 
67 |                 if self.logger:
68 |                     self.logger.info(f"Job {job.task_id} removed from queue.")
69 |             else:
70 |                 job.status = "completed"
71 |                 self.queue.update(job)
72 | 
73 |                 if self.logger:
74 |                     self.logger.info(f"Job {job.task_id} marked as completed.")
75 | 
76 |     def stop(self) -> None:
77 |         """Stop the worker cleanly."""
78 |         self._stop_event.set()
79 | 
--------------------------------------------------------------------------------
/src/pytask/worker/worker.py:
--------------------------------------------------------------------------------
1 | from pytask.task_queue.task_queue import Queue
2 | from typing import Callable, Any
3 | from pytask.job import Job
4 | from pytask.worker.base_worker import BaseWorker
5 | from typing_extensions import override
6 | import time
7 | 
8 | import logging
9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | 
13 | class Worker(BaseWorker):
14 |     def __init__(
15 |         self,
16 |         queue: Queue,
17 |         func: Callable[[Job], Any],
18 |         logger: logging.Logger | None = None,
19 |         interval: int = 1,
20 |     ):
21 |         super().__init__(queue, func, logger, interval)
22 | 
23 |     @override
24 |     def run(self):
25 |         while True:
26 |             job = self.queue.get_oldest_pending()
27 | 
28 |             if job:
29 |                 if self.logger:
30 |                     self.logger.info(f"Processing job: {job}")
31 | 
32 |                 self.do(job)
33 | 
34 |                 if self.queue.flags.pop_after_processing:
35 |                     _ = self.queue.delete(job.task_id)
36 | 
37 |                     if self.logger:
38 |                         self.logger.info(f"Job {job.task_id} removed from queue.")
39 |                 else:
40 |                     job.status = "completed"
41 |                     self.queue.update(job)
42 | 
43 |                     if self.logger:
44 |                         self.logger.info(f"Job {job.task_id} marked as completed.")
45 |             else:
46 |                 if self.logger:
47 |                     self.logger.info("No pending jobs found.")
48 | 
49 |             if self.interval > 0:
50 |                 time.sleep(self.interval)
51 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaypyles/pytask/57e66446a3204983311792aae5af280ca5f644a7/tests/__init__.py
--------------------------------------------------------------------------------