├── .flake8 ├── .gitattributes ├── .github └── workflows │ ├── ci.yml │ └── publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── settings.json ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── TODO.md ├── benchmark ├── append.prof ├── benchmark.ipynb ├── benchmark_log_append.py ├── benchmark_nb_tasks.py ├── benchmark_nb_tasks_dask.py ├── benchmark_queue.py ├── benchmark_queuepool.py ├── benchmark_throughput.py ├── profile_durable_queue.py ├── profile_log_append.py └── queue.prof ├── conftest.py ├── daskconfig.yaml ├── daskqueue ├── ActorPool.py ├── Consumer.py ├── ConsumerPool.py ├── Protocol.py ├── QueuePool.py ├── __init__.py ├── _version.py ├── backend.py ├── queue │ ├── __init__.py │ ├── base_queue.py │ ├── durable_queue.py │ ├── queue_exceptions.py │ ├── sql_queue.py │ └── transient_queue.py ├── segment │ ├── __init__.py │ ├── index_record.py │ ├── index_segment.py │ ├── log_record.py │ └── log_segment.py └── utils │ ├── __init__.py │ ├── funcs.py │ ├── logger.py │ └── start_cluster.py ├── examples ├── perf_copy.py ├── perf_cpu_bound.py ├── perf_docs_pages.py └── perf_io_bound.py ├── figures ├── benchmark_v0.1.5 │ ├── submittime_nqueues_v0.1.5.png │ ├── taskduration_workthroughput_v0.1.5.png │ ├── workthroughput_taskduration_nqueues_v0.1.5.png │ └── worktime_nqueues.png ├── copy async.PNG ├── copy async2.PNG └── copy async3.PNG ├── integration ├── test_integration_durable.py ├── test_integration_durable_multiplelog.py └── test_integration_restart_cluster.py ├── pytest.ini ├── requirements.dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── test_consumer.py ├── test_consumer_pool.py ├── test_durable_queue.py ├── test_generalconsumer.py ├── test_index_segment.py ├── test_log_segment.py ├── test_protocol.py ├── test_queue.py └── test_queue_pool.py └── versioneer.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | max-complexity = 18 4 | select = B,C,E,F,W,T4,B9 5 | ignore = E203, E266, E501, W503, F403, E731 6 | exclude = 7 | .git, 8 | __pycache__, 9 | docs/source/conf.py, 10 | old, 11 | build, 12 | dist, 13 | tests/*.py, 14 | versioneer.py 15 | per-file-ignores = 16 | */__init__.py: F401 17 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | daskqueue/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Linting and Testing 5 | 6 | on: [push] 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Python 3.9 18 | uses: actions/setup-python@v3 19 | with: 20 | python-version: "3.9" 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install flake8 pytest 25 | if [ -f requirements.dev.txt ]; then pip install -r requirements.dev.txt; fi 26 | - name: Lint with flake8 27 | run: | 28 | # stop the build if there are Python syntax errors or undefined names 29 | flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics 30 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 31 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 32 | - name: Test with pytest 33 | run: | 34 | pytest tests/ -n auto 35 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout source 13 | uses: actions/checkout@v2 14 | 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: 3.9 19 | 20 | - name: Install build dependencies 21 | run: python -m pip install build wheel twine 22 | 23 | - name: Build distributions 24 | shell: bash -l {0} 25 | run: python setup.py sdist bdist_wheel 26 | 27 | # - name: Publish package 📦 to Test PyPI 28 | # uses: pypa/gh-action-pypi-publish@master 29 | # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 30 | # with: 31 | # user: __token__ 32 | # password: ${{ secrets.TEST_PYPI_API_TOKEN }} 33 | # repository_url: https://test.pypi.org/legacy/ 34 | 35 | - name: Publish package 📦 to PyPI 36 | # Execute only on PR to the main branch 37 | # if: github.event_name == 'pull_request' && github.event.pull_request.merged == true 38 | uses: pypa/gh-action-pypi-publish@release/v1 39 | with: 40 | password: ${{ secrets.PYPI_API_TOKEN }} 41 | 42 | - name: Create GitHub Release 43 | id: create_release 44 | # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 45 | uses: actions/create-release@v1 46 | env: 47 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 48 | with: 49 | tag_name: ${{ github.ref }} 50 | release_name: ${{ github.ref }} 51 | draft: false 52 | prerelease: false 53 | 54 | - name: Get Asset name 55 | # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 56 | run: | 57 | export PKG=$(ls dist/ | grep tar) 58 | set -- $PKG 59 | echo "name=$1" >> $GITHUB_ENV 60 | 61 | - name: Upload Release Asset (sdist) to GitHub 62 | id: upload-release-asset 63 | # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 64 | uses: actions/upload-release-asset@v1 65 | env: 66 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 67 | with: 68 | upload_url: ${{ steps.create_release.outputs.upload_url }} 69 | asset_path: dist/${{ env.name }} 70 | asset_name: ${{ env.name }} 71 | asset_content_type: application/zip 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | notebooks/ 162 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # repos: 2 | # - repo: https://github.com/pre-commit/pre-commit-hooks 3 | # rev: v2.3.0 4 | # hooks: 5 | # - id: check-yaml 6 | # - id: end-of-file-fixer 7 | # - id: trailing-whitespace 8 | # - repo: https://github.com/psf/black 9 | # rev: 22.1.0 10 | # hooks: 11 | # - id: black 12 | # additional_dependencies: ['click==8.0.4'] 13 | # - repo: https://github.com/pre-commit/pre-commit-hooks 14 | # rev: v2.3.0 15 | # hooks: 16 | 17 | repos: 18 | - repo: https://github.com/pre-commit/pre-commit-hooks 19 | rev: v3.2.0 20 | hooks: 21 | - id: trailing-whitespace 22 | - id: mixed-line-ending 23 | - id: check-added-large-files 24 | args: ['--maxkb=1000'] 25 | - id: end-of-file-fixer 26 | - id: requirements-txt-fixer 27 | - id: check-yaml 28 | - id: check-json 29 | - id: check-merge-conflict 30 | 31 | - repo: local 32 | hooks: 33 | - id: isort 34 | name: isort 35 | stages: [commit] 36 | language: system 37 | entry: isort 38 | types: [python] 39 | args: ["--filter-files","--profile","black"] 40 | 41 | - id: black 42 | name: black 43 | stages: [commit] 44 | language: system 45 | entry: black 46 | types: [python] 47 | 48 | # - id: mypy 49 | # name: mypy 50 | # stages: [commit] 51 | # language: system 52 | # entry: mypy 53 | # types: [python] 54 | 55 | - id: flake8 56 | name: flake8 57 | stages: [commit] 58 | language: system 59 | entry: flake8 60 | types: [python] 61 | exclude: setup.py 62 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.flake8Enabled": true, 3 | "python.linting.enabled": true 4 | } 5 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Amine Dirhoussi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include daskqueue/_version.py 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | daskqueue 2 | =============== 3 | 4 | [![pypi](https://img.shields.io/pypi/v/daskqueue)](https://pypi.org/project/daskqueue/) 5 | [![licence](https://img.shields.io/github/license/AmineDiro/daskqueue)](https://github.com/AmineDiro/daskqueue/blob/main/LICENSE.md) 6 | [![issues](https://img.shields.io/github/issues/AmineDiro/daskqueue)](https://github.com/AmineDiro/daskqueue/issues) 7 | [![code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black) 8 | 9 | **daskqueue** is a small Python library built on top of Dask and Dask Distributed that implements a very lightweight distributed task queue. 10 | 11 | Think of this library as a simpler version of Celery built entirely on Dask. Running Celery in an HPC environment (for instance) is usually very tricky, whereas spawning a Dask cluster is a lot easier to manage, debug and clean up. 12 | 13 | Motivation 14 | ------- 15 | 16 | [Dask](https://docs.dask.org/en/stable/) is an amazing library for parallel computing written entirely in Python. It is easy to install and offers both a high-level API wrapping common collections (arrays, bags, dataframes) and a low-level API for writing custom code (task graphs with Delayed and Futures). 17 | 18 | For all its greatness, Dask implements a central scheduler (basically a simple tornado event loop) involved in every decision, which can sometimes create a central bottleneck. This is a pretty serious limitation when trying to use Dask in high-throughput situations. A simple task queue is usually the best approach for distributing millions of tasks. 19 | 20 | The **daskqueue** Python library leverages [Dask Actors](https://distributed.dask.org/en/stable/actors.html) to implement distributed queues with a simple load balancer, `QueuePool`, and a `Consumer` class to consume messages from these queues. 21 | 22 | We used Actors because: 23 | 24 | - Actors are stateful: they can hold on to and mutate state, and they are allowed to update that state in place, which is very useful when spawning distributed queues ! 25 | 26 | - **NO CENTRAL SCHEDULING NEEDED :** Operations on actors do not inform the central scheduler, and so do not contribute to its 4000 task/second overhead. They also avoid an extra network hop and so have lower latencies. Actors can communicate between themselves in a P2P manner, which is pretty neat when you have a huge number of queues and consumers. 27 | 28 | > **Note** : Dask provides a [Queue implementation](https://docs.dask.org/en/stable/futures.html?highlight=queue#queues), but it is mediated by the central scheduler, so it is not ideal for sending large amounts of data (everything you send is routed through a central point) and it adds additional overhead on the scheduler when putting millions of tasks. 29 | 30 | 31 | Install 32 | ------- 33 | 34 | daskqueue requires Python 3.6 or newer. 
35 | You can install the latest release from PyPI: 36 | 37 | ```bash 38 | $ pip install daskqueue 39 | ``` 40 | 41 | 42 | Usage 43 | ----- 44 | 45 | 46 | This simple example shows how to submit tasks to a distributed queue and process them in parallel on Dask workers: 47 | 48 | ```python 49 | from distributed import Client 50 | from daskqueue import QueuePool, ConsumerPool 51 | from daskqueue.utils import logger 52 | 53 | def process_item(): 54 |     return sum(i * i for i in range(10**8)) 55 | 56 | if __name__ == "__main__": 57 |     client = Client( 58 |         n_workers=5, 59 |         # task function doesn't release the GIL 60 |         threads_per_worker=1, 61 |         direct_to_workers=True, 62 |     ) 63 | 64 |     ## Params 65 |     n_queues = 1 66 |     n_consumers = 5 67 | 68 |     queue_pool = QueuePool(client, n_queues=n_queues) 69 | 70 |     consumer_pool = ConsumerPool(client, queue_pool, n_consumers=n_consumers) 71 |     consumer_pool.start() 72 | 73 |     for i in range(5): 74 |         queue_pool.submit(process_item) 75 | 76 |     # Wait for all work to be done 77 |     consumer_pool.join() 78 | 79 |     ## Get results 80 |     results = consumer_pool.results() 81 | 82 | ``` 83 | 84 | Take a look at the `examples/` folder for more usage examples. 85 | 86 | 87 | Implementation 88 | ------- 89 | You should think of daskqueue as a very simple distributed version of aiomultiprocessing. It is built from these basic classes: 90 | - `Queue` : The daskqueue library provides two queue types: 91 |   - `TransientQueue`: The default queue class. Submitted messages are appended to an in-memory FIFO queue. 92 |   - `DurableQueue`: A disk-backed queue that persists messages; tasks are still served in FIFO order. Durable queues append serialized messages to a fixed-size file called a `LogSegment`. They also append queue operations to an `IndexSegment`. The index segment is a combination of a [bitcask](https://riak.com/assets/bitcask-intro.pdf) index for segment offsets and a WAL file: it is an append-only file where we record the message status after each queue operation (ready, delivered, acked or failed) together with an offset to the message in one of the `LogSegment` files. 93 | - `QueuePoolActor`: Basic pool actor. It holds references to the queues and their sizes, and interfaces between the Client and the Consumers. The QueuePool implements a round-robin batch submit. 94 | 95 | - `ConsumerBaseClass`: Abstract class implementing all the fetching logic for your worker. Consumers have a `start()` method that runs an infinite fetch loop to pop items from the queue assigned by the QueuePool. Consumers communicate directly with the queues, providing highly scalable workflows. A consumer gets an item from its queue and schedules `process_item` on the Dask worker's ThreadPoolExecutor, freeing the worker's event loop to communicate with the scheduler, fetch tasks asynchronously, etc. 96 | 
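The durable variant uses the same `QueuePool`/`ConsumerPool` API as above: you pass a durability flag and a directory where the `LogSegment`/`IndexSegment` files are written, then submit work one call at a time or through the round-robin `batch_submit`. The sketch below is adapted from the notebook and scripts in `benchmark/`; the `dirpath` value, queue/consumer counts and task count are only illustrative:

```python
from distributed import Client

from daskqueue import ConsumerPool, Durability, QueuePool


def process_item():
    return sum(i * i for i in range(10**6))


if __name__ == "__main__":
    client = Client(n_workers=5, threads_per_worker=1, direct_to_workers=True)

    # Durable queues persist messages to log/index segment files under `dirpath`
    # (the path below is just an example).
    queue_pool = QueuePool(
        client, n_queues=2, durability=Durability.DURABLE, dirpath="/tmp/daskqueue"
    )
    consumer_pool = ConsumerPool(client, queue_pool, n_consumers=4)
    consumer_pool.start()

    # Round-robin batch submit: a list of (func, *args) tuples
    queue_pool.batch_submit([(process_item,) for _ in range(1_000)])

    consumer_pool.join()
    results = consumer_pool.results()
```

The integration tests in `integration/` (e.g. `test_integration_restart_cluster.py`) exercise restarting a cluster on top of an existing queue directory.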
97 | Performance and Limitations 98 | ------- 99 | ### Benchmarks 100 | The **daskqueue** library is very well suited for IO-bound jobs: by running multiple consumers and queues that communicate asynchronously, we can bypass the Dask scheduler limit and process **millions of tasks** 🥰 !! 101 | 102 | The file-copy example (see `examples/perf_copy.py`) was run on a cluster of 20 consumers and 5 queues. Each task is a basic file copy between two locations (copying from an NFS filer). We copied 200,000 files (~1.1 TB) without ever breaking a sweat ! 103 | 104 | We can clearly see the network saturation: 105 | 106 | ![Image](figures/copy%20async.PNG) 107 | 108 | Looking at the scheduler metrics, the mean load stays around 19.3%: 109 | ![Image](figures/copy%20async3.PNG) 110 | 111 | You can take a look at the `benchmark/` directory for various benchmarks run using `daskqueue` vs `dask`: 112 | - We put 1_000_000 tasks on a Dask cluster (2 nodes, 1 thread per process, 4 queues, 8 consumers) 113 | - The tasks were chunked into 1000 calls of 1000 tasks per batch 114 | - The client submits each batch to the QueuePool manager using `batch_submit` 115 | - The function is 'empty': it just passes and doesn't use CPU or IO 116 | - Processing 1_000_000 empty tasks took 338s (≈ 5min38s) 😸!! 117 | 118 | #### Throughput 119 | 120 | - For durable queues, we can achieve the following throughput with 1 consumer and 1 queue (running on the same machine) and a message size of ~100 bytes: 121 |   - 1 queue | 1 consumer : 122 |     - Mean write ops [1tests] 86991.03 wop/s 123 |     - Mean read ops [1tests] 8439.37 rop/s 124 |   - 5 queues | 5 consumers (near-linear speedup for the readers): 125 |     - Mean write ops [1tests] 86008.31 wop/s 126 |     - Mean read ops [1tests] 25199.66 rop/s 127 | 128 | - For transient queues, we can achieve the following throughput with 1 consumer and 1 queue (running on the same machine) and a message size of ~100 bytes: 129 |   - 1 queue | 1 consumer : 130 |     - Mean write ops [1tests] 86991.03 wop/s 131 |     - Mean read ops [1tests] 9840.37 rop/s 132 |   - 5 queues | 5 consumers (near-linear speedup for the readers): 133 |     - Mean write ops [1tests] 86008.31 wop/s 134 |     - Mean read ops [1tests] 26958.66 rop/s 135 | 136 | > **Note** : All files are mmapped, so we don't see any performance degradation for workloads that fit into memory. 137 | 
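These wop/s and rop/s figures are measured with a plain timing loop around the queue's synchronous API, as done in `benchmark/benchmark_queue.py`. A trimmed sketch of that measurement (the message payload and counts here are illustrative):

```python
import tempfile
from time import perf_counter

from daskqueue.Protocol import Message
from daskqueue.queue import DurableQueue


def func(x):
    return x + 2


N = 10_000

with tempfile.TemporaryDirectory() as tmpdir:
    queue = DurableQueue(name="queue-0", dirpath=tmpdir)

    # Write path: append N small (~100 byte) messages
    s = perf_counter()
    for _ in range(N):
        queue.put_sync(Message(func, 12))
    wops = N / (perf_counter() - s)

    # Read path: pop them back
    s = perf_counter()
    for _ in range(N):
        queue.get_sync()
    rops = N / (perf_counter() - s)

print(f"{wops:.0f} wop/s, {rops:.0f} rop/s")
```

Swapping `DurableQueue` for `TransientQueue()` in the same loop gives the in-memory numbers above.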
138 | ### Limitations 139 | Given the current implementation, you should be mindful of the following limitations (this list will be updated regularly): 140 | - We run the tasks in the worker's ThreadPool, so we inherit all the limitations of the standard dask `submit` method. 141 | - Tasks that require multiprocessing/multithreading within a worker cannot be scheduled at this time. This is also true for dask tasks. 142 | - The QueuePool implements simple scheduling on put and get. More sophisticated scheduling will be implemented in the future. 143 | 144 | Features roadmap 145 | ------- 146 | - [x] Consumer should run arbitrary funcs (ala celery) 147 | - [x] Use Worker's thread pool for long running tasks ( probe finished to get results) 148 | - [x] Wrap consumers in a ConsumerPool class 149 | - [x] Implement a Distributed Join to know when to stop cluster 150 | - [x] Implement a `concurrency_limit` as the maximum number of active, concurrent jobs each worker process will pick up from its queue at once. 151 | - [x] Run tasks on custom Worker's executors 152 | - [x] Add benchmarks 153 | - [x] Tests 154 | - [x] Implement durable queues with bitcask index 155 | - [x] Implement Ack Mechanism 156 | - [x] Reschedule Unacked Message 157 | - [ ] Implement health check mechanism 158 | - [ ] Implement tasks retries 159 | - [ ] Add queue plugin to dask dashboard 160 | 161 | Contributing 162 | -------------- 163 | Contributions are what make the open-source community such an amazing place to learn, inspire, and create. 164 | This project is still very rough! Any contributions you make will benefit everybody else and are greatly appreciated 😍 😍 😍 ! 165 | 166 | Please try to create bug reports that are: 167 | 168 | - Reproducible. Include steps to reproduce the problem. 169 | - Specific. Include as much detail as possible: which version, what environment, etc. 170 | - Unique. Do not duplicate existing open issues. 171 | - Scoped to a Single Bug. One bug per issue. 172 | 173 | Releasing 174 | --------- 175 | Releases are published automatically when a tag is pushed to GitHub. 176 | 177 | ```bash 178 | git checkout master 179 | git pull 180 | # Set next version number 181 | export RELEASE=x.x.x 182 | 183 | # Create tags 184 | git commit --allow-empty -m "Release $RELEASE" 185 | git tag -a $RELEASE -m "Version $RELEASE" 186 | 187 | # Push 188 | git push upstream --tags 189 | ``` 190 | 191 | License 192 | ------- 193 | 194 | **daskqueue** is copyright **Amine Dirhoussi**, and licensed under 195 | the MIT license. I am providing code in this repository to you under an open 196 | source license. This is my personal repository; the license you receive to 197 | my code is from me and not from my employer. See the `LICENSE` file for details. 198 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # AMQP 0-9-1 Implementation 2 | 3 | ## Messages 4 | - Upgrade the Messages protocol 5 | - Add topic support 6 | 7 | ## Exchanges / QueuePool 8 | - Take a msg and route it into 0..n queues based on the msg key 9 | - A msg with routing key R goes to Queue(R) 10 | - Implement dropping the message / returning it to the publisher when we can't route 11 | 12 | ## Queues 13 | - Store messages 14 | - Name < 255 bytes UTF-8 15 | - Durability : [Durable | Transient] 16 | - Queue metadata is stored on disk 17 | - Messages of a durable queue are stored on disk 18 | - Should separate queue serving and storage: the QueuePool should provide us with a path at spawn time 19 | - Durable queues should keep a log and flush it to disk in the background (WAL style) -> use sqlite as a backend 20 | - Message TTL 21 | - The queue should keep records of popped unacked messages and either flag them if they are acked or push them back to the queue if they fail 22 | - Should keep note of 23 | 24 | ## Consumers 25 | - Support for message acking : [Early ack | Late ack] 26 | - Early ack: ack when the consumer gets the msg from the queue 27 | - Late ack: ack when the message succeeds 28 | - Two ways to consume: Push and Pull. For now, only support the Pull style. 29 | - Should run the pulling in a separate asyncio event loop. 30 | 31 | --------------------------------------------------------------- 32 | # Kafka storage 33 | 34 | - The logfile is **append only** 35 | - Each log is split into segments 36 | - Each log holds up to 1 GB of data (or a time limit) 37 | - When a log is full we close it and open a new logfile 38 | 39 | Example : 40 | [Seg0: 0-957 (RO)] [Seg1: 958-1484 (RO)] [Seg2: 1485-... 
(RW)] 41 | 42 | 43 | ### Directory structure 44 | - 1 dir per Topic ( per exchange) 45 | - Segment Filename is offset 46 | - Index | Log ( == segment) | TimeIndex 47 | 48 | ## Indices: 49 | - Offset index : Index on msg offset in a specific segment : can be hashtable because msg ids are random uuids ie no order needed 50 | - Timestamp index : Index on timestamps to find specific msg : should be implemented as a binary search tree 51 | 52 | ## Message Status: 53 | - A Msg can either be : Ready | Delivered | Acked 54 | - 55 | 56 | --- 57 | 58 | ## TODO : Durable queue 59 | - [x] Change the serialization protocol : from cloudpickle to pickle , basically x6~ performance improvement 60 | - [ ] Improve the batch submit func 61 | - [ ] Append to multiple log semgents 62 | -------------------------------------------------------------------------------- /benchmark/append.prof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/benchmark/append.prof -------------------------------------------------------------------------------- /benchmark/benchmark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from distributed import Client, LocalCluster\n", 10 | "import click\n", 11 | "from daskqueue.Protocol import Message\n", 12 | "from time import perf_counter\n", 13 | "\n", 14 | "from daskqueue import ConsumerPool, Durability, QueuePool\n", 15 | "import tempfile\n", 16 | "\n", 17 | "no_func = lambda: None" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "cprint = click.echo\n", 27 | "gprint = lambda s: click.style(s, fg=\"green\")\n", 28 | "func = lambda x: x + 2\n", 29 | "\n", 30 | "def rdx_msg():\n", 31 | " msg = Message(func, 12)\n", 32 | " return msg\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 5, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stderr", 42 | "output_type": "stream", 43 | "text": [ 44 | "2023-01-02 18:41:10,411 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-64juugzz', purging\n", 45 | "2023-01-02 18:41:10,411 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-jxt3gv4c', purging\n", 46 | "2023-01-02 18:41:10,411 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-_4rzcgcc', purging\n", 47 | "2023-01-02 18:41:10,411 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-l335dt48', purging\n", 48 | "2023-01-02 18:41:10,412 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-cs8k9qde', purging\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "cluster= LocalCluster(\n", 54 | " n_workers=6,\n", 55 | " threads_per_worker=1,\n", 56 | " dashboard_address=\":3338\",\n", 57 | " memory_limit=\"2GB\"\n", 58 | ")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "name": "stderr", 68 | "output_type": "stream", 69 | "text": [ 70 | "/home/amine/Documents/coding/daskqueue/env/lib/python3.10/site-packages/distributed/client.py:1274: VersionMismatchWarning: Mismatched 
versions found\n", 71 | "\n", 72 | "+---------+----------------+----------------+----------------+\n", 73 | "| Package | client | scheduler | workers |\n", 74 | "+---------+----------------+----------------+----------------+\n", 75 | "| python | 3.10.8.final.0 | 3.10.9.final.0 | 3.10.9.final.0 |\n", 76 | "+---------+----------------+----------------+----------------+\n", 77 | " warnings.warn(version_module.VersionMismatchWarning(msg[0][\"warning\"]))\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "client = Client(\"tcp://192.168.1.181:8786\",direct_to_workers=True)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 7, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/html": [ 93 | "
[Dask Client HTML repr omitted: connection tcp://192.168.1.181:8786 (direct), dashboard http://192.168.1.181:8787/status, scheduler with 3 workers / 3 total threads / 5.66 GiB total memory]\n
" 428 | ], 429 | "text/plain": [ 430 | "" 431 | ] 432 | }, 433 | "execution_count": 7, 434 | "metadata": {}, 435 | "output_type": "execute_result" 436 | } 437 | ], 438 | "source": [ 439 | "client.restart()" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 8, 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "name": "stderr", 449 | "output_type": "stream", 450 | "text": [ 451 | "2023-01-03 10:52:42,485,485 INFO: Created 1 queues in Cluster and one QueueManager.\n" 452 | ] 453 | }, 454 | { 455 | "name": "stdout", 456 | "output_type": "stream", 457 | "text": [ 458 | "Write ops \u001b[32m6234.22 wop/s\u001b[0m\n" 459 | ] 460 | } 461 | ], 462 | "source": [ 463 | "N = 10_000\n", 464 | "n_queues = 1\n", 465 | "n_consumers = 1\n", 466 | "\n", 467 | "with tempfile.TemporaryDirectory() as tmpdirname:\n", 468 | " s = perf_counter()\n", 469 | " queue_pool = QueuePool(\n", 470 | " client, n_queues, durability=Durability.DURABLE, dirpath=str(tmpdirname)\n", 471 | " )\n", 472 | "\n", 473 | " _ = queue_pool.batch_submit([(no_func,) for _ in range(N)])\n", 474 | " e = perf_counter()\n", 475 | " bs_ops = N / (e - s)\n", 476 | "\n", 477 | "cprint(f\"Write ops \" + gprint(f\"{bs_ops:.2f} wop/s\"))" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 9, 483 | "metadata": {}, 484 | "outputs": [ 485 | { 486 | "name": "stderr", 487 | "output_type": "stream", 488 | "text": [ 489 | "2023-01-03 10:52:45,407,407 INFO: Starting 1 consumers\n", 490 | "2023-01-03 10:52:45,408,408 INFO: Waiting for the 1 consumers to process all items in queue_pool...\n", 491 | "2023-01-03 10:52:50,093,93 INFO: All consumers are done ! 10000 items processed. \n", 492 | "2023-01-03 10:52:50,094,94 INFO: Cancelling 1 consumers.\n" 493 | ] 494 | }, 495 | { 496 | "name": "stdout", 497 | "output_type": "stream", 498 | "text": [ 499 | "Read ops \u001b[32m6234.22 r_op/s\u001b[0m\n" 500 | ] 501 | }, 502 | { 503 | "name": "stderr", 504 | "output_type": "stream", 505 | "text": [ 506 | "2023-01-03 11:14:45,083 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client\n" 507 | ] 508 | } 509 | ], 510 | "source": [ 511 | "consumer_pool = ConsumerPool(client, queue_pool, n_consumers=n_consumers)\n", 512 | "\n", 513 | "s = perf_counter()\n", 514 | "consumer_pool.start()\n", 515 | "consumer_pool.join(timestep=0.1, progress=False)\n", 516 | "consumer_pool.results()\n", 517 | "\n", 518 | "e = perf_counter()\n", 519 | "\n", 520 | "rps = N / (e - s) # op/s\n", 521 | "\n", 522 | "cprint(f\"Read ops \" + gprint(f\"{bs_ops:.2f} r_op/s\"))" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "## Test Counter Actor" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": 12, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "N = 1_000_000" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 14, 544 | "metadata": {}, 545 | "outputs": [], 546 | "source": [ 547 | "class Counter:\n", 548 | " n = 0\n", 549 | "\n", 550 | " def __init__(self):\n", 551 | " self.n = 0\n", 552 | "\n", 553 | " def increment(self):\n", 554 | " self.n += 1\n", 555 | " return self.n\n", 556 | " def add(self, x):\n", 557 | " self.n += x\n", 558 | " return self.n\n", 559 | "\n", 560 | "future = client.submit(Counter, actor=True) # Create a Counter on a worker\n", 561 | "counter = future.result() \n" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 15, 567 | 
"metadata": {}, 568 | "outputs": [], 569 | "source": [ 570 | "s = perf_counter()\n", 571 | "for _ in range(N):\n", 572 | " future = counter.increment() # Call remote method\n", 573 | " # res = future.result() \n", 574 | "e = perf_counter()\n", 575 | "wops = N / (e - s)" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 9, 581 | "metadata": {}, 582 | "outputs": [ 583 | { 584 | "name": "stdout", 585 | "output_type": "stream", 586 | "text": [ 587 | "41\n", 588 | "51\n", 589 | "38\n" 590 | ] 591 | } 592 | ], 593 | "source": [ 594 | "import random\n", 595 | "while True:\n", 596 | " try : \n", 597 | " x = random.randint(0,100)\n", 598 | " print(x)\n", 599 | " if x < 40:\n", 600 | " raise ValueError(\"error\")\n", 601 | " except ValueError:\n", 602 | " break\n", 603 | "j" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 10, 609 | "metadata": {}, 610 | "outputs": [], 611 | "source": [ 612 | "import itertools\n", 613 | "from typing import Generator, Iterable\n", 614 | "from daskqueue.Protocol import Message\n", 615 | "\n", 616 | "def msg_grouper(n: int, iterable: Iterable, **kwargs) -> Generator:\n", 617 | " it = iter(iterable)\n", 618 | " while True:\n", 619 | " chunk = tuple(itertools.islice(it, n))\n", 620 | " if not chunk:\n", 621 | " return\n", 622 | " yield [Message(func, *args, **kwargs) for func, *args in chunk]\n" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 34, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "batch_size = 10\n", 632 | "f = lambda: None\n", 633 | "N = 100000\n", 634 | "list_calls = [(f,) for _ in range(N)]\n", 635 | "s = 0\n", 636 | "for msgs in msg_grouper(10000, list_calls):\n", 637 | " s +=len(msgs)" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 35, 643 | "metadata": {}, 644 | "outputs": [ 645 | { 646 | "data": { 647 | "text/plain": [ 648 | "100000" 649 | ] 650 | }, 651 | "execution_count": 35, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "s" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [] 666 | } 667 | ], 668 | "metadata": { 669 | "kernelspec": { 670 | "display_name": "Python 3.10.8 ('env': venv)", 671 | "language": "python", 672 | "name": "python3" 673 | }, 674 | "language_info": { 675 | "codemirror_mode": { 676 | "name": "ipython", 677 | "version": 3 678 | }, 679 | "file_extension": ".py", 680 | "mimetype": "text/x-python", 681 | "name": "python", 682 | "nbconvert_exporter": "python", 683 | "pygments_lexer": "ipython3", 684 | "version": "3.10.8" 685 | }, 686 | "orig_nbformat": 4, 687 | "vscode": { 688 | "interpreter": { 689 | "hash": "8f4f5eb83f277e7163073597761add89177199da99666b44ebf83757865b02d2" 690 | } 691 | } 692 | }, 693 | "nbformat": 4, 694 | "nbformat_minor": 2 695 | } 696 | -------------------------------------------------------------------------------- /benchmark/benchmark_log_append.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import time 4 | 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from daskqueue.Protocol import Message 9 | from daskqueue.segment.index_segment import IndexSegment, MessageStatus 10 | from daskqueue.segment.log_segment import LogAccess, LogSegment 11 | 12 | N = 10_000 13 | N_TEST = 10 14 | MAX_BYTES = 100 * int(1e6) # 100 MB 15 | 16 | 17 | def rdx_msg(): 18 | 
func = lambda x: x + 2 19 | msg = Message(func, 12) 20 | return msg 21 | 22 | 23 | def log_segment(tmpdir): 24 | seg_name = str(0).rjust(20, "0") + ".log" 25 | seg_path = os.path.join(tmpdir, seg_name) 26 | 27 | return LogSegment(seg_path, LogAccess.RW, MAX_BYTES) 28 | 29 | 30 | def index_segment(tmpdir): 31 | name = str(0).rjust(10, "0") + ".index" 32 | seg_path = os.path.join(tmpdir, name) 33 | return IndexSegment(seg_path, MAX_BYTES) 34 | 35 | 36 | def write_log(N: int, idx: IndexSegment, log: LogSegment): 37 | s = time.perf_counter() 38 | size = 0 39 | for _ in range(N): 40 | msg = rdx_msg() 41 | offset = log.append(msg) 42 | size += offset.size 43 | idx.append(msg.id, MessageStatus.READY, offset) 44 | e = time.perf_counter() 45 | 46 | wps = N / (e - s) # op/s 47 | w_mbps = size / (e - s) / (1e6) # mbs 48 | 49 | log.read_only() 50 | idx.close() 51 | return w_mbps, wps 52 | 53 | 54 | if __name__ == "__main__": 55 | w_ops = [] 56 | w_mbps = [] 57 | 58 | for i in tqdm(range(N_TEST)): 59 | with tempfile.TemporaryDirectory() as tmpdirname: 60 | if i == 0: 61 | print("Temp dir :", tmpdirname) 62 | log = log_segment(tmpdirname) 63 | idx = index_segment(tmpdirname) 64 | t_wmbps, t_wops = write_log(N, idx, log) 65 | w_ops.append(t_wops) 66 | w_mbps.append(t_wmbps) 67 | 68 | w_ops = np.array(w_ops) 69 | w_mbps = np.array(w_mbps) 70 | 71 | print(f"Mean write ops [{N_TEST}tests] {w_ops.mean():.2f} wop/s") 72 | print(f"Mean write mps [{N_TEST}tests] {w_mbps.mean():.2f} mb/s") 73 | -------------------------------------------------------------------------------- /benchmark/benchmark_nb_tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmarking 1_000_000 tasks using : 3 | - 2 nodes 4 | - 1 thread per process 5 | - 4 queues 6 | - 8 consumers 7 | The tasks were chunked using into 1000 calls of 1000 tasks per batch 8 | The client submits to the QueuePool manager using 9 | The function is 'empty' : just passes and doesn't use CPU or IO 10 | 11 | Processing 1_000_000 empty tasks took 338s = 5min36s 12 | """ 13 | 14 | import time 15 | from concurrent.futures import ThreadPoolExecutor 16 | from functools import partial 17 | from typing import Callable, Tuple 18 | 19 | from daskqueue import ConsumerPool, QueuePool 20 | from daskqueue.Protocol import Message 21 | from daskqueue.utils import logger 22 | from distributed import Client 23 | 24 | 25 | def void_func(): 26 | pass 27 | 28 | 29 | def process_item(): 30 | return sum(i * i for i in range(10**5)) 31 | 32 | 33 | def batch_submit(queue_pool, func: Callable): 34 | # queue_pool.submit(func, *args) 35 | queue_pool.batch_submit([(func,) for _ in range(100)]) 36 | 37 | 38 | if __name__ == "__main__": 39 | 40 | client = Client( 41 | address="tcp://192.168.1.92:8786" 42 | # n_workers=6, 43 | # threads_per_worker=1, 44 | # dashboard_address=":3338", 45 | # direct_to_workers=True, 46 | # n_workers=6, 47 | # threads_per_worker=1, 48 | # dashboard_address=":3338", 49 | # direct_to_workers=True, 50 | ) 51 | 52 | client.restart() 53 | logger.info("Cluster restarted.") 54 | 55 | ## Params 56 | n_queues = 4 57 | n_consumers = 8 58 | n_calls = 1_000 59 | chunk = 1000 60 | 61 | queue_pool = QueuePool(client, n_queues) 62 | 63 | consumer_pool = ConsumerPool(client, queue_pool, n_consumers=n_consumers) 64 | 65 | tic = time.perf_counter() 66 | consumer_pool.start() 67 | 68 | submit_func = partial(batch_submit, queue_pool) 69 | 70 | with ThreadPoolExecutor(max_workers=20) as executor: 71 | _ = executor.map(submit_func, 
[void_func for _ in range(n_calls)]) 72 | 73 | consumer_pool.join(timestep=1, progress=True) 74 | toc = time.perf_counter() 75 | 76 | print(f"Processed all {n_calls*chunk} in {toc - tic:0.4f} seconds") 77 | -------------------------------------------------------------------------------- /benchmark/benchmark_nb_tasks_dask.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmarking 1_000_000 tasks using : 3 | - 2 nodes 4 | - 1 thread per process 5 | - 4 queues 6 | - 8 consumers 7 | You can't submit 1_000_000 tasks using client.submit. 8 | We use launching tasks from tasks : submit 1000 tasks each spawned 1000 task. 9 | The function is 'empty' : just passes and doesn't use CPU or IO 10 | 11 | The scheduler came to a halt ! 12 | """ 13 | 14 | import time 15 | from concurrent.futures import ThreadPoolExecutor 16 | from functools import partial 17 | from typing import Callable, Tuple 18 | 19 | from daskqueue import ConsumerPool, QueuePool 20 | from daskqueue.Protocol import Message 21 | from daskqueue.utils import logger 22 | from distributed import Client, worker_client 23 | 24 | 25 | def void_func(): 26 | pass 27 | 28 | 29 | def process_item(): 30 | return sum(i * i for i in range(10**5)) 31 | 32 | 33 | def batch_submit(chunk): 34 | res = [] 35 | with worker_client() as client: 36 | for _ in range(chunk): 37 | res.append(client.submit(void_func, pure=False)) 38 | 39 | return client.gather(res) 40 | 41 | 42 | if __name__ == "__main__": 43 | 44 | client = Client( 45 | address="tcp://192.168.1.92:8786" 46 | # n_workers=6, 47 | # threads_per_worker=1, 48 | # dashboard_address=":3338", 49 | # direct_to_workers=True, 50 | # n_workers=6, 51 | # threads_per_worker=1, 52 | # dashboard_address=":3338", 53 | # direct_to_workers=True, 54 | ) 55 | 56 | client.restart() 57 | logger.info("Cluster restarted.") 58 | 59 | ## Params 60 | n_queues = 4 61 | n_consumers = 8 62 | n_calls = 1_000 63 | chunk = 1000 64 | 65 | tic = time.perf_counter() 66 | futures = [] 67 | 68 | for _ in range(n_calls): 69 | futures.append(client.submit(batch_submit, chunk, pure=False)) 70 | 71 | res = client.gather(futures) 72 | toc = time.perf_counter() 73 | 74 | print(f"Processed all {n_calls*chunk} in {toc - tic:0.4f} seconds") 75 | -------------------------------------------------------------------------------- /benchmark/benchmark_queue.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from time import perf_counter 3 | 4 | import click 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from daskqueue.Protocol import Message 9 | from daskqueue.queue import BaseQueue, DurableQueue, TransientQueue 10 | 11 | N = 10_000 12 | N_TEST = 10 13 | MAX_BYTES = 100 * int(1e6) # 100 MB 14 | 15 | 16 | cprint = click.echo 17 | gprint = lambda s: click.style(s, fg="green") 18 | 19 | 20 | def func(x): 21 | return x + 2 22 | 23 | 24 | def rdx_msg(): 25 | msg = Message(func, 12) 26 | return msg 27 | 28 | 29 | def read_write_queue(N: int, queue: BaseQueue): 30 | s = perf_counter() 31 | for _ in range(N): 32 | msg = rdx_msg() 33 | queue.put_sync(msg) 34 | e = perf_counter() 35 | wps = N / (e - s) # op/s 36 | 37 | assert queue.qsize() == N 38 | 39 | s = perf_counter() 40 | for _ in range(N): 41 | _ = queue.get_sync() 42 | e = perf_counter() 43 | rps = N / (e - s) # op/s 44 | 45 | return wps, rps 46 | 47 | 48 | @click.command() 49 | @click.option("--durable/--transient", default=False) 50 | def bench(durable): 51 | w_ops = [] 52 | r_ops = 
[] 53 | 54 | for i in tqdm(range(N_TEST)): 55 | with tempfile.TemporaryDirectory() as tmpdirname: 56 | 57 | if durable: 58 | queue = DurableQueue(name="queue-0", dirpath=str(tmpdirname)) 59 | else: 60 | queue = TransientQueue() 61 | 62 | t_wops, t_rops = read_write_queue(N, queue) 63 | 64 | w_ops.append(t_wops) 65 | r_ops.append(t_rops) 66 | 67 | w_ops = np.array(w_ops) 68 | r_ops = np.array(r_ops) 69 | 70 | cprint(f"Mean write ops [{N_TEST}tests] " + gprint(f"{w_ops.mean():.2f} wop/s")) 71 | cprint(f"Mean read ops [{N_TEST}tests] " + gprint(f"{r_ops.mean():.2f} rop/s")) 72 | 73 | 74 | if __name__ == "__main__": 75 | bench() 76 | -------------------------------------------------------------------------------- /benchmark/benchmark_queuepool.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from time import perf_counter, sleep 3 | 4 | import click 5 | import numpy as np 6 | from distributed import Client, LocalCluster 7 | 8 | from conftest import func 9 | from daskqueue import ConsumerPool, Durability, QueuePool 10 | 11 | N_TEST = 1 12 | MAX_BYTES = 100 * int(1e6) # 100 MB 13 | 14 | 15 | cprint = click.echo 16 | gprint = lambda s: click.style(s, fg="green") 17 | 18 | 19 | def read_write_benchmark( 20 | client: Client, 21 | dirpath: str, 22 | N: int, 23 | n_queues: int, 24 | n_consumers: int, 25 | durability: bool, 26 | progress: bool, 27 | sync: bool, 28 | early_ack: bool, 29 | ): 30 | 31 | if durability: 32 | queue_pool = QueuePool( 33 | client, n_queues, durability=Durability.DURABLE, dirpath=str(dirpath) 34 | ) 35 | else: 36 | 37 | queue_pool = QueuePool(client, n_queues) 38 | 39 | consumer_pool = ConsumerPool( 40 | client, queue_pool, n_consumers=n_consumers, batch_size=100, early_ack=early_ack 41 | ) 42 | 43 | s = perf_counter() 44 | 45 | queue_pool.batch_submit([(func, 3) for _ in range(N)], sync_mode=sync) 46 | 47 | # for _ in range(N): 48 | # queue_pool.submit(func_no_return) 49 | e = perf_counter() 50 | 51 | while sum(list(queue_pool.get_queue_size().values())) != N: 52 | sleep(0.1) 53 | 54 | wops = N / (e - s) 55 | 56 | s = perf_counter() 57 | consumer_pool.start() 58 | consumer_pool.join(timestep=0.01, progress=progress) 59 | consumer_pool.results() 60 | e = perf_counter() 61 | rps = N / (e - s) # op/s 62 | 63 | return wops, rps 64 | 65 | 66 | @click.command("cli", context_settings={"show_default": True}) 67 | @click.option("--durable/--transient", default=False) 68 | @click.option("--early/--late", default=True) 69 | @click.option("-v/-q", "--verbose", default=False, help="show queue/consumer progress") 70 | @click.option("-N", "--ntasks", default=10000, help="Number of tasks to send.") 71 | @click.option("--nqueues", default=1, help="Number of queue actors ") 72 | @click.option("--nconsumers", default=1, help="Number of consumer actors.") 73 | @click.option( 74 | "--sync/--async", 75 | default=True, 76 | help="Batch submission is asynchronous or synchrounous.", 77 | ) 78 | def bench(durable, verbose, ntasks, nqueues, nconsumers, sync, early): 79 | w_ops = [] 80 | r_ops = [] 81 | 82 | cluster = LocalCluster( 83 | n_workers=10, 84 | threads_per_worker=1, 85 | dashboard_address=":3338", 86 | worker_dashboard_address=":0", 87 | ) 88 | client = Client(cluster, direct_to_workers=True) 89 | 90 | with tempfile.TemporaryDirectory() as tmpdirname: 91 | t_wops, t_rops = read_write_benchmark( 92 | client, 93 | tmpdirname, 94 | ntasks, 95 | nqueues, 96 | nconsumers, 97 | durable, 98 | verbose, 99 | sync, 100 | early, 101 | ) 102 | 
w_ops.append(t_wops) 103 | r_ops.append(t_rops) 104 | 105 | w_ops = np.array(w_ops) 106 | r_ops = np.array(r_ops) 107 | 108 | cprint(f"Mean write ops [{N_TEST}tests] " + gprint(f"{w_ops.mean():.2f} wop/s")) 109 | cprint(f"Mean read ops [{N_TEST}tests] " + gprint(f"{r_ops.mean():.2f} rop/s")) 110 | 111 | client.close() 112 | cluster.close() 113 | 114 | 115 | if __name__ == "__main__": 116 | bench() 117 | -------------------------------------------------------------------------------- /benchmark/benchmark_throughput.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmarking 1_000_000 tasks using : 3 | - 2 nodes 4 | - 1 thread per process 5 | - 4 queues 6 | - 8 consumers 7 | The tasks were chunked using into 1000 calls of 1000 tasks per batch 8 | The client submits to the QueuePool manager using 9 | The function is 'empty' : just passes and doesn't use CPU or IO 10 | 11 | Processing 1_000_000 empty tasks took 338s = 5min36s 12 | """ 13 | 14 | import itertools 15 | import os 16 | import time 17 | from typing import Callable 18 | 19 | import pandas as pd 20 | from daskqueue import ConsumerPool, QueuePool 21 | import daskqueue 22 | from daskqueue.utils import logger 23 | from distributed import Client, LocalCluster 24 | from datetime import datetime 25 | 26 | 27 | def save_results(params): 28 | result_path = "/home/amine/Documents/programming/dask-queue/benchmark/results.csv" 29 | if os.path.exists(result_path): 30 | df = pd.read_csv(result_path) 31 | df = df.append(params, ignore_index=True) 32 | df.to_csv(result_path, index=False) 33 | else: 34 | df = pd.DataFrame([params]) 35 | df.to_csv(result_path, index=False) 36 | 37 | 38 | def batch_submit(queue_pool, func: Callable): 39 | # queue_pool.submit(func, *args) 40 | queue_pool.batch_submit([(func,) for _ in range(100)]) 41 | 42 | 43 | def slowinc(x, delay=0.1): 44 | time.sleep(delay) 45 | return x + 1 46 | 47 | 48 | def run(params): 49 | cluster = LocalCluster( 50 | n_workers=4, 51 | threads_per_worker=params["thread_per_consumer"], 52 | dashboard_address=":3338", 53 | ) 54 | client = Client( 55 | cluster, 56 | direct_to_workers=True, 57 | ) 58 | 59 | queue_pool = QueuePool(client, params["n_queues"]) 60 | 61 | consumer_pool = ConsumerPool( 62 | client, 63 | queue_pool, 64 | n_consumers=params["n_consumers"], 65 | max_concurrency=params["max_concurrency"], 66 | ) 67 | 68 | tic = time.perf_counter() 69 | consumer_pool.start(timeout=1) 70 | 71 | for _ in range(params["n_calls"]): 72 | queue_pool.submit(slowinc, 1, params["task_duration"]) 73 | 74 | consumer_pool.join(timestep=0.001) 75 | toc = time.perf_counter() 76 | 77 | print(f"Processed all items in {toc - tic:0.4f} seconds") 78 | 79 | params["job_time"] = toc - tic 80 | params["version"] = str(daskqueue.__version__) 81 | params["timestamp"] = datetime.now() 82 | 83 | save_results(params) 84 | client.close() 85 | cluster.close() 86 | 87 | 88 | if __name__ == "__main__": 89 | ## Params 90 | n_queues = [1, 2] 91 | n_consumers = [10] 92 | thread_per_consumer = [1] 93 | max_concurrency = [1] 94 | task_duration = [0.1] 95 | task_release_GIL = [False] 96 | n_calls = [10000, 20000] 97 | 98 | list = [ 99 | n_queues, 100 | n_consumers, 101 | thread_per_consumer, 102 | max_concurrency, 103 | task_duration, 104 | task_release_GIL, 105 | n_calls, 106 | ] 107 | 108 | combinations = [p for p in itertools.product(*list)] 109 | combination = pd.DataFrame( 110 | combinations, 111 | columns=( 112 | "n_queues", 113 | "n_consumers", 114 | "thread_per_consumer", 
115 | "max_concurrency", 116 | "task_duration", 117 | "task_release_GIL", 118 | "n_calls", 119 | ), 120 | ) 121 | df = combination[(combination.n_queues <= combination.n_consumers)] 122 | df = df[((df.n_calls >= 1000) & (df.n_consumers > 2)) | ((df.n_calls < 1000))] 123 | 124 | for _, params in df.iterrows(): 125 | logger.info(f"Run params :{params}") 126 | run(params=params) 127 | -------------------------------------------------------------------------------- /benchmark/profile_durable_queue.py: -------------------------------------------------------------------------------- 1 | import cProfile 2 | import tempfile 3 | 4 | from daskqueue.Protocol import Message 5 | from daskqueue.queue import DurableQueue 6 | 7 | N = 10_000 8 | N_TEST = 10 9 | MAX_BYTES = 100 * int(1e6) # 100 MB 10 | 11 | 12 | func = lambda x: x + 2 13 | 14 | 15 | def rdx_msg(): 16 | msg = Message(func, 12) 17 | return msg 18 | 19 | 20 | def read_write_queue(queue, msg): 21 | queue.put_sync(msg) 22 | pop_msg = queue.get_sync() 23 | return pop_msg 24 | 25 | 26 | if __name__ == "__main__": 27 | with tempfile.TemporaryDirectory() as tmpdirname: 28 | queue = DurableQueue(name="queue-0", dirpath=str(tmpdirname)) 29 | msg = rdx_msg() 30 | 31 | cProfile.run("read_write_queue(queue,msg)", filename="benchmark/queue.prof") 32 | -------------------------------------------------------------------------------- /benchmark/profile_log_append.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | from daskqueue.Protocol import Message 5 | from daskqueue.segment.index_segment import IndexSegment, MessageStatus 6 | from daskqueue.segment.log_segment import LogAccess, LogSegment 7 | 8 | N = 10_000 9 | N_TEST = 10 10 | MAX_BYTES = 100 * int(1e6) # 100 MB 11 | 12 | 13 | func = lambda x: x + 2 14 | 15 | 16 | def rdx_msg(): 17 | msg = Message(func, 12) 18 | return msg 19 | 20 | 21 | def log_segment(tmpdir): 22 | seg_name = str(0).rjust(20, "0") + ".log" 23 | seg_path = os.path.join(tmpdir, seg_name) 24 | 25 | return LogSegment(seg_path, LogAccess.RW, MAX_BYTES) 26 | 27 | 28 | def index_segment(tmpdir): 29 | name = str(0).rjust(10, "0") + ".index" 30 | seg_path = os.path.join(tmpdir, name) 31 | return IndexSegment(seg_path, MAX_BYTES) 32 | 33 | 34 | def write_log(N: int, idx: IndexSegment, log: LogSegment): 35 | msg = rdx_msg() 36 | offset = log.append(msg) 37 | idx.append(msg.id, MessageStatus.READY, offset) 38 | log.read_only() 39 | idx.close() 40 | 41 | 42 | def read_log(N: int, idx: IndexSegment, log: LogSegment): 43 | 44 | log.read_only() 45 | idx.close() 46 | 47 | 48 | if __name__ == "__main__": 49 | with tempfile.TemporaryDirectory() as tmpdirname: 50 | print("Temp dir :", tmpdirname) 51 | log = log_segment(tmpdirname) 52 | idx = index_segment(tmpdirname) 53 | write_log(N, idx, log) 54 | -------------------------------------------------------------------------------- /benchmark/queue.prof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/benchmark/queue.prof -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pytest 4 | from distributed.utils_test import ( # noqa 5 | cleanup, 6 | client, 7 | cluster_fixture, 8 | gen_cluster, 9 | loop, 10 | ) 11 | 12 | from daskqueue.Protocol import Message 
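# Editorial sketch (not part of the original file): Message wraps a callable
# plus its arguments, which is what the fixtures below hand to the segments.
# Roughly:
#
#   msg = Message(func, 12)   # stores functools.partial(func, 12) in msg.data
#   msg.data()                # -> 14 with func = lambda x: x + 2
#   msg.id                    # uuid4 used later to ack the message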
13 | from daskqueue.queue.durable_queue import DurableQueue 14 | from daskqueue.segment.index_segment import IndexSegment 15 | from daskqueue.segment.log_segment import LogAccess, LogSegment 16 | 17 | MAX_BYTES = 100 * 1024 # 100KB 18 | 19 | 20 | def sleep_func(t): 21 | time.sleep(t) 22 | return t 23 | 24 | 25 | # def func(x): 26 | # return x + 2 27 | 28 | func = lambda x: x + 2 29 | 30 | 31 | @pytest.fixture 32 | def msg(): 33 | msg = Message(func, 12) 34 | return msg 35 | 36 | 37 | @pytest.fixture 38 | def log_segment(tmp_path): 39 | seg_name = str(0).rjust(20, "0") + ".log" 40 | seg_path = tmp_path / seg_name 41 | 42 | seg = LogSegment(seg_path, LogAccess.RW, MAX_BYTES) 43 | return seg 44 | 45 | 46 | @pytest.fixture 47 | def index_segment(tmp_path): 48 | name = str(0).rjust(10, "0") + ".index" 49 | index_path = tmp_path / name 50 | idx = IndexSegment(index_path) 51 | return idx 52 | 53 | 54 | @pytest.fixture 55 | def durable_queue(tmp_path): 56 | return DurableQueue(name="queue-0", dirpath=str(tmp_path)) 57 | -------------------------------------------------------------------------------- /daskconfig.yaml: -------------------------------------------------------------------------------- 1 | distributed: 2 | comm: 3 | retry: 4 | count: 100 5 | delay: 6 | max: 400s 7 | timeouts: 8 | connect: 5000s # time before connecting fails 9 | tcp: 10000s # time before calling an unresponsive connection dead 10 | 11 | worker: 12 | multiprocessing-method: spawn 13 | use-file-locking: False 14 | memory: 15 | # rebalance: 16 | # measure: managed_in_memory 17 | spill: false 18 | pause: false 19 | terminate: false 20 | # nanny: 21 | # environ: 22 | # # NOTE: Releases memory back to the OS, check this for more info: 23 | # # https://distributed.dask.org/en/latest/worker.html#memory-not-released-back-to-the-os 24 | # MALLOC_TRIM_THRESHOLD_: 0 25 | 26 | logging: 27 | distributed: error 28 | distributed.client: error 29 | bokeh: error -------------------------------------------------------------------------------- /daskqueue/ActorPool.py: -------------------------------------------------------------------------------- 1 | """THis class is heavily inspired from the Ray's ActorPool. 2 | It tries to mirror how ray reimplements the map interface to loadbalance across Actors. 3 | 4 | NOTE : Under heavy development ! 5 | """ 6 | 7 | # from actors.dask_actors import QueueActor 8 | from typing import Any, Callable, List 9 | 10 | from distributed import wait 11 | 12 | from .queue.transient_queue import TransientQueue 13 | 14 | 15 | class ActorPool: 16 | """Utility class to operate on a fixed pool of actors. 17 | 18 | Arguments: 19 | actors (list): List of DaskActors to use in this pool. 20 | """ 21 | 22 | def __init__(self, actors: List[TransientQueue]): 23 | # actors to be used 24 | self._idle_actors = list(actors) 25 | 26 | # get actor from future 27 | self._future_to_actor = {} 28 | 29 | self._future_key = 0 30 | 31 | # get future from index 32 | self._index_to_future = {} 33 | 34 | # next task to do 35 | self._next_task_index = 0 36 | 37 | # next task to return 38 | self._next_return_index = 0 39 | 40 | # next work depending when actors free 41 | self._pending_submits = [] 42 | 43 | def map(self, fn: Callable, values: Any = None): 44 | """Apply the given function in parallel over the actors and values. 45 | 46 | This returns an ordered iterator that will return results of the map 47 | as they finish. Note that you must iterate over the iterator to force 48 | the computation to finish. 
49 |
50 |         Arguments:
51 |             fn (func): Function that takes (actor, value) as argument and
52 |                 returns an ObjectRef computing the result over the value. The
53 |                 actor will be considered busy until the ObjectRef completes.
54 |             values (list): List of values that fn(actor, value) should be
55 |                 applied to.
56 |
57 |         Returns:
58 |             Iterator over results from applying fn to the actors and values.
59 |
60 |         """
61 |         # Ignore/Cancel all the previous submissions
62 |         # by calling `has_next` and `get_next` repeatedly.
63 |         while self.has_next():
64 |             try:
65 |                 self.get_next(timeout=0)
66 |             except TimeoutError:
67 |                 pass
68 |
69 |         if values is not None:
70 |             for v in values:
71 |                 self.submit(fn, v)
72 |         else:
73 |             self.submit(fn)
74 |
75 |         result = []
76 |         while self.has_next():
77 |             result.append(self.get_next())
78 |
79 |         return result
80 |
81 |     def submit(self, fn, value=None):
82 |         """Schedule a single task to run in the pool.
83 |
84 |         This has the same argument semantics as map(), but takes a single
85 |         value instead of a list of values. The result can be retrieved using
86 |         get_next() / get_next_unordered().
87 |
88 |         Arguments:
89 |             fn (func): Function that takes (actor, value) as argument and
90 |                 returns an ObjectRef computing the result over the value. The
91 |                 actor will be considered busy until the ObjectRef completes.
92 |             value (object): Value to compute a result for.
93 |
94 |         """
95 |         if self._idle_actors:
96 |             actor = self._idle_actors.pop()
97 |             if value is not None:
98 |                 future = fn(actor, value)
99 |             else:
100 |                 future = fn(actor)
101 |             # Key the bookkeeping dict by the future itself so get_next() can pop it.
102 |             future_key = tuple(future) if isinstance(future, list) else future
103 |             self._future_to_actor[future_key] = (self._next_task_index, actor)
104 |             self._index_to_future[self._next_task_index] = future
105 |             self._next_task_index += 1
106 |             # print(
107 |             #     f"future_key : {future_key}, future : {future}, result : {future.result()}"
108 |             # )
109 |             print("index to future: ", self._index_to_future)
110 |             print("future to actor", self._future_to_actor)
111 |         else:
112 |             self._pending_submits.append((fn, value))
113 |
114 |     def has_next(self):
115 |         """Returns whether there are any pending results to return.
116 |
117 |         Returns:
118 |             True if there are any pending results not yet returned.
119 |
120 |         """
121 |         return bool(self._future_to_actor)
122 |
123 |     def get_next(self, timeout=None):
124 |         """Returns the next pending result in order.
125 |
126 |         This returns the next result produced by submit(), blocking for up to
127 |         the specified timeout until it is available.
128 |
129 |         Returns:
130 |             The next result.
131 |
132 |         Raises:
133 |             TimeoutError if the timeout is reached.
134 |
135 |         """
136 |         if not self.has_next():
137 |             raise StopIteration("No more results to get")
138 |         if self._next_return_index >= self._next_task_index:
139 |             raise ValueError(
140 |                 "It is not allowed to call get_next() after get_next_unordered()."
141 |             )
142 |         future = self._index_to_future[self._next_return_index]
143 |         if timeout is not None:
144 |             res, _ = wait([future], timeout=timeout)
145 |             if not res:
146 |                 raise TimeoutError("Timed out waiting for result")
147 |         del self._index_to_future[self._next_return_index]
148 |         self._next_return_index += 1
149 |
150 |         future_key = tuple(future) if isinstance(future, list) else future
151 |         i, a = self._future_to_actor.pop(future_key)
152 |
153 |         self._return_actor(a)
154 |
155 |         print("result returned : ", future.result())
156 |
157 |         return future.result()
158 |
159 |     def _return_actor(self, actor):
160 |         self._idle_actors.append(actor)
161 |         if self._pending_submits:
162 |             self.submit(*self._pending_submits.pop(0))
163 |
164 |     def has_free(self):
165 |         """Returns whether there are any idle actors available.
166 |
167 |         Returns:
168 |             True if there are any idle actors and no pending submits.
169 |
170 |         """
171 |         return len(self._idle_actors) > 0 and len(self._pending_submits) == 0
172 |
173 |     def pop_idle(self):
174 |         """Removes an idle actor from the pool.
175 |
176 |         Returns:
177 |             An idle actor if one is available.
178 |             None if no actor was free to be removed.
179 |
180 |         """
181 |         if self.has_free():
182 |             return self._idle_actors.pop()
183 |         return None
184 |
185 |     def push(self, actor):
186 |         """Pushes a new actor into the current list of idle actors."""
187 |         busy_actors = []
188 |         if self._future_to_actor.values():
189 |             _, busy_actors = zip(*self._future_to_actor.values())
190 |         if actor in self._idle_actors or actor in busy_actors:
191 |             raise ValueError("Actor already belongs to current ActorPool")
192 |         else:
193 |             self._idle_actors.append(actor)
194 |
-------------------------------------------------------------------------------- /daskqueue/Consumer.py: --------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | import os
4 | from abc import ABC, abstractmethod
5 | from collections import defaultdict
6 | from typing import Any, Dict, List
7 |
8 | from distributed import get_worker
9 |
10 | from daskqueue.queue.base_queue import BaseQueue
11 | from daskqueue.utils import logger
12 |
13 | from .backend import Backend
14 | from .Protocol import Message
15 |
16 |
17 | class ConsumerBaseClass(ABC):
18 |     def __init__(
19 |         self,
20 |         id: int,
21 |         name: str,
22 |         pool,
23 |         batch_size: int,
24 |         max_concurrency: int,
25 |         retries: int,
26 |         early_ack: bool,
27 |     ) -> None:
28 |         self.id = id
29 |         self.name = name + f"-{os.getpid()}"
30 |         self.pool = pool
31 |         self.future = None
32 |         self.items = []
33 |         self._worker = get_worker()
34 |         self._executor = self._worker.executor
35 |         self._running_tasks = []
36 |         self._logger = logger
37 |         self.max_concurrency = max_concurrency
38 |         self.batch_size = batch_size
39 |         self.n_retries = retries
40 |         self.early_ack = early_ack
41 |
42 |         logger.debug(
43 |             f"Consumer specs : batch : {batch_size}, retry : {self.n_retries}, max_concurrency: {max_concurrency}"
44 |         )
45 |
46 |     async def len_items(self) -> int:
47 |         return len(self.items)
48 |
49 |     async def len_pending_items(self) -> int:
50 |         return len(self._running_tasks)
51 |
52 |     async def get_current_queue(self) -> BaseQueue:
53 |         return self._current_q
54 |
55 |     async def get_items(self) -> List[Any]:
56 |         return self.items
57 |
58 |     async def done(self) -> bool:
59 |         """Checks if the consumer is done. A done consumer has a closed fetch_loop
60 |         and no running tasks.
61 | 62 | Returns: 63 | done (bool): True if the consumer is done consumming. 64 | """ 65 | await self.update_state() 66 | try: 67 | done = ( 68 | len(self._running_tasks) == 0 69 | and self.fetch_loop.done() 70 | # and len(self.items) > 0 71 | ) 72 | return done 73 | except AttributeError: 74 | # Hasn't started the loop yet 75 | return False 76 | 77 | async def is_consumming(self) -> bool: 78 | return not (self.fetch_loop.cancelled() or self.fetch_loop.done()) 79 | 80 | async def start(self, timeout: int = 1) -> None: 81 | """Starts the consumming loop, runs on Dask Worker's Tornado event loop.""" 82 | self._current_q = await self.pool.get_next_queue() 83 | self.fetch_loop = asyncio.create_task(self._consume(timeout)) 84 | 85 | async def update_state(self) -> None: 86 | _done_tasks = [task for task in self._running_tasks if task.done()] 87 | for task in _done_tasks: 88 | logger.debug(f"[{self.name}]: Cleaning done tasks") 89 | self._running_tasks.remove(task) 90 | 91 | async def _ack_items(self, items: List[Message]): 92 | logger.debug(f"[Consumer-{self.name}- Ackings items {items}") 93 | await self._current_q.ack_many( 94 | [(item.delivered_timestamp, item.id) for item in items] 95 | ) 96 | 97 | async def _ack_late_item(self, task: asyncio.Future, item: Message): 98 | await task 99 | await self._current_q.ack(item.delivered_timestamp, item.id) 100 | 101 | async def _consume(self, timeout: float = 0.1) -> None: 102 | """Runs an async loop to fetch item from a queue determined by the QueuePool and processes it in place""" 103 | loop = asyncio.get_event_loop() 104 | 105 | while True: 106 | await self.update_state() 107 | items = await self._current_q.get_many(self.batch_size, timeout=timeout) 108 | items = [item for item in items if item] 109 | 110 | if len(items) > 0: 111 | if self.early_ack: 112 | await self._ack_items(items) 113 | 114 | for item in items: 115 | logger.debug(f"[{self.name}]: Received item : {item}") 116 | if item is None: 117 | continue 118 | 119 | self.items.append(item) 120 | 121 | task = asyncio.ensure_future( 122 | loop.run_in_executor(self._executor, self.process_item, item), 123 | ) 124 | 125 | if not self.early_ack: 126 | # NOTE: Ack each message individualy 127 | ack_task = asyncio.create_task(self._ack_late_item(task, item)) 128 | self._running_tasks.append(ack_task) 129 | 130 | self._running_tasks.append(task) 131 | if len(self._running_tasks) > self.max_concurrency: 132 | done, pending = await asyncio.wait( 133 | self._running_tasks, return_when=asyncio.FIRST_COMPLETED 134 | ) 135 | 136 | async def cancel(self) -> bool: 137 | """Cancels the running _consume task""" 138 | logging.info( 139 | f"[{self.name}]: Cancelling {len(self._running_tasks)} outstanding tasks" 140 | ) 141 | done = True 142 | # Waits for all pending tasks to finish before killing 143 | await self.update_state() 144 | # TODO : Check that everything is shutdown 145 | try: 146 | self.fetch_loop.cancel() 147 | if len(self._running_tasks) > 0: 148 | done, _ = await asyncio.wait( 149 | self._running_tasks, return_when=asyncio.ALL_COMPLETED 150 | ) 151 | finally: 152 | return True 153 | 154 | @abstractmethod 155 | def process_item(self, item: Any): 156 | """Process items from the queue.""" 157 | raise NotImplementedError 158 | 159 | 160 | class DummyConsumer(ConsumerBaseClass): 161 | def process_item(self, item): 162 | logger.info(f"[{self.name}]: Processing {item}") 163 | 164 | 165 | class GeneralConsumer(ConsumerBaseClass): 166 | def __init__( 167 | self, 168 | id: int, 169 | name, 170 | pool, 171 
| batch_size, 172 | max_concurrency: int, 173 | retries: int, 174 | early_ack: bool, 175 | backend: Backend = None, 176 | ) -> None: 177 | self.backend = backend 178 | self._results = defaultdict(lambda: None) 179 | super().__init__( 180 | id, name, pool, batch_size, max_concurrency, retries, early_ack 181 | ) 182 | 183 | def get_results(self) -> Dict[Message, Any]: 184 | return self._results 185 | 186 | def save(self, msg, result: Any) -> None: 187 | logger.debug(f"[{self.name}] Saving result for item : {msg}") 188 | self._results[hash(msg)] = result 189 | if self.backend: 190 | self.backend.save(result) 191 | 192 | def process_item(self, msg: Message) -> None: 193 | logger.debug(f"[{self.name}] Processing item : {msg.data}") 194 | result = msg.data() 195 | self.save(msg, result) 196 | -------------------------------------------------------------------------------- /daskqueue/ConsumerPool.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import time 3 | from typing import TypeVar 4 | 5 | from distributed import Client 6 | 7 | from daskqueue import ConsumerBaseClass, GeneralConsumer 8 | from daskqueue.utils import logger 9 | 10 | from .QueuePool import QueuePool 11 | 12 | TConsumer = TypeVar("TConsumer", bound=ConsumerBaseClass) 13 | 14 | 15 | class ConsumerPool: 16 | _counter = itertools.count() 17 | 18 | def __init__( 19 | self, 20 | client: Client, 21 | queue_pool: QueuePool, 22 | ConsumerClass: TConsumer = GeneralConsumer, 23 | n_consumers: int = 1, 24 | batch_size: int = 1, 25 | retries: int = 1, 26 | early_ack: bool = True, 27 | max_concurrency: int = 10000, 28 | ) -> None: 29 | if not issubclass(ConsumerClass, ConsumerBaseClass): 30 | raise Exception( 31 | "ConsumerClass passed should be a subclass of ConsumerBaseClasse" 32 | ) 33 | self.n_consumers = n_consumers 34 | self.queue_pool = queue_pool.actor 35 | self.consumer_class = ConsumerClass 36 | self.consumers = {} 37 | self.batch_size = batch_size 38 | for idx in range(n_consumers): 39 | name = f"{ConsumerClass.__name__}-{idx}" 40 | self.consumers[name] = client.submit( 41 | ConsumerClass, 42 | idx + 1, 43 | name, 44 | self.queue_pool, 45 | self.batch_size, 46 | max_concurrency, 47 | retries, 48 | early_ack, 49 | actor=True, 50 | ).result() 51 | 52 | # def __repr__(self) -> str: 53 | # return f"ConsumerPool : \n\t{self.n_consumers} Consumer(s) \n\t{self.nb_consumed()} items consummed" 54 | 55 | def __repr__(self) -> str: 56 | if len(self.consumers) < 5: 57 | consumer_info = [ 58 | f"\n\t{c_name}: {consumer.len_items().result()} received, {consumer.len_pending_items().result()} pending tasks" 59 | for c_name, consumer in self.consumers.items() 60 | ] 61 | 62 | return f"Consumers : {self.n_consumers} Consumers(s)" + "".join( 63 | consumer_info 64 | ) 65 | else: 66 | sum_received = sum( 67 | [consumer.len_items().result() for consumer in self.consumers.values()] 68 | ) 69 | sum_pending = sum( 70 | [ 71 | consumer.len_pending_items().result() 72 | for consumer in self.consumers.values() 73 | ] 74 | ) 75 | return f"Consumers : \n\t{self.n_consumers} Consumers(s), \n\t{sum_received} received \n\t{sum_pending} pending " 76 | 77 | def __getitem__(self, idx: int) -> ConsumerBaseClass: 78 | return list(self.consumers.values())[idx] 79 | 80 | def __len__(self) -> int: 81 | return len(self.consumers) 82 | 83 | def start(self, timeout: int = 1) -> None: 84 | """Start the consumme loop in each consumer""" 85 | logger.info(f"Starting {self.n_consumers} consumers") 86 | 
[c.start(timeout) for c in self.consumers.values()] 87 | 88 | def nb_consumed(self) -> None: 89 | """Return the total number of items consumed by our ConsumerPool""" 90 | return sum([c.len_items().result() for c in self.consumers.values()]) 91 | 92 | def join( 93 | self, timestep: int = 0.1, print_timestep: int = 2, progress: bool = False 94 | ) -> None: 95 | """Join ConsumerPool will wait until all consumer are done processing items. 96 | Basically have processed all the elements of the queue_pool. 97 | We then cancel consumer to make sure the while loop is closed 98 | 99 | Args: 100 | timestep (int, optional): time step (in seconds) to wait between each check. Defaults to 2. 101 | """ 102 | logger.info( 103 | f"Waiting for the {self.n_consumers} consumers to process all items in queue_pool..." 104 | ) 105 | start_join = time.time() 106 | 107 | while True: 108 | n_pending = sum(list(self.queue_pool.get_queue_size().result().values())) 109 | if n_pending > 0: 110 | if progress and (time.time() - start_join > print_timestep): 111 | logger.debug("Still processing...") 112 | logger.info(self.queue_pool.print().result()) 113 | logger.info(self) 114 | start_join = time.time() 115 | time.sleep(timestep) 116 | else: 117 | logger.info( 118 | f"All consumers are done ! {self.nb_consumed()} items processed. " 119 | ) 120 | break 121 | 122 | consumer_state = self.cancel() 123 | logger.info(f"Consumer state : {consumer_state}") 124 | 125 | def results(self) -> None: 126 | """Start the consumme loop in each consumer""" 127 | if not hasattr(self.consumer_class, "get_results"): 128 | raise NotImplementedError( 129 | "Please Implement a .get_results method in your ConsumerClass" 130 | ) 131 | return {k: self.consumers[k].get_results().result() for k in self.consumers} 132 | 133 | def cancel(self) -> None: 134 | """Cancels the consume loop task in each consumer.""" 135 | logger.info(f"Cancelling {self.n_consumers} consumers.") 136 | [c.cancel().result() for c in self.consumers.values()] 137 | -------------------------------------------------------------------------------- /daskqueue/Protocol.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import time 3 | import uuid 4 | from functools import partial 5 | from typing import Callable 6 | 7 | import cloudpickle 8 | 9 | 10 | class Message: 11 | """Message interface.""" 12 | 13 | def __init__(self, func, *args, **kwargs) -> None: 14 | self.id = uuid.uuid4() 15 | self.timestamp = int(time.time()) 16 | self.delivered_timestamp = None 17 | 18 | if not isinstance(func, Callable): 19 | raise RuntimeError 20 | self.data = partial(func, *args, **kwargs) 21 | 22 | def __hash__(self) -> int: 23 | return hash(self.id) 24 | 25 | def __str__(self) -> str: 26 | return f"Message {self.id.int}" #: \n\t\tfunction = {self.func.__name__} \n\t\targs={self._data[0]} \n\t\tkwargs={self._data[1]} " 27 | 28 | def serialize(self) -> bytes: 29 | try: 30 | return pickle.dumps(self) 31 | except pickle.PicklingError: 32 | return cloudpickle.dumps(self) 33 | -------------------------------------------------------------------------------- /daskqueue/QueuePool.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | import itertools 4 | import os 5 | from concurrent.futures import ThreadPoolExecutor 6 | from typing import Any, Callable, Dict, List, Tuple, TypeVar, Union 7 | 8 | import numpy as np 9 | from distributed.worker import get_client, get_worker 10 | 11 | 
from daskqueue.queue import BaseQueue, TransientQueue 12 | from daskqueue.queue.base_queue import Durability 13 | from daskqueue.queue.durable_queue import DurableQueue 14 | from daskqueue.utils import logger 15 | from daskqueue.utils.funcs import msg_grouper 16 | 17 | from .Consumer import ConsumerBaseClass, GeneralConsumer 18 | from .Protocol import Message 19 | 20 | TConsumer = TypeVar("TConsumer", bound=ConsumerBaseClass) 21 | 22 | 23 | class PutTimeout(Exception): 24 | pass 25 | 26 | 27 | class QueuePoolActor: 28 | """Utility class to operate on a fixed pool of queues. 29 | 30 | Arguments: 31 | queues (list): List of DaskActors to use in this pool. 32 | """ 33 | 34 | def __init__( 35 | self, 36 | n_queues: int, 37 | durability: Durability = Durability.TRANSIENT, 38 | ack_timeout: int = 5, 39 | retry: bool = False, 40 | **kwargs, 41 | ): 42 | self.ack_timeout = ack_timeout 43 | self.retry = retry 44 | # actors to be used 45 | self._client = get_client() 46 | self.n_queues = n_queues 47 | self._queues = self.create_queues(n_queues, durability, **kwargs) 48 | 49 | self._cycle_queues_put = itertools.cycle(self._queues) 50 | self._cycle_queues_get = itertools.cycle(self._queues) 51 | 52 | self.n_queues = n_queues 53 | self._index_queue = {q.key: q for q in self._queues} 54 | self._queue_size = {q.key: 0 for q in self._queues} 55 | self.worker_class = GeneralConsumer 56 | self._total_put = 0 57 | 58 | @property 59 | def _worker(self): 60 | return get_worker() 61 | 62 | @property 63 | def _io_loop(self) -> asyncio.BaseEventLoop: 64 | if self._worker: 65 | return self._worker.io_loop 66 | 67 | def print(self) -> str: 68 | if len(self._queues) < 5: 69 | qsize = [ 70 | f"\n\t{idx}: {q.qsize().result()} pending items" 71 | for idx, q in self._index_queue.items() 72 | ] 73 | 74 | return f"QueuePool : {self.n_queues} queue(s)" + "".join(qsize) 75 | else: 76 | sum_qsize = sum([q.qsize().result() for q in self._queues]) 77 | return f"QueuePool : \n\t{self.n_queues} queue(s) \n\t{self._total_put} received \n\t{sum_qsize} pending" 78 | 79 | def get_len(self) -> int: 80 | return len(self._index_queue) 81 | 82 | def create_queues(self, n_queues: int, durability: Durability, **kwargs): 83 | if durability == Durability.TRANSIENT: 84 | return [ 85 | self._client.submit( 86 | TransientQueue, 87 | maxsize=-1, 88 | ack_timeout=self.ack_timeout, 89 | retry=self.retry, 90 | actor=True, 91 | ).result() 92 | for _ in range(n_queues) 93 | ] 94 | if durability == Durability.DURABLE: 95 | dirpath = kwargs["dirpath"] if "dirpath" in kwargs else "/tmp/" 96 | return [ 97 | self._client.submit( 98 | DurableQueue, 99 | f"queue-{i}", 100 | dirpath, 101 | ack_timeout=self.ack_timeout, 102 | retry=self.retry, 103 | actor=True, 104 | ).result() 105 | for i in range(n_queues) 106 | ] 107 | raise ValueError("Please provide a correct durability type.") 108 | 109 | async def get_queues(self) -> List[BaseQueue]: 110 | return self._queues 111 | 112 | def get_next_queue(self) -> BaseQueue: 113 | return next(self._cycle_queues_get) 114 | 115 | async def get_queue(self, idx: int) -> BaseQueue: 116 | return self._queues[idx] 117 | 118 | def get_queue_size(self) -> Dict[str, int]: 119 | return {q: q.qsize().result() for q in self._queues} 120 | 121 | def _get_random_queue(self) -> BaseQueue: 122 | idx = np.random.randint(len(self._queues)) 123 | return self._queues[idx] 124 | 125 | # TODO : Don't need this ?? 
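    # Editorial sketch (not original code): this actor is normally driven
    # through the QueuePool interface defined at the bottom of this module.
    # Assuming a running Dask client, something like:
    #
    #   pool = QueuePool(client, n_queues=2,
    #                    durability=Durability.DURABLE,
    #                    dirpath="/tmp/daskqueue")        # hypothetical path
    #   pool.submit(sum, [1, 2, 3])
    #
    # Each put advances _cycle_queues_put, so successive messages round-robin
    # across the underlying queue actors.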
126 | async def get_max_queue(self) -> BaseQueue: 127 | queues_size = self.get_queue_size() 128 | logger.info(f"queues_size : {queues_size}") 129 | size_max, q_max = max(zip(queues_size.values(), queues_size.keys())) 130 | if size_max == 0: 131 | return None 132 | return q_max 133 | 134 | async def submit( 135 | self, 136 | func: Callable, 137 | *args, 138 | timeout=None, 139 | worker_class=GeneralConsumer, 140 | **kwargs, 141 | ): 142 | if not issubclass(worker_class, GeneralConsumer): 143 | raise RuntimeError( 144 | "Can't submit arbitrary tasks to arbitrary consumer. Please use the default GeneralConsumer class" 145 | ) 146 | msg = Message(func, *args, **kwargs) 147 | await self.put(msg, timeout=timeout) 148 | 149 | def put_many_sync(self, list_items: List[Any]) -> None: 150 | q = next(self._cycle_queues_put) 151 | 152 | logger.debug(f"Sending {len(list_items)} item to queue : {q}") 153 | q.put_many(list_items).result() 154 | self._total_put += len(list_items) 155 | 156 | async def put(self, msg: Union[Message, Any], timeout=None) -> None: 157 | try: 158 | logger.debug(f"[QueuePool] Item put in queue: {msg}") 159 | q = next(self._cycle_queues_put) 160 | # TODO : delete this ! 161 | await asyncio.wait_for(q.put(msg), timeout) 162 | self._total_put += 1 163 | except asyncio.TimeoutError: 164 | pass 165 | 166 | async def put_many(self, list_items: List[Any], timeout=None) -> None: 167 | q = next(self._cycle_queues_put) 168 | 169 | try: 170 | q.put_many(list_items) 171 | self._total_put += len(list_items) 172 | except asyncio.TimeoutError: 173 | ## TODO : implement canceling tasks in Queue and QueuePool 174 | raise PutTimeout(f"Couldn't put all element in queue : {q}") 175 | 176 | def put_nowait(self, item: Any) -> None: 177 | q = self._get_random_queue() 178 | self._queue_size[q.key] += 1 179 | try: 180 | q.put_nowait(item).result() 181 | except asyncio.QueueFull: 182 | logger.debug("reRaise same error") 183 | raise asyncio.QueueFull 184 | 185 | def stop_gc(self): 186 | [q.stop_gc() for q in self._queues] 187 | 188 | 189 | def decorator(cls): 190 | class Interface: 191 | """Interface class to communicate with the queue Pool actor spawned in the cluster.""" 192 | 193 | def __init__(self, client, n_queues: int, **kwargs): 194 | self.actor = client.submit(cls, n_queues, actor=True, **kwargs).result() 195 | self.n_queues = n_queues 196 | logger.info(f"Created {n_queues} queues in Cluster and one QueueManager.") 197 | 198 | def __getattr__(self, key): 199 | attr = getattr(self.actor, key) 200 | if callable(attr): 201 | 202 | @functools.wraps(attr) 203 | def func(*args, **kwargs): 204 | return attr(*args, **kwargs).result() 205 | 206 | return func 207 | 208 | def __repr__(self) -> str: 209 | return self.actor.print().result() 210 | 211 | def __getitem__(self, idx: int) -> TransientQueue: 212 | return self.actor.get_queue(idx).result() 213 | 214 | def __len__(self): 215 | return self.actor.get_len().result() 216 | 217 | def batch_submit( 218 | self, 219 | list_calls: List[Tuple[Callable, ...]], 220 | sync_mode: bool = True, 221 | worker_class: ConsumerBaseClass = GeneralConsumer, 222 | batch_size: int = 1000, 223 | **kwargs, 224 | ): 225 | """Batch submits a list of messages to the next put queue in pool. 226 | 227 | Args: 228 | list_calls (List[Tuple[Callable, ...]]): List of tasks Tuple[func, args] to submit 229 | timeout (_type_, optional): Optional timeout. Defaults to None. 230 | worker_class (_type_, optional): Submit is only available is using a subclass of GeneralConsumer class. 
Defaults to GeneralConsumer. 231 | 232 | Raises: 233 | RuntimeError: Exception if worker_class is not a subclass of GeneralConsumer 234 | """ 235 | if not issubclass(worker_class, GeneralConsumer): 236 | raise RuntimeError( 237 | "Can't submit arbitrary tasks to arbitrary consumer. Please use the default GeneralConsumer class" 238 | ) 239 | 240 | futures = [] 241 | 242 | with ThreadPoolExecutor(min(os.cpu_count(), self.n_queues)) as e: 243 | # TODO : figure out a heuristic for the batch size 244 | for msgs in msg_grouper( 245 | min(len(list_calls) // self.n_queues + 1, batch_size), list_calls 246 | ): 247 | if sync_mode: 248 | f = e.submit(self.put_many_sync, msgs) 249 | else: 250 | f = e.submit(self.actor.put_many_sync, msgs) 251 | futures.append(f) 252 | return [f.result() for f in futures] 253 | 254 | return Interface 255 | 256 | 257 | QueuePool = decorator(QueuePoolActor) 258 | -------------------------------------------------------------------------------- /daskqueue/__init__.py: -------------------------------------------------------------------------------- 1 | from . import _version 2 | from .ActorPool import ActorPool 3 | from .Consumer import ConsumerBaseClass, DummyConsumer, GeneralConsumer 4 | from .ConsumerPool import ConsumerPool 5 | from .Protocol import Message 6 | from .queue.transient_queue import TransientQueue 7 | from .QueuePool import * 8 | 9 | __version__ = _version.get_versions()["version"] 10 | -------------------------------------------------------------------------------- /daskqueue/_version.py: -------------------------------------------------------------------------------- 1 | # This file helps to compute a version number in source trees obtained from 2 | # git-archive tarball (such as those provided by githubs download-from-tag 3 | # feature). Distribution tarballs (built by setup.py sdist) and build 4 | # directories (produced by setup.py build) will contain a much shorter file 5 | # that just contains the computed version number. 6 | 7 | # This file is released into the public domain. Generated by 8 | # versioneer-0.22 (https://github.com/python-versioneer/python-versioneer) 9 | 10 | """Git implementation of _version.py.""" 11 | 12 | import errno 13 | import os 14 | import re 15 | import subprocess 16 | import sys 17 | from typing import Callable, Dict 18 | import functools 19 | 20 | 21 | def get_keywords(): 22 | """Get the keywords needed to look up the version information.""" 23 | # these strings will be replaced by git during git-archive. 24 | # setup.py/versioneer.py will grep for the variable names, so they must 25 | # each be defined on a line of their own. _version.py will just call 26 | # get_keywords(). 
27 | git_refnames = " (HEAD -> main, 17-zmq-communication-layer)" 28 | git_full = "106fdcfd68db763bbadaa5f95f961552dcc3a219" 29 | git_date = "2023-01-09 23:31:44 +0100" 30 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 31 | return keywords 32 | 33 | 34 | class VersioneerConfig: 35 | """Container for Versioneer configuration parameters.""" 36 | 37 | 38 | def get_config(): 39 | """Create, populate and return the VersioneerConfig() object.""" 40 | # these strings are filled in when 'setup.py versioneer' creates 41 | # _version.py 42 | cfg = VersioneerConfig() 43 | cfg.VCS = "git" 44 | cfg.style = "pep440" 45 | cfg.tag_prefix = "" 46 | cfg.parentdir_prefix = "daskqueue-" 47 | cfg.versionfile_source = "daskqueue/_version.py" 48 | cfg.verbose = False 49 | return cfg 50 | 51 | 52 | class NotThisMethod(Exception): 53 | """Exception raised if a method is not valid for the current scenario.""" 54 | 55 | 56 | LONG_VERSION_PY: Dict[str, str] = {} 57 | HANDLERS: Dict[str, Dict[str, Callable]] = {} 58 | 59 | 60 | def register_vcs_handler(vcs, method): # decorator 61 | """Create decorator to mark a method as the handler of a VCS.""" 62 | 63 | def decorate(f): 64 | """Store f in HANDLERS[vcs][method].""" 65 | if vcs not in HANDLERS: 66 | HANDLERS[vcs] = {} 67 | HANDLERS[vcs][method] = f 68 | return f 69 | 70 | return decorate 71 | 72 | 73 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): 74 | """Call the given command(s).""" 75 | assert isinstance(commands, list) 76 | process = None 77 | 78 | popen_kwargs = {} 79 | if sys.platform == "win32": 80 | # This hides the console window if pythonw.exe is used 81 | startupinfo = subprocess.STARTUPINFO() 82 | startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW 83 | popen_kwargs["startupinfo"] = startupinfo 84 | 85 | for command in commands: 86 | try: 87 | dispcmd = str([command] + args) 88 | # remember shell=False, so use git.cmd on windows, not just git 89 | process = subprocess.Popen( 90 | [command] + args, 91 | cwd=cwd, 92 | env=env, 93 | stdout=subprocess.PIPE, 94 | stderr=(subprocess.PIPE if hide_stderr else None), 95 | **popen_kwargs 96 | ) 97 | break 98 | except OSError: 99 | e = sys.exc_info()[1] 100 | if e.errno == errno.ENOENT: 101 | continue 102 | if verbose: 103 | print("unable to run %s" % dispcmd) 104 | print(e) 105 | return None, None 106 | else: 107 | if verbose: 108 | print("unable to find command, tried %s" % (commands,)) 109 | return None, None 110 | stdout = process.communicate()[0].strip().decode() 111 | if process.returncode != 0: 112 | if verbose: 113 | print("unable to run %s (error)" % dispcmd) 114 | print("stdout was %s" % stdout) 115 | return None, process.returncode 116 | return stdout, process.returncode 117 | 118 | 119 | def versions_from_parentdir(parentdir_prefix, root, verbose): 120 | """Try to determine the version from the parent directory name. 121 | 122 | Source tarballs conventionally unpack into a directory that includes both 123 | the project name and a version string. 
We will also support searching up 124 | two directory levels for an appropriately named parent directory 125 | """ 126 | rootdirs = [] 127 | 128 | for _ in range(3): 129 | dirname = os.path.basename(root) 130 | if dirname.startswith(parentdir_prefix): 131 | return { 132 | "version": dirname[len(parentdir_prefix) :], 133 | "full-revisionid": None, 134 | "dirty": False, 135 | "error": None, 136 | "date": None, 137 | } 138 | rootdirs.append(root) 139 | root = os.path.dirname(root) # up a level 140 | 141 | if verbose: 142 | print( 143 | "Tried directories %s but none started with prefix %s" 144 | % (str(rootdirs), parentdir_prefix) 145 | ) 146 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 147 | 148 | 149 | @register_vcs_handler("git", "get_keywords") 150 | def git_get_keywords(versionfile_abs): 151 | """Extract version information from the given file.""" 152 | # the code embedded in _version.py can just fetch the value of these 153 | # keywords. When used from setup.py, we don't want to import _version.py, 154 | # so we do it with a regexp instead. This function is not used from 155 | # _version.py. 156 | keywords = {} 157 | try: 158 | with open(versionfile_abs, "r") as fobj: 159 | for line in fobj: 160 | if line.strip().startswith("git_refnames ="): 161 | mo = re.search(r'=\s*"(.*)"', line) 162 | if mo: 163 | keywords["refnames"] = mo.group(1) 164 | if line.strip().startswith("git_full ="): 165 | mo = re.search(r'=\s*"(.*)"', line) 166 | if mo: 167 | keywords["full"] = mo.group(1) 168 | if line.strip().startswith("git_date ="): 169 | mo = re.search(r'=\s*"(.*)"', line) 170 | if mo: 171 | keywords["date"] = mo.group(1) 172 | except OSError: 173 | pass 174 | return keywords 175 | 176 | 177 | @register_vcs_handler("git", "keywords") 178 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 179 | """Get version information from git keywords.""" 180 | if "refnames" not in keywords: 181 | raise NotThisMethod("Short version file found") 182 | date = keywords.get("date") 183 | if date is not None: 184 | # Use only the last line. Previous lines may contain GPG signature 185 | # information. 186 | date = date.splitlines()[-1] 187 | 188 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 189 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 190 | # -like" string, which we must then edit to make compliant), because 191 | # it's been around since git-1.5.3, and it's too difficult to 192 | # discover which version we're using, or to work around using an 193 | # older one. 194 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 195 | refnames = keywords["refnames"].strip() 196 | if refnames.startswith("$Format"): 197 | if verbose: 198 | print("keywords are unexpanded, not using") 199 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 200 | refs = {r.strip() for r in refnames.strip("()").split(",")} 201 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 202 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 203 | TAG = "tag: " 204 | tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} 205 | if not tags: 206 | # Either we're using git < 1.8.3, or there really are no tags. We use 207 | # a heuristic: assume all version tags have a digit. The old git %d 208 | # expansion behaves like git log --decorate=short and strips out the 209 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 210 | # between branches and tags. 
By ignoring refnames without digits, we 211 | # filter out many common branch names like "release" and 212 | # "stabilization", as well as "HEAD" and "master". 213 | tags = {r for r in refs if re.search(r"\d", r)} 214 | if verbose: 215 | print("discarding '%s', no digits" % ",".join(refs - tags)) 216 | if verbose: 217 | print("likely tags: %s" % ",".join(sorted(tags))) 218 | for ref in sorted(tags): 219 | # sorting will prefer e.g. "2.0" over "2.0rc1" 220 | if ref.startswith(tag_prefix): 221 | r = ref[len(tag_prefix) :] 222 | # Filter out refs that exactly match prefix or that don't start 223 | # with a number once the prefix is stripped (mostly a concern 224 | # when prefix is '') 225 | if not re.match(r"\d", r): 226 | continue 227 | if verbose: 228 | print("picking %s" % r) 229 | return { 230 | "version": r, 231 | "full-revisionid": keywords["full"].strip(), 232 | "dirty": False, 233 | "error": None, 234 | "date": date, 235 | } 236 | # no suitable tags, so version is "0+unknown", but full hex is still there 237 | if verbose: 238 | print("no suitable tags, using unknown + full revision id") 239 | return { 240 | "version": "0+unknown", 241 | "full-revisionid": keywords["full"].strip(), 242 | "dirty": False, 243 | "error": "no suitable tags", 244 | "date": None, 245 | } 246 | 247 | 248 | @register_vcs_handler("git", "pieces_from_vcs") 249 | def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): 250 | """Get version from 'git describe' in the root of the source tree. 251 | 252 | This only gets called if the git-archive 'subst' keywords were *not* 253 | expanded, and _version.py hasn't already been rewritten with a short 254 | version string, meaning we're inside a checked out source tree. 255 | """ 256 | GITS = ["git"] 257 | if sys.platform == "win32": 258 | GITS = ["git.cmd", "git.exe"] 259 | 260 | # GIT_DIR can interfere with correct operation of Versioneer. 261 | # It may be intended to be passed to the Versioneer-versioned project, 262 | # but that should not change where we get our version from. 
263 | env = os.environ.copy() 264 | env.pop("GIT_DIR", None) 265 | runner = functools.partial(runner, env=env) 266 | 267 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) 268 | if rc != 0: 269 | if verbose: 270 | print("Directory %s not under git control" % root) 271 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 272 | 273 | MATCH_ARGS = ["--match", "%s*" % tag_prefix] if tag_prefix else [] 274 | 275 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 276 | # if there isn't one, this yields HEX[-dirty] (no NUM) 277 | describe_out, rc = runner( 278 | GITS, 279 | ["describe", "--tags", "--dirty", "--always", "--long", *MATCH_ARGS], 280 | cwd=root, 281 | ) 282 | # --long was added in git-1.5.5 283 | if describe_out is None: 284 | raise NotThisMethod("'git describe' failed") 285 | describe_out = describe_out.strip() 286 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) 287 | if full_out is None: 288 | raise NotThisMethod("'git rev-parse' failed") 289 | full_out = full_out.strip() 290 | 291 | pieces = {} 292 | pieces["long"] = full_out 293 | pieces["short"] = full_out[:7] # maybe improved later 294 | pieces["error"] = None 295 | 296 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) 297 | # --abbrev-ref was added in git-1.6.3 298 | if rc != 0 or branch_name is None: 299 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") 300 | branch_name = branch_name.strip() 301 | 302 | if branch_name == "HEAD": 303 | # If we aren't exactly on a branch, pick a branch which represents 304 | # the current commit. If all else fails, we are on a branchless 305 | # commit. 306 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) 307 | # --contains was added in git-1.5.4 308 | if rc != 0 or branches is None: 309 | raise NotThisMethod("'git branch --contains' returned error") 310 | branches = branches.split("\n") 311 | 312 | # Remove the first line if we're running detached 313 | if "(" in branches[0]: 314 | branches.pop(0) 315 | 316 | # Strip off the leading "* " from the list of branches. 317 | branches = [branch[2:] for branch in branches] 318 | if "master" in branches: 319 | branch_name = "master" 320 | elif not branches: 321 | branch_name = None 322 | else: 323 | # Pick the first branch that is returned. Good or bad. 324 | branch_name = branches[0] 325 | 326 | pieces["branch"] = branch_name 327 | 328 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 329 | # TAG might have hyphens. 330 | git_describe = describe_out 331 | 332 | # look for -dirty suffix 333 | dirty = git_describe.endswith("-dirty") 334 | pieces["dirty"] = dirty 335 | if dirty: 336 | git_describe = git_describe[: git_describe.rindex("-dirty")] 337 | 338 | # now we have TAG-NUM-gHEX or HEX 339 | 340 | if "-" in git_describe: 341 | # TAG-NUM-gHEX 342 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) 343 | if not mo: 344 | # unparsable. Maybe git-describe is misbehaving? 
345 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out 346 | return pieces 347 | 348 | # tag 349 | full_tag = mo.group(1) 350 | if not full_tag.startswith(tag_prefix): 351 | if verbose: 352 | fmt = "tag '%s' doesn't start with prefix '%s'" 353 | print(fmt % (full_tag, tag_prefix)) 354 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( 355 | full_tag, 356 | tag_prefix, 357 | ) 358 | return pieces 359 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] 360 | 361 | # distance: number of commits since tag 362 | pieces["distance"] = int(mo.group(2)) 363 | 364 | # commit: short hex revision ID 365 | pieces["short"] = mo.group(3) 366 | 367 | else: 368 | # HEX: no tags 369 | pieces["closest-tag"] = None 370 | count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 371 | pieces["distance"] = int(count_out) # total number of commits 372 | 373 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 374 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() 375 | # Use only the last line. Previous lines may contain GPG signature 376 | # information. 377 | date = date.splitlines()[-1] 378 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 379 | 380 | return pieces 381 | 382 | 383 | def plus_or_dot(pieces): 384 | """Return a + if we don't already have one, else return a .""" 385 | if "+" in pieces.get("closest-tag", ""): 386 | return "." 387 | return "+" 388 | 389 | 390 | def render_pep440(pieces): 391 | """Build up version string, with post-release "local version identifier". 392 | 393 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 394 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 395 | 396 | Exceptions: 397 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 398 | """ 399 | if pieces["closest-tag"]: 400 | rendered = pieces["closest-tag"] 401 | if pieces["distance"] or pieces["dirty"]: 402 | rendered += plus_or_dot(pieces) 403 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 404 | if pieces["dirty"]: 405 | rendered += ".dirty" 406 | else: 407 | # exception #1 408 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 409 | if pieces["dirty"]: 410 | rendered += ".dirty" 411 | return rendered 412 | 413 | 414 | def render_pep440_branch(pieces): 415 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 416 | 417 | The ".dev0" means not master branch. Note that .dev0 sorts backwards 418 | (a feature branch will appear "older" than the master branch). 419 | 420 | Exceptions: 421 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] 422 | """ 423 | if pieces["closest-tag"]: 424 | rendered = pieces["closest-tag"] 425 | if pieces["distance"] or pieces["dirty"]: 426 | if pieces["branch"] != "master": 427 | rendered += ".dev0" 428 | rendered += plus_or_dot(pieces) 429 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 430 | if pieces["dirty"]: 431 | rendered += ".dirty" 432 | else: 433 | # exception #1 434 | rendered = "0" 435 | if pieces["branch"] != "master": 436 | rendered += ".dev0" 437 | rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 438 | if pieces["dirty"]: 439 | rendered += ".dirty" 440 | return rendered 441 | 442 | 443 | def pep440_split_post(ver): 444 | """Split pep440 version string at the post-release segment. 445 | 446 | Returns the release segments before the post-release and the 447 | post-release version number (or -1 if no post-release segment is present). 
448 | """ 449 | vc = str.split(ver, ".post") 450 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None 451 | 452 | 453 | def render_pep440_pre(pieces): 454 | """TAG[.postN.devDISTANCE] -- No -dirty. 455 | 456 | Exceptions: 457 | 1: no tags. 0.post0.devDISTANCE 458 | """ 459 | if pieces["closest-tag"]: 460 | if pieces["distance"]: 461 | # update the post release segment 462 | tag_version, post_version = pep440_split_post(pieces["closest-tag"]) 463 | rendered = tag_version 464 | if post_version is not None: 465 | rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) 466 | else: 467 | rendered += ".post0.dev%d" % (pieces["distance"]) 468 | else: 469 | # no commits, use the tag as the version 470 | rendered = pieces["closest-tag"] 471 | else: 472 | # exception #1 473 | rendered = "0.post0.dev%d" % pieces["distance"] 474 | return rendered 475 | 476 | 477 | def render_pep440_post(pieces): 478 | """TAG[.postDISTANCE[.dev0]+gHEX] . 479 | 480 | The ".dev0" means dirty. Note that .dev0 sorts backwards 481 | (a dirty tree will appear "older" than the corresponding clean one), 482 | but you shouldn't be releasing software with -dirty anyways. 483 | 484 | Exceptions: 485 | 1: no tags. 0.postDISTANCE[.dev0] 486 | """ 487 | if pieces["closest-tag"]: 488 | rendered = pieces["closest-tag"] 489 | if pieces["distance"] or pieces["dirty"]: 490 | rendered += ".post%d" % pieces["distance"] 491 | if pieces["dirty"]: 492 | rendered += ".dev0" 493 | rendered += plus_or_dot(pieces) 494 | rendered += "g%s" % pieces["short"] 495 | else: 496 | # exception #1 497 | rendered = "0.post%d" % pieces["distance"] 498 | if pieces["dirty"]: 499 | rendered += ".dev0" 500 | rendered += "+g%s" % pieces["short"] 501 | return rendered 502 | 503 | 504 | def render_pep440_post_branch(pieces): 505 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . 506 | 507 | The ".dev0" means not master branch. 508 | 509 | Exceptions: 510 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] 511 | """ 512 | if pieces["closest-tag"]: 513 | rendered = pieces["closest-tag"] 514 | if pieces["distance"] or pieces["dirty"]: 515 | rendered += ".post%d" % pieces["distance"] 516 | if pieces["branch"] != "master": 517 | rendered += ".dev0" 518 | rendered += plus_or_dot(pieces) 519 | rendered += "g%s" % pieces["short"] 520 | if pieces["dirty"]: 521 | rendered += ".dirty" 522 | else: 523 | # exception #1 524 | rendered = "0.post%d" % pieces["distance"] 525 | if pieces["branch"] != "master": 526 | rendered += ".dev0" 527 | rendered += "+g%s" % pieces["short"] 528 | if pieces["dirty"]: 529 | rendered += ".dirty" 530 | return rendered 531 | 532 | 533 | def render_pep440_old(pieces): 534 | """TAG[.postDISTANCE[.dev0]] . 535 | 536 | The ".dev0" means dirty. 537 | 538 | Exceptions: 539 | 1: no tags. 0.postDISTANCE[.dev0] 540 | """ 541 | if pieces["closest-tag"]: 542 | rendered = pieces["closest-tag"] 543 | if pieces["distance"] or pieces["dirty"]: 544 | rendered += ".post%d" % pieces["distance"] 545 | if pieces["dirty"]: 546 | rendered += ".dev0" 547 | else: 548 | # exception #1 549 | rendered = "0.post%d" % pieces["distance"] 550 | if pieces["dirty"]: 551 | rendered += ".dev0" 552 | return rendered 553 | 554 | 555 | def render_git_describe(pieces): 556 | """TAG[-DISTANCE-gHEX][-dirty]. 557 | 558 | Like 'git describe --tags --dirty --always'. 559 | 560 | Exceptions: 561 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 562 | """ 563 | if pieces["closest-tag"]: 564 | rendered = pieces["closest-tag"] 565 | if pieces["distance"]: 566 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 567 | else: 568 | # exception #1 569 | rendered = pieces["short"] 570 | if pieces["dirty"]: 571 | rendered += "-dirty" 572 | return rendered 573 | 574 | 575 | def render_git_describe_long(pieces): 576 | """TAG-DISTANCE-gHEX[-dirty]. 577 | 578 | Like 'git describe --tags --dirty --always -long'. 579 | The distance/hash is unconditional. 580 | 581 | Exceptions: 582 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 583 | """ 584 | if pieces["closest-tag"]: 585 | rendered = pieces["closest-tag"] 586 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 587 | else: 588 | # exception #1 589 | rendered = pieces["short"] 590 | if pieces["dirty"]: 591 | rendered += "-dirty" 592 | return rendered 593 | 594 | 595 | def render(pieces, style): 596 | """Render the given version pieces into the requested style.""" 597 | if pieces["error"]: 598 | return { 599 | "version": "unknown", 600 | "full-revisionid": pieces.get("long"), 601 | "dirty": None, 602 | "error": pieces["error"], 603 | "date": None, 604 | } 605 | 606 | if not style or style == "default": 607 | style = "pep440" # the default 608 | 609 | if style == "pep440": 610 | rendered = render_pep440(pieces) 611 | elif style == "pep440-branch": 612 | rendered = render_pep440_branch(pieces) 613 | elif style == "pep440-pre": 614 | rendered = render_pep440_pre(pieces) 615 | elif style == "pep440-post": 616 | rendered = render_pep440_post(pieces) 617 | elif style == "pep440-post-branch": 618 | rendered = render_pep440_post_branch(pieces) 619 | elif style == "pep440-old": 620 | rendered = render_pep440_old(pieces) 621 | elif style == "git-describe": 622 | rendered = render_git_describe(pieces) 623 | elif style == "git-describe-long": 624 | rendered = render_git_describe_long(pieces) 625 | else: 626 | raise ValueError("unknown style '%s'" % style) 627 | 628 | return { 629 | "version": rendered, 630 | "full-revisionid": pieces["long"], 631 | "dirty": pieces["dirty"], 632 | "error": None, 633 | "date": pieces.get("date"), 634 | } 635 | 636 | 637 | def get_versions(): 638 | """Get version information or return default if unable to do so.""" 639 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 640 | # __file__, we can work backwards from there to the root. Some 641 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 642 | # case we can only use expanded keywords. 643 | 644 | cfg = get_config() 645 | verbose = cfg.verbose 646 | 647 | try: 648 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) 649 | except NotThisMethod: 650 | pass 651 | 652 | try: 653 | root = os.path.realpath(__file__) 654 | # versionfile_source is the relative path from the top of the source 655 | # tree (where the .git directory might live) to this file. Invert 656 | # this to find the root from __file__. 
657 | for _ in cfg.versionfile_source.split("/"): 658 | root = os.path.dirname(root) 659 | except NameError: 660 | return { 661 | "version": "0+unknown", 662 | "full-revisionid": None, 663 | "dirty": None, 664 | "error": "unable to find root of source tree", 665 | "date": None, 666 | } 667 | 668 | try: 669 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 670 | return render(pieces, cfg.style) 671 | except NotThisMethod: 672 | pass 673 | 674 | try: 675 | if cfg.parentdir_prefix: 676 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 677 | except NotThisMethod: 678 | pass 679 | 680 | return { 681 | "version": "0+unknown", 682 | "full-revisionid": None, 683 | "dirty": None, 684 | "error": "unable to compute version", 685 | "date": None, 686 | } 687 | -------------------------------------------------------------------------------- /daskqueue/backend.py: -------------------------------------------------------------------------------- 1 | class Backend: 2 | pass 3 | -------------------------------------------------------------------------------- /daskqueue/queue/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_queue import BaseQueue 2 | from .durable_queue import DurableQueue 3 | from .transient_queue import TransientQueue 4 | -------------------------------------------------------------------------------- /daskqueue/queue/base_queue.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractclassmethod 2 | from enum import Enum, auto 3 | from typing import Optional 4 | from uuid import UUID 5 | 6 | 7 | class Durability(Enum): 8 | DURABLE = auto() 9 | TRANSIENT = auto() 10 | 11 | 12 | class BaseQueue(ABC): 13 | def __init__(self, durability: Durability, maxsize: Optional[int] = None) -> None: 14 | self.durability = durability 15 | self.maxsize = maxsize 16 | 17 | @abstractclassmethod 18 | def qsize(self): 19 | raise NotImplementedError("Needs a qsize method ") 20 | 21 | @abstractclassmethod 22 | async def put(self): 23 | raise NotImplementedError("Need an async put method") 24 | 25 | @abstractclassmethod 26 | async def put_many(self): 27 | raise NotImplementedError("Need an async put method") 28 | 29 | @abstractclassmethod 30 | async def get(self): 31 | raise NotImplementedError("Needs an async get method") 32 | 33 | @abstractclassmethod 34 | async def ack(self, msg_id: UUID): 35 | raise NotImplementedError("Needs an async get method") 36 | -------------------------------------------------------------------------------- /daskqueue/queue/durable_queue.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import glob 3 | import os 4 | from typing import Any, Dict, List, Optional, Tuple 5 | from uuid import UUID 6 | 7 | from distributed.worker import get_worker 8 | 9 | from daskqueue.Protocol import Message 10 | from daskqueue.segment import INDEX_MAX_BYTES, MAX_BYTES 11 | from daskqueue.segment.index_record import IdxRecord 12 | from daskqueue.segment.index_segment import IndexSegment 13 | from daskqueue.segment.log_segment import FullSegment, LogAccess, LogSegment 14 | from daskqueue.utils import logger 15 | 16 | from .base_queue import BaseQueue, Durability 17 | 18 | 19 | class DurableQueue(BaseQueue): 20 | def __init__( 21 | self, 22 | name: str, 23 | dirpath: str, 24 | durability: Durability = Durability.DURABLE, 25 | exchange: str = "default", 26 | maxsize: Optional[int] = None, 27 | 
log_max_bytes: int = MAX_BYTES, 28 | index_max_bytes: int = INDEX_MAX_BYTES, 29 | ack_timeout: int = 5, 30 | retry: bool = False, 31 | ): 32 | self.name = name 33 | self.dirpath = dirpath 34 | 35 | # NOTE: A queue handles message from one exchange 36 | self.exchange = exchange 37 | self.queue_dir = os.path.join(self.dirpath, f"{self.exchange}-{self.name}") 38 | 39 | # If maxsize is None, the queue size is infinite 40 | self.log_max_bytes = log_max_bytes 41 | self.index_max_bytes = index_max_bytes 42 | self.maxsize = maxsize 43 | self.ack_timeout = ack_timeout 44 | self.retry = retry 45 | 46 | # Get the IOLoop running on the worker 47 | self.worker_loop = asyncio.new_event_loop() # self._io_loop 48 | asyncio.set_event_loop(self.worker_loop) 49 | 50 | self.ro_segments: Dict[int, LogSegment] = {} 51 | self.active_segment: LogSegment = None 52 | self.index_segment: IndexSegment = None 53 | 54 | # TODO(@Amine) : Parse the storage 55 | self.setup_storage() 56 | 57 | super().__init__(durability=durability, maxsize=self.maxsize) 58 | 59 | @property 60 | def _worker(self): 61 | return get_worker() 62 | 63 | @property 64 | def _io_loop(self) -> asyncio.BaseEventLoop: 65 | if self._worker: 66 | return self._worker.io_loop 67 | 68 | def setup_storage(self) -> Any: 69 | if not os.path.exists(self.queue_dir): 70 | os.makedirs(self.queue_dir) 71 | 72 | self.ro_segments, self.active_segment = self._load_segments(self.queue_dir) 73 | self.index_segment = self._load_index(self.queue_dir) 74 | 75 | def _load_index(self, path: str) -> IndexSegment: 76 | name = f"{self.exchange}-{self.name}.index" 77 | index_path = os.path.join(path, name) 78 | return IndexSegment( 79 | index_path, 80 | max_bytes=self.index_max_bytes, 81 | retry=self.retry, 82 | ack_timeout=self.ack_timeout, 83 | ) 84 | 85 | def _load_segments(self, path: str) -> Tuple[List[LogSegment], LogSegment]: 86 | segments = glob.glob(path + "/*.log") 87 | if segments: 88 | segments.sort() 89 | active_seg_path = segments.pop() 90 | list_ro_segment = [ 91 | LogSegment(sp, LogAccess.RO, max_bytes=self.log_max_bytes) 92 | for sp in segments 93 | ] 94 | return {int(seg.name): seg for seg in list_ro_segment}, LogSegment( 95 | active_seg_path, LogAccess.RW, max_bytes=self.log_max_bytes 96 | ) 97 | else: 98 | # TODO : Determine this length 99 | seg_name = str(0).rjust(20, "0") + ".log" 100 | seg_path = os.path.join(path, seg_name) 101 | return {}, LogSegment(seg_path, LogAccess.RW, max_bytes=self.log_max_bytes) 102 | 103 | def new_active_segment(self): 104 | self.active_segment.read_only() 105 | # Appending the closed segment to the list of read-only segments 106 | self.ro_segments[int(self.active_segment.name)] = self.active_segment 107 | # TODO : Should probably add the archival info in the file footer ? 
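        # Rollover sketch: closing segment "00000000000000000000.log" yields
        # file_no 1 and a fresh RW segment "00000000000000000001.log" in
        # queue_dir; the 20-character zero padding keeps the lexicographic
        # sort used by _load_segments consistent with numeric ordering.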
108 | file_no = int(self.active_segment.name) + 1 109 | seg_name = str(file_no).rjust(20, "0") + ".log" 110 | seg_path = os.path.join(self.queue_dir, seg_name) 111 | return LogSegment(seg_path, LogAccess.RW, max_bytes=self.log_max_bytes) 112 | 113 | def put_sync(self, item: Message) -> IdxRecord: 114 | # Append to Active Segment 115 | # TODO : Can't retry forever, I should add a wrapper to retrie a number of times 116 | if len(item.serialize()) > (self.log_max_bytes - 8): 117 | raise ValueError( 118 | "Cannot append message bigger than the max log semgent size" 119 | ) 120 | try: 121 | offset = self.active_segment.append(item) 122 | except FullSegment: 123 | self.active_segment = self.new_active_segment() 124 | offset = self.active_segment.append(item) 125 | 126 | # Add to Log Index 127 | return self.index_segment.push(item.id, offset) 128 | 129 | def get_sync(self) -> Optional[Message]: 130 | index_record = self.index_segment.pop() 131 | if index_record is None: 132 | return None 133 | file_no = index_record.offset.file_no 134 | 135 | # TODO : Could probably keep an ordered set of segments (RO + Active) 136 | if file_no in self.ro_segments: 137 | record = self.ro_segments[file_no].read(index_record.offset) 138 | else: 139 | record = self.active_segment.read(index_record.offset) 140 | 141 | # Update the msg delivered timestamp to match record 142 | record.msg.delivered_timestamp = index_record.timestamp 143 | return record.msg 144 | 145 | async def put(self, item: Message, timeout=None): 146 | return self.put_sync(item) 147 | 148 | async def put_many(self, list_items: List[Message]): 149 | for item in list_items: 150 | self.put_sync(item) 151 | 152 | async def get(self, timeout=None) -> Optional[Message]: 153 | return self.get_sync() 154 | 155 | async def get_many(self, n: int, timeout=None) -> List[Optional[Message]]: 156 | return [self.get_sync() for _ in range(n)] 157 | 158 | def qsize(self): 159 | return len(self.index_segment) 160 | 161 | def ack_sync(self, timestamp: float, msg_id: UUID): 162 | logger.debug(f"[Queue-{self.name}] Ack item {msg_id}") 163 | return self.index_segment.ack(timestamp, msg_id) 164 | 165 | async def ack(self, timestamp: float, msg_id: UUID): 166 | return self.ack_sync(timestamp, msg_id) 167 | 168 | async def ack_many(self, items: List[Tuple[float, UUID]]): 169 | await asyncio.gather(*[self.ack(item[0], item[1]) for item in items]) 170 | 171 | def close(self): 172 | [log.close_file() for log in self.ro_segments.values()] 173 | self.active_segment.close_file() 174 | self.index_segment.close() 175 | 176 | def stop_gc(self): 177 | self.index_segment.stop_gc_event.set() 178 | -------------------------------------------------------------------------------- /daskqueue/queue/queue_exceptions.py: -------------------------------------------------------------------------------- 1 | class Empty(Exception): 2 | pass 3 | 4 | 5 | class Full(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /daskqueue/queue/sql_queue.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sqlite3 3 | from typing import Optional 4 | 5 | from .base_queue import BaseQueue, Durability 6 | 7 | 8 | class SQLQueue(BaseQueue): 9 | def __init__( 10 | self, 11 | durability: Durability, 12 | maxsize: Optional[int] = None, 13 | db_path: Optional[str] = None, 14 | ) -> None: 15 | super().__init__(durability, maxsize) 16 | 17 | if self.durability == Durability.DURABLE: 18 | 
self.check_path(db_path) 19 | self.db_path = db_path 20 | else: 21 | self.db_path = ":memory:" 22 | 23 | self.conn = self.setup_conn() 24 | 25 | @staticmethod 26 | def check_path(path: str) -> bool: 27 | """Checks for read and write access""" 28 | root_dir = os.path.basename(path) 29 | if path is None or not os.access(root_dir, os.R_OK | os.W_OK): 30 | raise ValueError("Please provide a valid path") 31 | 32 | def setup_conn(self) -> sqlite3.Connection: 33 | conn = sqlite3.connect(self.db_path) 34 | conn.execute( 35 | """CREATE TABLE IF NOT EXISTS Queue 36 | ( 37 | message BLOB NOT NULL, 38 | message_id TEXT NOT NULL, 39 | status INTEGER, 40 | in_time INTEGER NOT NULL DEFAULT (strftime('%s','now')), 41 | lock_time INTEGER, 42 | done_time INTEGER 43 | ) 44 | """ 45 | ) 46 | -------------------------------------------------------------------------------- /daskqueue/queue/transient_queue.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import time 3 | from queue import Empty, Full, Queue 4 | from threading import Event, Thread 5 | from typing import List, Optional, Tuple 6 | from uuid import UUID 7 | 8 | from distributed.worker import get_worker 9 | from sortedcontainers import SortedDict 10 | 11 | from daskqueue.Protocol import Message 12 | from daskqueue.utils import logger 13 | 14 | from .base_queue import BaseQueue, Durability 15 | 16 | 17 | class TransientQueue(BaseQueue): 18 | def __init__( 19 | self, 20 | maxsize=-1, 21 | ack_timeout: int = 5, 22 | retry: bool = False, 23 | ): 24 | # If maxsize is less than or equal to zero, the queue size is infinite 25 | self.maxsize = maxsize 26 | # Get the IOLoop running on the worker 27 | self.queue = Queue(maxsize=maxsize) 28 | self.delivered = SortedDict() 29 | 30 | # Garbage collection tasks for delivered unacked message 31 | self.ack_timeout = ack_timeout 32 | self.retry = retry 33 | self.stop_gc_event = Event() 34 | self._gc_thread = Thread(target=self._background_gc, daemon=True) 35 | self._gc_thread.start() 36 | try: 37 | self.loop = self._io_loop.asyncio_loop 38 | asyncio.set_event_loop(self.loop) 39 | except ValueError: 40 | self.loop = None 41 | 42 | super().__init__(durability=Durability.TRANSIENT, maxsize=maxsize) 43 | 44 | @property 45 | def _worker(self): 46 | return get_worker() 47 | 48 | @property 49 | def _io_loop(self): 50 | if self._worker: 51 | return self._worker.io_loop 52 | 53 | def qsize(self): 54 | return self.queue.qsize() + len(self.delivered) 55 | 56 | def empty(self): 57 | return self.queue.empty() 58 | 59 | def full(self): 60 | return self.queue.full() 61 | 62 | async def put_many(self, list_items): 63 | for item in list_items: 64 | self.queue.put(item, block=False) 65 | 66 | async def put(self, item, timeout=None): 67 | logger.debug(f"[Queue] Put item: {item}") 68 | self.queue.put(item, timeout=timeout) 69 | 70 | def put_sync(self, item): 71 | return self.queue.put(item) 72 | 73 | def get_sync(self, timeout=None): 74 | try: 75 | item = self.queue.get(block=False) 76 | if isinstance(item, Message): 77 | item.delivered_timestamp = time.time() 78 | self.delivered[item.delivered_timestamp] = item 79 | return item 80 | except Empty: 81 | return None 82 | 83 | async def get(self, timeout=None): 84 | return self.get_sync(timeout) 85 | 86 | async def get_many(self, n: int, timeout=None) -> List[Optional[Message]]: 87 | return [self.get_sync(timeout) for _ in range(n)] 88 | 89 | def put_nowait(self, item): 90 | self.queue.put_nowait(item) 91 | 92 | def 
put_nowait_batch(self, items): 93 | # If maxsize is <=0, queue is unbounded, so no need to check size. 94 | if self.maxsize > 0 and len(items) + self.qsize() > self.maxsize: 95 | raise Full( 96 | f"Cannot add {len(items)} items to queue of size " 97 | f"{self.qsize()} and maxsize {self.maxsize}." 98 | ) 99 | for item in items: 100 | self.queue.put_nowait(item) 101 | 102 | def get_nowait(self): 103 | try: 104 | return self.queue.get_nowait() 105 | except asyncio.QueueEmpty: 106 | return None 107 | 108 | def get_nowait_batch(self, num_items): 109 | if num_items > self.qsize(): 110 | raise Empty( 111 | f"Cannot get {num_items} items from queue of size " f"{self.qsize()}." 112 | ) 113 | return [self.queue.get_nowait() for _ in range(num_items)] 114 | 115 | async def ack(self, timestamp: float, msg_id: UUID): 116 | logger.debug(f"[Queue] Ack item {msg_id}") 117 | item = self.delivered.pop(timestamp, None) 118 | 119 | if item is None or item != msg_id: 120 | # raise ValueError("Msg doesnt exist in the delivered list") 121 | pass 122 | return True 123 | 124 | async def ack_many(self, items: List[Tuple[float, UUID]]): 125 | await asyncio.gather(*[self.ack(item[0], item[1]) for item in items]) 126 | 127 | def _background_gc(self): 128 | while not self.stop_gc_event.is_set(): 129 | now = time.time() 130 | cutoff = self.delivered.bisect_left(now) 131 | for record in self.delivered.keys()[:cutoff]: 132 | idx_record = self.delivered.pop(record) 133 | if self.retry: 134 | self.ready[idx_record.msg_id] = idx_record 135 | time.sleep(self.ack_timeout) 136 | 137 | def stop_gc(self): 138 | self.stop_gc_event.set() 139 | -------------------------------------------------------------------------------- /daskqueue/segment/__init__.py: -------------------------------------------------------------------------------- 1 | FORMAT_VERSION = (0, 1) 2 | 3 | FILE_IDENTIFIER = b"\x53\x34\x4e\x40" 4 | 5 | INDEX_FILE_IDENTIFIER = b"\x5f\x44\x31\x40" 6 | 7 | FOOTER = b"\x52\x3f\x4a\x43" 8 | 9 | HEADER_SIZE = 8 10 | 11 | MAX_BYTES = 50 * 1024 * 1024 # 1MB 12 | 13 | INDEX_MAX_BYTES = 100 * 1024 * 1024 # 1MB 14 | -------------------------------------------------------------------------------- /daskqueue/segment/index_record.py: -------------------------------------------------------------------------------- 1 | import struct 2 | from binascii import crc32 3 | from dataclasses import dataclass 4 | from enum import IntEnum, auto 5 | from uuid import UUID 6 | 7 | from .log_record import RecordOffset 8 | 9 | 10 | class MessageStatus(IntEnum): 11 | READY = auto() 12 | DELIVERED = auto() 13 | ACKED = auto() 14 | FAILED = auto() 15 | 16 | 17 | @dataclass 18 | class IdxRecord: 19 | msg_id: UUID 20 | status: MessageStatus 21 | offset: RecordOffset 22 | timestamp: float 23 | 24 | 25 | class IdxRecordProcessor: 26 | RECORD_SIZE = 41 27 | 28 | def parse_bytes(self, buffer: bytes) -> IdxRecord: 29 | assert len(buffer) == self.RECORD_SIZE 30 | checksum_data = buffer[4:] 31 | s = 0 32 | checksum = struct.unpack("!I", buffer[s : s + 4])[0] 33 | s += 4 34 | if not self._verify_checksum(checksum, checksum_data): 35 | raise Exception("Corrupt data detected: invalid checksum") 36 | 37 | timestamp = struct.unpack("!d", buffer[s : s + 8])[0] 38 | s += 8 39 | msg_id = UUID(bytes=buffer[s : s + 16]) 40 | s += 16 41 | status = MessageStatus(struct.unpack("!b", buffer[s : s + 1])[0]) 42 | s += 1 43 | file_no, offset, size = struct.unpack("!III", buffer[s : s + 12]) 44 | record = IdxRecord( 45 | msg_id=msg_id, 46 | status=status, 47 | 
offset=RecordOffset(file_no, offset, size), 48 | timestamp=timestamp, 49 | ) 50 | 51 | return record 52 | 53 | def _verify_checksum(self, retrieved_checksum: int, checksum_data: bytes): 54 | # key is the bytes of the key, 55 | return crc32(checksum_data) & 0xFFFFFFFF == retrieved_checksum 56 | 57 | def serialize_idx_record(self, idx_record: IdxRecord): 58 | # 59 | timestamp_bytes = struct.pack("!d", idx_record.timestamp) # 8 bytes 60 | id = idx_record.msg_id.bytes # 16 bytes 61 | status = struct.pack("!b", idx_record.status) # 1 bytes 62 | 63 | data = timestamp_bytes + id + status + idx_record.offset.pack() # 37 bytes 64 | checksum = struct.pack("!I", crc32(data) & 0xFFFFFFFF) # 4 byes 65 | 66 | # TODO : Test to see if alignement improves performance?? 67 | blob = checksum + data # 41 bytes 68 | return blob 69 | -------------------------------------------------------------------------------- /daskqueue/segment/index_segment.py: -------------------------------------------------------------------------------- 1 | import mmap 2 | import os 3 | import struct 4 | import time 5 | from collections import OrderedDict 6 | from io import FileIO 7 | from threading import Event, Thread 8 | from typing import Dict, Optional, Tuple 9 | from uuid import UUID 10 | 11 | from sortedcontainers import SortedDict 12 | 13 | from daskqueue.Protocol import Message 14 | from daskqueue.segment import ( 15 | FORMAT_VERSION, 16 | HEADER_SIZE, 17 | INDEX_FILE_IDENTIFIER, 18 | INDEX_MAX_BYTES, 19 | ) 20 | 21 | from .index_record import IdxRecord, IdxRecordProcessor, MessageStatus 22 | from .log_record import RecordOffset 23 | 24 | 25 | class IndexSegment: 26 | def __init__( 27 | self, 28 | path: str, 29 | max_bytes: int = INDEX_MAX_BYTES, 30 | ack_timeout: int = 5, 31 | retry: bool = False, 32 | ): 33 | self.path = path 34 | self.max_bytes = max_bytes 35 | self.name = self.parse_name(path) 36 | 37 | # In-memory datastructures 38 | self.delivered = SortedDict() 39 | self.ready = OrderedDict() 40 | self.processor = IdxRecordProcessor() 41 | self.retry = retry 42 | 43 | # Garbage collection tasks for delivered unacked message 44 | self.stop_gc_event = Event() 45 | self.ack_timeout = ack_timeout 46 | self._gc_thread = Thread(target=self._background_gc, daemon=True) 47 | self._gc_thread.start() 48 | 49 | # Setup 50 | self.created, self.file = self.create_or_open(path) 51 | self._mm_obj = self.mmap_index_segment() 52 | self.load_index() 53 | 54 | @property 55 | def closed(self) -> bool: 56 | return self._mm_obj.closed 57 | 58 | def __len__(self) -> int: 59 | """Return the length of both pending and delivered and unacked items .""" 60 | return len(self.ready) + len(self.delivered) 61 | 62 | def create_or_open(self, path) -> Tuple[bool, FileIO]: 63 | if not os.path.exists(path): 64 | with open(self.path, "wb") as f: 65 | off = self._write_header(f) 66 | f.write((self.max_bytes - off) * b"\0") 67 | return True, open(self.path, "r+b", 0) 68 | 69 | f = open(self.path, "r+b", 0) 70 | self.check_file(f) 71 | return False, f 72 | 73 | def _write_header(self, file): 74 | version_byte = struct.pack("!HH", *FORMAT_VERSION) 75 | return file.write(version_byte + INDEX_FILE_IDENTIFIER) 76 | 77 | def check_file(self, file): 78 | header = file.read(HEADER_SIZE) 79 | version = struct.unpack("!HH", header[:4]) 80 | fid = header[4:] 81 | if fid != INDEX_FILE_IDENTIFIER or version != FORMAT_VERSION: 82 | file.close() 83 | raise Exception("The file is not a daskqueue index_segment.") 84 | 85 | def mmap_index_segment(self): 86 | mm_obj = 
mmap.mmap(self.file.fileno(), 0) 87 | mm_obj.seek(HEADER_SIZE) 88 | return mm_obj 89 | 90 | def load_index(self) -> Dict[UUID, IdxRecord]: 91 | assert self._mm_obj.tell() == HEADER_SIZE 92 | cur = HEADER_SIZE 93 | while cur < self.max_bytes: 94 | try: 95 | buffer = self._mm_obj[cur : cur + self.processor.RECORD_SIZE] 96 | if buffer == self.processor.RECORD_SIZE * b"\x00": 97 | raise ValueError("End of file") 98 | 99 | idx_record = self.processor.parse_bytes(buffer) 100 | cur += self.processor.RECORD_SIZE 101 | self.update_index(idx_record) 102 | self._mm_obj.seek(cur) 103 | except ValueError: 104 | break 105 | except AssertionError: 106 | break 107 | 108 | def update_index( 109 | self, 110 | idx_record: IdxRecord, 111 | ): 112 | if idx_record.status == MessageStatus.READY: 113 | self.ready[idx_record.msg_id] = idx_record 114 | 115 | elif idx_record.status == MessageStatus.DELIVERED: 116 | self.ready.pop(idx_record.msg_id, None) 117 | self.delivered[idx_record.timestamp] = idx_record 118 | 119 | elif idx_record.status == MessageStatus.ACKED: 120 | self.delivered.pop(idx_record.timestamp, None) 121 | 122 | def append( 123 | self, 124 | msg_id: UUID, 125 | status: MessageStatus, 126 | offset: RecordOffset, 127 | delivered_timestamp: float = None, 128 | ) -> IdxRecord: 129 | # Write to disk .index file 130 | tmstmp = time.time() 131 | if status == MessageStatus.ACKED: 132 | idx_record = IdxRecord(msg_id, status, offset, delivered_timestamp) 133 | else: 134 | idx_record = IdxRecord(msg_id, status, offset, tmstmp) 135 | idx_record_bytes = self.processor.serialize_idx_record(idx_record) 136 | _ = self._mm_obj.write(idx_record_bytes) 137 | # Update internal mem index 138 | self.update_index(idx_record) 139 | return idx_record 140 | 141 | def push(self, msg_id: UUID, offset: RecordOffset) -> IdxRecord: 142 | return self.append(msg_id, MessageStatus.READY, offset) 143 | 144 | def pop(self) -> Optional[IdxRecord]: 145 | try: 146 | idx_record: IdxRecord = self.ready.popitem(last=False)[1] 147 | return self.append( 148 | idx_record.msg_id, MessageStatus.DELIVERED, idx_record.offset 149 | ) 150 | except KeyError: 151 | return None 152 | 153 | def drop(msg: Message): 154 | pass 155 | 156 | def ack(self, timestamp: float, msg_id: Message): 157 | # Pop the delivered messages from the queue 158 | idx_record = self.delivered.pop(timestamp, default=None) 159 | if idx_record is None or idx_record.msg_id != msg_id: 160 | raise ValueError("Msg doesnt exist in the delivered list") 161 | 162 | # Update 163 | return self.append( 164 | msg_id=msg_id, 165 | status=MessageStatus.ACKED, 166 | offset=idx_record.offset, 167 | delivered_timestamp=timestamp, 168 | ) 169 | 170 | # TODO: Compat 171 | def _compact(self): 172 | pass 173 | 174 | def _background_gc(self): 175 | while not self.stop_gc_event.is_set(): 176 | now = time.time() 177 | cutoff = self.delivered.bisect_left(now) 178 | for t in self.delivered.keys()[:cutoff]: 179 | idx_record = self.delivered.pop(t) 180 | # TODO : Will retry indefinetly 181 | if self.retry: 182 | self.ready[idx_record.msg_id] = idx_record 183 | self.ready.move_to_end(idx_record.msg_id, last=False) 184 | else: 185 | # TODO: For now, we ack the failed garbage collected messages 186 | self.append( 187 | msg_id=idx_record.msg_id, 188 | status=MessageStatus.ACKED, 189 | offset=idx_record.offset, 190 | delivered_timestamp=now, 191 | ) 192 | time.sleep(self.ack_timeout) 193 | 194 | def parse_name(self, path): 195 | filename = os.path.basename(path) 196 | return os.path.splitext(filename)[0] 197 | 
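    # Record lifecycle (descriptive summary of update_index above): push()
    # appends a READY record and stores it in self.ready; pop() re-appends the
    # same msg_id as DELIVERED, moving it into self.delivered keyed by
    # timestamp; ack() appends an ACKED record, removing it from delivered.
    # Replaying the .index file in load_index() therefore rebuilds both
    # in-memory structures after a restart.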
198 | def close(self) -> bool: 199 | self.stop_gc_event.set() 200 | self._mm_obj.flush() 201 | self.file.flush() 202 | self._mm_obj.close() 203 | self.file.close() 204 | return self.closed 205 | -------------------------------------------------------------------------------- /daskqueue/segment/log_record.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import struct 3 | from binascii import crc32 4 | from dataclasses import dataclass 5 | 6 | import cloudpickle 7 | 8 | from daskqueue.Protocol import Message 9 | from daskqueue.segment import FOOTER 10 | 11 | 12 | @dataclass 13 | class RecordOffset: 14 | __slots__ = ["file_no", "offset", "size"] 15 | file_no: int 16 | offset: int 17 | size: int 18 | 19 | def pack(self): 20 | return struct.pack("!III", self.file_no, self.offset, self.size) 21 | 22 | 23 | @dataclass 24 | class Record: 25 | checksum: int 26 | msg_size: int 27 | msg: Message 28 | footer: bytes 29 | 30 | 31 | class RecordProcessor: 32 | def parse_bytes(self, buffer: bytes) -> Record: 33 | footer = buffer[-len(FOOTER) :] 34 | checksum_data = buffer[4:-4] 35 | s = 0 36 | checksum = struct.unpack("!I", buffer[:4])[0] 37 | s += 4 38 | msg_size = struct.unpack("!I", buffer[s : s + 4])[0] 39 | s += 4 40 | 41 | try: 42 | msg = pickle.loads(buffer[s : s + msg_size]) 43 | except pickle.PicklingError: 44 | msg = cloudpickle.loads(buffer[s : s + msg_size]) 45 | 46 | record = Record( 47 | checksum=checksum, 48 | msg_size=msg_size, 49 | msg=msg, 50 | footer=footer, 51 | ) 52 | 53 | if not (footer == FOOTER) or not self._verify_checksum(checksum, checksum_data): 54 | raise Exception("Corrupt data detected: invalid checksum") 55 | 56 | return record 57 | 58 | def _verify_checksum(self, retrieved_checksum: int, checksum_data: bytes): 59 | # key is the bytes of the key, 60 | return crc32(checksum_data) & 0xFFFFFFFF == retrieved_checksum 61 | 62 | def create_record(self, msg: Message): 63 | msg_bytes = msg.serialize() 64 | msg_size = struct.pack("!I", len(msg_bytes)) 65 | # CRC covers : checksum() 66 | data = msg_size + msg_bytes 67 | checksum = struct.pack("!I", crc32(data) & 0xFFFFFFFF) 68 | blob = checksum + data + FOOTER 69 | return blob 70 | -------------------------------------------------------------------------------- /daskqueue/segment/log_segment.py: -------------------------------------------------------------------------------- 1 | import mmap 2 | import os 3 | import struct 4 | from enum import Enum, auto 5 | 6 | from daskqueue.Protocol import Message 7 | from daskqueue.segment import ( 8 | FILE_IDENTIFIER, 9 | FOOTER, 10 | FORMAT_VERSION, 11 | HEADER_SIZE, 12 | MAX_BYTES, 13 | ) 14 | 15 | from .log_record import RecordOffset, RecordProcessor 16 | 17 | 18 | class FullSegment(Exception): 19 | pass 20 | 21 | 22 | class LogAccess(Enum): 23 | RO = auto() # closed segment 24 | RW = auto() # closed 25 | 26 | 27 | class LogSegment: 28 | # TODO : construct a header 29 | 30 | def __init__(self, path: str, status: LogAccess, max_bytes: int = MAX_BYTES): 31 | self.path = path 32 | self.status = status 33 | self.max_bytes = max_bytes 34 | self.w_cursor = 0 35 | self.offset_range = () 36 | 37 | self.name = self.parse_name(path) 38 | 39 | self.file = self.create_or_open(path) 40 | self._mm_obj = self.mmap_segment(status) 41 | 42 | self.processor = RecordProcessor() 43 | 44 | def create_or_open(self, path): 45 | # File Structure : 46 | # Where each block has the following structure: 47 | if not os.path.exists(path): 48 | with open(self.path, 
"wb") as f: 49 | off = self._write_header(f) 50 | f.write((self.max_bytes - off) * b"\0") 51 | f = open(self.path, "r+b", 0) 52 | else: 53 | f = open(self.path, "r+b", 0) 54 | self.check_file(f) 55 | return f 56 | 57 | def _write_header(self, file): 58 | version_byte = struct.pack("!HH", *FORMAT_VERSION) 59 | return file.write(version_byte + FILE_IDENTIFIER) 60 | 61 | def check_file(self, file): 62 | header = file.read(HEADER_SIZE) 63 | version = struct.unpack("!HH", header[:4]) 64 | fid = header[4:] 65 | if fid != FILE_IDENTIFIER or version != (0, 1): 66 | file.close() 67 | raise Exception("The file is not the compatible with daskqueue logsegment.") 68 | 69 | def mmap_segment(self, status): 70 | mm_obj = mmap.mmap(self.file.fileno(), 0) 71 | 72 | self.w_cursor = 8 73 | 74 | if status == LogAccess.RW: 75 | # Seek to the latest write positon 76 | last_write = mm_obj.rfind(FOOTER) 77 | if last_write > 0: 78 | self.w_cursor = last_write + len(FOOTER) 79 | mm_obj.seek(self.w_cursor) 80 | else: 81 | mm_obj.seek(8) 82 | 83 | self.w_cursor = 8 84 | mm_obj.seek(8) 85 | 86 | return mm_obj 87 | 88 | def append(self, msg: Message) -> RecordOffset: 89 | if self.status != LogAccess.RW: 90 | raise Exception("Can't append to a closed segment") 91 | 92 | try: 93 | offset = self._mm_obj.tell() 94 | record_bytes = self.processor.create_record(msg) 95 | n_bytes = self._mm_obj.write(record_bytes) 96 | 97 | # Update write cursor 98 | self.w_cursor += n_bytes 99 | return RecordOffset(file_no=int(self.name), offset=offset, size=n_bytes) 100 | 101 | except ValueError: 102 | raise FullSegment("The log segment is full") 103 | 104 | def read(self, offset: RecordOffset): 105 | return self.processor.parse_bytes( 106 | self._mm_obj[offset.offset : offset.offset + offset.size] 107 | ) 108 | 109 | @property 110 | def closed(self) -> bool: 111 | if self.status == LogAccess.RO: 112 | return True 113 | return self._mm_obj.closed 114 | 115 | def archive(self): 116 | self.status = LogAccess.RO 117 | return self.w_cursor 118 | 119 | def read_only(self) -> int: 120 | self.status = LogAccess.RO 121 | return self.w_cursor 122 | 123 | def parse_name(self, path): 124 | filename = os.path.basename(path) 125 | return os.path.splitext(filename)[0] 126 | 127 | def parse_offset(): 128 | pass 129 | 130 | def close_file(self): 131 | self.read_only() 132 | self._mm_obj.flush() 133 | self._mm_obj.close() 134 | self.file.close() 135 | -------------------------------------------------------------------------------- /daskqueue/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import logger 2 | -------------------------------------------------------------------------------- /daskqueue/utils/funcs.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from typing import Generator, Iterable 3 | 4 | from daskqueue.Protocol import Message 5 | 6 | 7 | def msg_grouper(n: int, iterable: Iterable, **kwargs) -> Generator: 8 | it = iter(iterable) 9 | while True: 10 | chunk = list(itertools.islice(it, n)) 11 | if chunk: 12 | yield [Message(func, *args, **kwargs) for func, *args in chunk] 13 | else: 14 | break 15 | -------------------------------------------------------------------------------- /daskqueue/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper() 5 | 6 | # Create a custom logger 7 | 
logger = logging.getLogger(__name__) 8 | logger.setLevel(LOGLEVEL) 9 | 10 | # Create handlers 11 | c_handler = logging.StreamHandler() 12 | # c_handler.setLevel(LOGLEVEL) 13 | c_handler.setLevel(logging.DEBUG) 14 | 15 | # Create formatters and add it to handlers 16 | c_format = logging.Formatter("%(asctime)s,%(msecs)d %(levelname)s: %(message)s") 17 | c_handler.setFormatter(c_format) 18 | 19 | # Add handlers to the logger 20 | logger.addHandler(c_handler) 21 | -------------------------------------------------------------------------------- /daskqueue/utils/start_cluster.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | from distributed import Client, Queue, LocalCluster 3 | 4 | if __name__ == "__main__": 5 | cluster = LocalCluster( 6 | dashboard_address=":42166", threads_per_worker=1, n_workers=20 7 | ) 8 | client = Client(cluster) 9 | 10 | print(cluster.scheduler_address) 11 | -------------------------------------------------------------------------------- /examples/perf_copy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import asyncio 4 | import shutil 5 | from typing import List, Tuple 6 | import uuid 7 | 8 | import numpy as np 9 | from distributed import Client, Queue 10 | 11 | from daskqueue import ConsumerPool, QueuePool, ConsumerBaseClass 12 | from daskqueue.utils import logger 13 | 14 | 15 | def get_random_msg( 16 | start_dir: str, dst_dir: str, l_files: List[str], size: int 17 | ) -> List[Tuple[str, str]]: 18 | """Generate a random copy list. 19 | Will randomly choose a file in `start_dir` and create a copy. 20 | 21 | Args: 22 | start_dir (str): start directory 23 | dst_dir(str): destination directory 24 | l_files (List[str]): list of paths in directory 25 | size (int): output size 26 | 27 | Returns: 28 | List[Tuple[str, str]]: _description_ 29 | """ 30 | msg = [] 31 | for _ in range(size): 32 | idx = np.random.randint(len(l_files)) 33 | src = os.path.join(start_dir, l_files[idx]) 34 | dst = os.path.join(dst_dir, str(uuid.uuid1())) 35 | msg.append((src, dst)) 36 | return msg 37 | 38 | 39 | class CopyWorker(ConsumerBaseClass): 40 | ## You should always implement a concrete `process_item` where you define your processing code. 
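    # Here each item is a (src, dst) tuple built by get_random_msg() and
    # submitted via queue_pool.put_many() in main(); the actual copy is left
    # commented out below, so this example only logs the pair.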
41 | # Take a look at the Implementation Section 42 | def process_item(self, item): 43 | src, dst = item 44 | logger.info(item) 45 | # shutil.copy(src, dst) 46 | 47 | 48 | def main(): 49 | 50 | client = Client( 51 | n_workers=5, 52 | threads_per_worker=1, 53 | dashboard_address=":3338", 54 | direct_to_workers=True, 55 | ) 56 | 57 | ## Params 58 | start_dir = "/home/amine/Documents" 59 | dst_dir = "/home/amine/Documents" 60 | n_queues = 1 61 | n_consumers = 2 62 | 63 | # Queue Pool with basic load balancing 64 | queue_pool = QueuePool(client, n_queues) 65 | 66 | # Start Consummers 67 | consumer_pool = ConsumerPool(client, queue_pool, CopyWorker, n_consumers) 68 | consumer_pool.start() 69 | 70 | # Put copy Msg 71 | l_files = os.listdir(start_dir) 72 | 73 | for _ in range(10): 74 | msg = get_random_msg(start_dir, dst_dir, l_files, size=1000) 75 | queue_pool.put_many(msg) 76 | 77 | ## Join to stop work 78 | consumer_pool.join() 79 | 80 | 81 | if __name__ == "__main__": 82 | main() 83 | -------------------------------------------------------------------------------- /examples/perf_cpu_bound.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import time 3 | import os 4 | from distributed import Client 5 | from daskqueue import QueuePool, ConsumerPool 6 | from daskqueue.utils import logger 7 | 8 | 9 | def inc(x): 10 | return x + 1 11 | 12 | 13 | def process_item(): 14 | return sum(i * i for i in range(10**8)) 15 | 16 | 17 | def slowinc(x, delay): 18 | time.sleep(delay) 19 | return x + 1 20 | # return sum(i * i for i in range(10**8)) 21 | 22 | 23 | if __name__ == "__main__": 24 | 25 | n_queues = 10 26 | n_consumers = 100 27 | n_calls = 10000 28 | delay = 0.01 29 | 30 | client = Client( 31 | n_workers=10, 32 | threads_per_worker=1, 33 | dashboard_address=":3338", 34 | # address="tcp://192.168.1.92:8786", 35 | direct_to_workers=True, 36 | ) 37 | 38 | queue_pool = QueuePool(client, n_queues=n_queues) 39 | 40 | consumer_pool = ConsumerPool( 41 | client, 42 | queue_pool, 43 | n_consumers=n_consumers, 44 | max_concurrency=10000, 45 | ) 46 | 47 | tic = time.perf_counter() 48 | queue_pool.batch_submit([(slowinc, 1, delay) for _ in range(n_calls)]) 49 | 50 | toc = time.perf_counter() 51 | 52 | print(f"Submit all items in {toc - tic:0.4f} seconds") 53 | consumer_pool.start(timeout=1) 54 | consumer_pool.join(timestep=0.001, print_timestep=1, progress=True) 55 | 56 | toc = time.perf_counter() 57 | 58 | print(queue_pool) 59 | print(consumer_pool) 60 | print(f"Processed all items in {toc - tic:0.4f} seconds") 61 | -------------------------------------------------------------------------------- /examples/perf_docs_pages.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import asyncio 4 | import shutil 5 | from typing import List, Tuple 6 | import uuid 7 | 8 | import numpy as np 9 | from distributed import Client, Queue 10 | 11 | from daskqueue import ConsumerPool, QueuePool, ConsumerBaseClass 12 | from daskqueue.utils import logger 13 | 14 | 15 | def get_random_msg( 16 | start_dir: str, dst_dir: str, l_files: List[str], size: int 17 | ) -> List[Tuple[str, str]]: 18 | """Generate a random copy list. 19 | Will randomly choose a file in `start_dir` and create a copy. 
20 | 21 | Args: 22 | start_dir (str): start directory 23 | dst_dir(str): destination directory 24 | l_files (List[str]): list of paths in directory 25 | size (int): output size 26 | 27 | Returns: 28 | List[Tuple[str, str]]: _description_ 29 | """ 30 | msg = [] 31 | for _ in range(size): 32 | idx = np.random.randint(len(l_files)) 33 | src = os.path.join(start_dir, l_files[idx]) 34 | dst = os.path.join(dst_dir, str(uuid.uuid1())) 35 | msg.append((src, dst)) 36 | return msg 37 | 38 | 39 | class CopyWorker(ConsumerBaseClass): 40 | ## You should always implement a concrete `process_item` where you define your processing code. 41 | # Take a look at the Implementation Section 42 | def process_item(self, item): 43 | src, dst = item 44 | logger.info(item) 45 | # shutil.copy(src, dst) 46 | 47 | 48 | def main(): 49 | 50 | client = Client( 51 | n_workers=5, 52 | threads_per_worker=1, 53 | dashboard_address=":3338", 54 | direct_to_workers=True, 55 | ) 56 | 57 | ## Params 58 | start_dir = "/home/amine/Documents" 59 | dst_dir = "/home/amine/Documents" 60 | n_queues = 1 61 | n_consumers = 2 62 | 63 | # Queue Pool with basic load balancing 64 | queue_pool = QueuePool(client, n_queues) 65 | 66 | # Start Consummers 67 | consumer_pool = ConsumerPool(client, queue_pool, CopyWorker, n_consumers) 68 | consumer_pool.start() 69 | 70 | # Put copy Msg 71 | l_files = os.listdir(start_dir) 72 | 73 | for _ in range(10): 74 | msg = get_random_msg(start_dir, dst_dir, l_files, size=1000) 75 | queue_pool.put_many(msg) 76 | 77 | ## Join to stop work 78 | consumer_pool.join() 79 | 80 | 81 | if __name__ == "__main__": 82 | main() 83 | -------------------------------------------------------------------------------- /examples/perf_io_bound.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import time 3 | import pytest 4 | from distributed import Client 5 | from daskqueue import QueuePool, ConsumerBaseClass, ConsumerPool 6 | 7 | from daskqueue.utils import logger 8 | 9 | 10 | def open_file(): 11 | time.sleep(1) 12 | with open("/dev/urandom", "rb") as f: 13 | return f.read(100) 14 | 15 | 16 | if __name__ == "__main__": 17 | client = Client( 18 | n_workers=5, 19 | threads_per_worker=1, 20 | dashboard_address=":3338", 21 | direct_to_workers=True, 22 | ) 23 | 24 | ## Params 25 | n_queues = 1 26 | n_consumers = 2 27 | 28 | queue_pool = QueuePool(client, n_queues) 29 | 30 | consumer_pool = ConsumerPool(client, queue_pool, n_consumers=n_consumers) 31 | 32 | print(queue_pool) 33 | print(consumer_pool) 34 | 35 | consumer_pool.start() 36 | 37 | for i in range(5): 38 | queue_pool.submit(open_file) 39 | 40 | consumer_pool.join() 41 | 42 | print(queue_pool) 43 | print(consumer_pool) 44 | -------------------------------------------------------------------------------- /figures/benchmark_v0.1.5/submittime_nqueues_v0.1.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/figures/benchmark_v0.1.5/submittime_nqueues_v0.1.5.png -------------------------------------------------------------------------------- /figures/benchmark_v0.1.5/taskduration_workthroughput_v0.1.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/figures/benchmark_v0.1.5/taskduration_workthroughput_v0.1.5.png 
-------------------------------------------------------------------------------- /figures/benchmark_v0.1.5/workthroughput_taskduration_nqueues_v0.1.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/figures/benchmark_v0.1.5/workthroughput_taskduration_nqueues_v0.1.5.png -------------------------------------------------------------------------------- /figures/benchmark_v0.1.5/worktime_nqueues.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/figures/benchmark_v0.1.5/worktime_nqueues.png -------------------------------------------------------------------------------- /figures/copy async.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/figures/copy async.PNG -------------------------------------------------------------------------------- /figures/copy async2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/figures/copy async2.PNG -------------------------------------------------------------------------------- /figures/copy async3.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/figures/copy async3.PNG -------------------------------------------------------------------------------- /integration/test_integration_durable.py: -------------------------------------------------------------------------------- 1 | from time import perf_counter 2 | 3 | import click 4 | 5 | from conftest import func 6 | from daskqueue import ConsumerPool 7 | from daskqueue.Protocol import Message 8 | from daskqueue.queue.base_queue import Durability 9 | from daskqueue.queue.durable_queue import DurableQueue 10 | from daskqueue.QueuePool import QueuePool 11 | 12 | N = 1_000 13 | 14 | cprint = click.echo 15 | 16 | 17 | def func_no_return(): 18 | return None 19 | 20 | 21 | def process_item(): 22 | return sum(i * i for i in range(10**2)) 23 | 24 | 25 | def rdx_msg(): 26 | msg = Message(func, 12) 27 | return msg 28 | 29 | 30 | def test_durable_queue(durable_queue: DurableQueue): 31 | put_msgs = [] 32 | get_msgs = [] 33 | s = perf_counter() 34 | for _ in range(N): 35 | msg = rdx_msg() 36 | durable_queue.put_sync(msg) 37 | put_msgs.append(msg.id) 38 | 39 | e = perf_counter() 40 | 41 | w_ops = N / (e - s) # op/s 42 | cprint("\n\t Mean write ops: " + click.style(f"{w_ops:.2f} wop/s", fg="green")) 43 | 44 | s = perf_counter() 45 | for _ in range(N): 46 | msg = durable_queue.get_sync() 47 | get_msgs.append(msg.id) 48 | e = perf_counter() 49 | r_ops = N / (e - s) # op/s 50 | cprint("\n\t Mean read ops: " + click.style(f"{r_ops:.2f} rop/s", fg="green")) 51 | 52 | assert put_msgs == get_msgs 53 | 54 | 55 | def test_durable_queue_gc(durable_queue: DurableQueue): 56 | put_msgs = [] 57 | get_msgs = [] 58 | s = perf_counter() 59 | for _ in range(N): 60 | msg = rdx_msg() 61 | durable_queue.put_sync(msg) 62 | put_msgs.append(msg.id) 63 | 64 | e = perf_counter() 65 | 66 | w_ops = N / (e - s) # op/s 67 | cprint("\n\t Mean write ops: " + click.style(f"{w_ops:.2f} wop/s", fg="green")) 68 | 69 | s 
= perf_counter() 70 | for _ in range(N): 71 | msg = durable_queue.get_sync() 72 | get_msgs.append(msg.id) 73 | e = perf_counter() 74 | r_ops = N / (e - s) # op/s 75 | cprint("\n\t Mean read ops: " + click.style(f"{r_ops:.2f} rop/s", fg="green")) 76 | 77 | assert put_msgs == get_msgs 78 | 79 | 80 | def test_durable_queuepool(client, tmp_path): 81 | n_queues = 1 82 | n_consumers = 2 83 | n_calls = 20 84 | 85 | queue_pool = QueuePool( 86 | client, n_queues, durability=Durability.DURABLE, dirpath=str(tmp_path) 87 | ) 88 | consumer_pool = ConsumerPool(client, queue_pool, n_consumers=n_consumers) 89 | 90 | for _ in range(n_calls): 91 | queue_pool.submit(func_no_return) 92 | 93 | assert n_calls == sum(list(queue_pool.get_queue_size().values())) 94 | 95 | tic = perf_counter() 96 | consumer_pool.start() 97 | consumer_pool.join(timestep=0.2, progress=True) 98 | toc = perf_counter() 99 | 100 | cprint( 101 | f"\n\tProcessed all {n_calls} in: " 102 | + click.style(f"{toc - tic:0.4f} seconds", fg="green") 103 | ) 104 | res = consumer_pool.results() 105 | assert n_calls * [None] == [val for k in res for val in res[k].values()] 106 | -------------------------------------------------------------------------------- /integration/test_integration_durable_multiplelog.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from conftest import func 4 | from daskqueue.Protocol import Message 5 | from daskqueue.queue.durable_queue import DurableQueue 6 | 7 | N = 1_000 8 | 9 | cprint = click.echo 10 | 11 | 12 | def func_no_return(): 13 | return None 14 | 15 | 16 | def process_item(): 17 | return sum(i * i for i in range(10**2)) 18 | 19 | 20 | def rdx_msg(): 21 | msg = Message(func, 12) 22 | return msg 23 | 24 | 25 | def test_durable_multiple_logsegments(tmp_path): 26 | put_msgs = [] 27 | get_msgs = [] 28 | index_bytes = 2048 29 | log_bytes = 1024 30 | durable_queue = DurableQueue( 31 | name="queue-0", 32 | dirpath=str(tmp_path), 33 | index_max_bytes=index_bytes, 34 | log_max_bytes=log_bytes, 35 | ) 36 | 37 | len_msg = len(rdx_msg().serialize()) 38 | N = 3 * (log_bytes // len_msg) 39 | 40 | for _ in range(N): 41 | msg = rdx_msg() 42 | durable_queue.put_sync(msg) 43 | put_msgs.append(msg.id) 44 | 45 | assert len(durable_queue.ro_segments) == 2 46 | 47 | for _ in range(N): 48 | msg = durable_queue.get_sync() 49 | get_msgs.append(msg.id) 50 | 51 | assert len(durable_queue.index_segment.ready) == 0 52 | assert len(durable_queue.index_segment.delivered) == N 53 | assert put_msgs == get_msgs 54 | 55 | 56 | def test_durable_multiple_logsegments_reopen(tmp_path): 57 | put_msgs = [] 58 | get_msgs = [] 59 | index_bytes = 2048 60 | log_bytes = 1024 61 | durable_queue = DurableQueue( 62 | name="queue-0", 63 | dirpath=str(tmp_path), 64 | index_max_bytes=index_bytes, 65 | log_max_bytes=log_bytes, 66 | ) 67 | 68 | len_msg = len(rdx_msg().serialize()) 69 | N = 3 * (log_bytes // len_msg) 70 | M = N // 2 71 | 72 | for _ in range(N): 73 | msg = rdx_msg() 74 | durable_queue.put_sync(msg) 75 | put_msgs.append(msg.id) 76 | 77 | assert durable_queue.qsize() == N 78 | assert len(durable_queue.ro_segments) == 2 79 | 80 | # Pop some elements 81 | for i in range(M): 82 | msg = durable_queue.get_sync() 83 | if i % 4 == 0: 84 | durable_queue.ack_sync(msg.delivered_timestamp, msg.id) 85 | get_msgs.append(msg.id) 86 | 87 | ready_prev = durable_queue.index_segment.ready 88 | delivered_prev = durable_queue.index_segment.delivered 89 | 90 | durable_queue.close() 91 | # Ropen the queue 92 | 
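    # Reopening with the same name/dirpath should replay the on-disk index
    # segment, so the rebuilt ready/delivered maps are expected to match the
    # snapshots captured just before close() (asserted below).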
durable_queue = DurableQueue( 93 | name="queue-0", 94 | dirpath=str(tmp_path), 95 | index_max_bytes=index_bytes, 96 | log_max_bytes=log_bytes, 97 | ) 98 | 99 | assert durable_queue.index_segment.ready == ready_prev 100 | assert durable_queue.index_segment.delivered == delivered_prev 101 | -------------------------------------------------------------------------------- /integration/test_integration_restart_cluster.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from daskqueue.ConsumerPool import ConsumerPool 4 | from daskqueue.queue.base_queue import Durability 5 | from daskqueue.QueuePool import QueuePool 6 | 7 | cprint = click.echo 8 | 9 | 10 | def func_no_return(): 11 | return None 12 | 13 | 14 | def process_item(): 15 | return sum(i * i for i in range(10**2)) 16 | 17 | 18 | def test_durable_restart_cluster(tmp_path, client): 19 | n_queues = 1 20 | n_consumers = 1 21 | 22 | queue_pool = QueuePool( 23 | client, n_queues, durability=Durability.DURABLE, dirpath=str(tmp_path) 24 | ) 25 | 26 | for _ in range(10): 27 | queue_pool.submit(func_no_return) 28 | 29 | # Other queue pool 30 | queue_pool = QueuePool( 31 | client, n_queues, durability=Durability.DURABLE, dirpath=str(tmp_path) 32 | ) 33 | 34 | consumer_pool = ConsumerPool( 35 | client, 36 | queue_pool=queue_pool, 37 | n_consumers=n_consumers, 38 | batch_size=10, 39 | ) 40 | consumer_pool.start() 41 | consumer_pool.join(0.1, progress=True) 42 | res = consumer_pool.results() 43 | 44 | assert 10 * [None] == [val for k in res for val in res[k].values()] 45 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | minversion = 6.0 3 | addopts = -v 4 | testpaths = 5 | tests 6 | integration 7 | benchmark 8 | -------------------------------------------------------------------------------- /requirements.dev.txt: -------------------------------------------------------------------------------- 1 | -e . 
2 | aiohttp==3.8.1 3 | aiosignal==1.2.0 4 | anyio==3.6.1 5 | appnope==0.1.3 6 | argon2-cffi==21.3.0 7 | argon2-cffi-bindings==21.2.0 8 | asttokens==2.0.5 9 | async-timeout==4.0.2 10 | attrs==21.4.0 11 | Babel==2.10.3 12 | backcall==0.2.0 13 | beautifulsoup4==4.11.1 14 | black==22.6.0 15 | bleach==5.0.1 16 | bokeh==2.4.3 17 | build==0.8.0 18 | certifi==2022.6.15 19 | cffi==1.15.1 20 | cfgv==3.3.1 21 | charset-normalizer==2.1.0 22 | click==8.0.4 23 | cloudpickle==2.1.0 24 | commonmark==0.9.1 25 | cryptography==37.0.4 26 | dask==2022.7.1 27 | dask-labextension==5.3.0 28 | debugpy==1.6.2 29 | decorator==5.1.1 30 | defusedxml==0.7.1 31 | distlib==0.3.5 32 | distributed==2022.7.1 33 | docutils==0.19 34 | entrypoints==0.4 35 | execnet==1.9.0 36 | executing==0.8.3 37 | fastjsonschema==2.16.1 38 | filelock==3.7.1 39 | flake8==4.0.1 40 | frozenlist==1.3.0 41 | fsspec==2022.5.0 42 | HeapDict==1.0.1 43 | identify==2.5.2 44 | idna==3.3 45 | importlib-metadata==4.12.0 46 | iniconfig==1.1.1 47 | ipykernel==6.15.1 48 | ipython==8.4.0 49 | ipython-genutils==0.2.0 50 | jedi==0.18.1 51 | jeepney==0.8.0 52 | Jinja2==3.1.2 53 | json5==0.9.8 54 | jsonschema==4.7.2 55 | jupyter-client==7.3.4 56 | jupyter-core==4.11.1 57 | jupyter-server==1.18.1 58 | jupyter-server-proxy==3.2.1 59 | jupyterlab==3.4.4 60 | jupyterlab-pygments==0.2.2 61 | jupyterlab-server==2.15.0 62 | keyring==23.7.0 63 | locket==1.0.0 64 | MarkupSafe==2.1.1 65 | matplotlib-inline==0.1.3 66 | mccabe==0.6.1 67 | mistune==0.8.4 68 | msgpack==1.0.4 69 | multidict==6.0.2 70 | mypy==0.991 71 | mypy-extensions==0.4.3 72 | nbclassic==0.4.3 73 | nbclient==0.6.6 74 | nbconvert==6.5.0 75 | nbformat==5.4.0 76 | nest-asyncio==1.5.5 77 | nodeenv==1.7.0 78 | notebook==6.4.12 79 | notebook-shim==0.1.0 80 | numpy==1.23.1 81 | packaging==21.3 82 | pandas==1.4.3 83 | pandocfilters==1.5.0 84 | parso==0.8.3 85 | partd==1.2.0 86 | pathspec==0.9.0 87 | pep517==0.12.0 88 | pexpect==4.8.0 89 | pickleshare==0.7.5 90 | Pillow==9.2.0 91 | pkginfo==1.8.3 92 | platformdirs==2.5.2 93 | pluggy==1.0.0 94 | pre-commit==2.20.0 95 | prometheus-client==0.14.1 96 | prompt-toolkit==3.0.30 97 | psutil==5.9.1 98 | ptyprocess==0.7.0 99 | pure-eval==0.2.2 100 | py==1.11.0 101 | pycodestyle==2.8.0 102 | pycparser==2.21 103 | pyflakes==2.4.0 104 | Pygments==2.12.0 105 | pypandoc==1.8.1 106 | pyparsing==3.0.9 107 | pyrsistent==0.18.1 108 | pytest==7.1.2 109 | pytest-asyncio==0.19.0 110 | pytest-xdist==3.1.0 111 | python-dateutil==2.8.2 112 | pytz==2022.1 113 | PyYAML==6.0 114 | pyzmq==23.2.0 115 | readme-renderer==35.0 116 | requests==2.28.1 117 | requests-toolbelt==0.9.1 118 | rfc3986==2.0.0 119 | rich==12.5.1 120 | SecretStorage==3.3.2 121 | Send2Trash==1.8.0 122 | simpervisor==0.4 123 | six==1.16.0 124 | sniffio==1.2.0 125 | sortedcontainers==2.4.0 126 | soupsieve==2.3.2.post1 127 | stack-data==0.3.0 128 | tblib==1.7.0 129 | terminado==0.15.0 130 | tinycss2==1.1.1 131 | toml==0.10.2 132 | tomli==2.0.1 133 | toolz==0.12.0 134 | tornado==6.1 135 | traitlets==5.3.0 136 | twine==4.0.1 137 | typing_extensions==4.3.0 138 | urllib3==1.26.10 139 | versioneer==0.22 140 | virtualenv==20.16.0 141 | wcwidth==0.2.5 142 | webencodings==0.5.1 143 | websocket-client==1.3.3 144 | yarl==1.7.2 145 | zict==2.2.0 146 | zipp==3.8.1 147 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.1 2 | aiosignal==1.2.0 3 | anyio==3.6.1 4 | argon2-cffi==21.3.0 5 | 
argon2-cffi-bindings==21.2.0 6 | asttokens==2.0.5 7 | async-timeout==4.0.2 8 | attrs==21.4.0 9 | Babel==2.10.3 10 | backcall==0.2.0 11 | beautifulsoup4==4.11.1 12 | black==22.6.0 13 | bleach==5.0.1 14 | bokeh==2.4.3 15 | build==0.8.0 16 | certifi==2022.6.15 17 | cffi==1.15.1 18 | cfgv==3.3.1 19 | charset-normalizer==2.1.0 20 | click==8.0.4 21 | cloudpickle==2.1.0 22 | commonmark==0.9.1 23 | cryptography==37.0.4 24 | dask==2022.7.1 25 | dask-labextension==5.3.0 26 | debugpy==1.6.2 27 | decorator==5.1.1 28 | defusedxml==0.7.1 29 | distlib==0.3.5 30 | distributed==2022.7.1 31 | docutils==0.19 32 | entrypoints==0.4 33 | executing==0.8.3 34 | fastjsonschema==2.16.1 35 | filelock==3.7.1 36 | flake8==4.0.1 37 | frozenlist==1.3.0 38 | fsspec==2022.5.0 39 | HeapDict==1.0.1 40 | identify==2.5.2 41 | idna==3.3 42 | importlib-metadata==4.12.0 43 | iniconfig==1.1.1 44 | ipykernel==6.15.1 45 | ipython==8.4.0 46 | ipython-genutils==0.2.0 47 | jedi==0.18.1 48 | jeepney==0.8.0 49 | Jinja2==3.1.2 50 | json5==0.9.8 51 | jsonschema==4.7.2 52 | jupyter-client==7.3.4 53 | jupyter-core==4.11.1 54 | jupyter-server==1.18.1 55 | jupyter-server-proxy==3.2.1 56 | jupyterlab==3.4.4 57 | jupyterlab-pygments==0.2.2 58 | jupyterlab-server==2.15.0 59 | keyring==23.7.0 60 | locket==1.0.0 61 | MarkupSafe==2.1.1 62 | matplotlib-inline==0.1.3 63 | mccabe==0.6.1 64 | mistune==0.8.4 65 | msgpack==1.0.4 66 | multidict==6.0.2 67 | mypy-extensions==0.4.3 68 | nbclassic==0.4.3 69 | nbclient==0.6.6 70 | nbconvert==6.5.0 71 | nbformat==5.4.0 72 | nest-asyncio==1.5.5 73 | nodeenv==1.7.0 74 | notebook==6.4.12 75 | notebook-shim==0.1.0 76 | numpy==1.23.1 77 | packaging==21.3 78 | pandas==1.4.3 79 | pandocfilters==1.5.0 80 | parso==0.8.3 81 | partd==1.2.0 82 | pathspec==0.9.0 83 | pep517==0.12.0 84 | pexpect==4.8.0 85 | pickleshare==0.7.5 86 | Pillow==9.2.0 87 | pkginfo==1.8.3 88 | platformdirs==2.5.2 89 | pluggy==1.0.0 90 | pre-commit==2.20.0 91 | prometheus-client==0.14.1 92 | prompt-toolkit==3.0.30 93 | psutil==5.9.1 94 | ptyprocess==0.7.0 95 | pure-eval==0.2.2 96 | py==1.11.0 97 | pycodestyle==2.8.0 98 | pycparser==2.21 99 | pyflakes==2.4.0 100 | Pygments==2.12.0 101 | pypandoc==1.8.1 102 | pyparsing==3.0.9 103 | pyrsistent==0.18.1 104 | pytest==7.1.2 105 | pytest-asyncio==0.19.0 106 | python-dateutil==2.8.2 107 | pytz==2022.1 108 | PyYAML==6.0 109 | pyzmq==23.2.0 110 | readme-renderer==35.0 111 | requests==2.28.1 112 | requests-toolbelt==0.9.1 113 | rfc3986==2.0.0 114 | rich==12.5.1 115 | SecretStorage==3.3.2 116 | Send2Trash==1.8.0 117 | simpervisor==0.4 118 | six==1.16.0 119 | sniffio==1.2.0 120 | sortedcontainers==2.4.0 121 | soupsieve==2.3.2.post1 122 | stack-data==0.3.0 123 | tblib==1.7.0 124 | terminado==0.15.0 125 | tinycss2==1.1.1 126 | toml==0.10.2 127 | tomli==2.0.1 128 | toolz==0.12.0 129 | tornado==6.1 130 | traitlets==5.3.0 131 | twine==4.0.1 132 | typing_extensions==4.3.0 133 | urllib3==1.26.10 134 | versioneer==0.22 135 | virtualenv==20.16.0 136 | wcwidth==0.2.5 137 | webencodings==0.5.1 138 | websocket-client==1.3.3 139 | yarl==1.7.2 140 | zict==2.2.0 141 | zipp==3.8.1 142 | -e . 
143 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = daskqueue/_version.py 5 | versionfile_build = daskqueue/_version.py 6 | tag_prefix = 7 | parentdir_prefix = daskqueue- 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import versioneer 2 | from setuptools import find_packages, setup 3 | 4 | from os import path 5 | import os 6 | 7 | # read the contents of your README file 8 | this_directory = path.abspath(path.dirname(__file__)) 9 | 10 | description = "daskqueue distributed queue package" 11 | 12 | long_description = open("README.md").read() 13 | 14 | 15 | setup( 16 | name="daskqueue", 17 | packages=find_packages(exclude=["*tests*"]), 18 | version=versioneer.get_version(), 19 | cmdclass=versioneer.get_cmdclass(), 20 | license="MIT", 21 | description=description, 22 | project_urls={ 23 | "Source": "https://github.com/AmineDiro/daskqueue/", 24 | }, 25 | author="Amine Dirhoussi", 26 | maintainer_email="aminedirhoussi1@gmail.com", 27 | long_description=long_description, 28 | long_description_content_type="text/markdown", 29 | keywords=["Distributed Task Queue"], 30 | install_requires=["numpy", "dask>=2022.7.1", "distributed>=2022.7.1"], 31 | python_requires=">3.6", 32 | include_package_data=True, 33 | classifiers=[ 34 | "Development Status :: 3 - Alpha", 35 | "Intended Audience :: Developers", 36 | "Topic :: Software Development :: Build Tools", 37 | "License :: OSI Approved :: MIT License", 38 | "Programming Language :: Python :: 3", 39 | "Programming Language :: Python :: 3.6", 40 | "Programming Language :: Python :: 3.7", 41 | "Programming Language :: Python :: 3.8", 42 | "Programming Language :: Python :: 3.9", 43 | ], 44 | ) 45 | -------------------------------------------------------------------------------- /tests/test_consumer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from re import I 4 | 5 | import pytest 6 | from distributed import Client 7 | from distributed.utils_test import gen_cluster 8 | 9 | from daskqueue.Consumer import ConsumerBaseClass, DummyConsumer 10 | from daskqueue.queue.base_queue import Durability 11 | from daskqueue.QueuePool import QueuePool, QueuePoolActor 12 | 13 | logging.basicConfig( 14 | level=logging.INFO, 15 | format="%(asctime)s,%(msecs)d %(levelname)s: %(message)s", 16 | datefmt="%H:%M:%S", 17 | ) 18 | 19 | 20 | @gen_cluster(cluster_dump_directory=False) 21 | async def test_async_consumer_create(s, a, b): 22 | async with Client(s.address, asynchronous=True) as c: 23 | worker = c.submit( 24 | DummyConsumer, 25 | 1, 26 | "test-consumer", 27 | "test", 28 | 1, 29 | 10000, 30 | 1, 31 | True, 32 | workers=[a.address], 33 | actor=True, 34 | ) 35 | worker = await worker 36 | assert hasattr(worker, "get_items") 37 | assert hasattr(worker, "len_items") 38 | assert hasattr(worker, "start") 39 | assert hasattr(worker, "_consume") 40 | assert hasattr(worker, "cancel") 41 | assert hasattr(worker, "is_consumming") 42 | 43 | 44 | def test_create_consumer_concrete(): 45 | class Worker(ConsumerBaseClass): 46 | pass 47 | 48 | pool = "test" 49 | with pytest.raises(Exception) as e_info: 50 | worker = Worker(pool) 51 | print(f"{e_info}") 52 | 53 | 54 | @gen_cluster( 55 
| client=True, 56 | cluster_dump_directory=False, 57 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 58 | ) 59 | async def test_consummer_get_item(c, s, a, b): 60 | async with Client(s.address, asynchronous=True) as c: 61 | queue_pool = await c.submit( 62 | QueuePoolActor, 1, Durability.TRANSIENT, 1, False, actor=True 63 | ) 64 | await queue_pool.put(1) 65 | consumer = await c.submit( 66 | DummyConsumer, 1, "test-consumer", queue_pool, 1, 1000, 1, False, actor=True 67 | ) 68 | await consumer.start() 69 | assert await consumer.done() == False 70 | 71 | await consumer.cancel() 72 | assert await consumer.done() == True 73 | 74 | n_items = await consumer.len_items() 75 | assert n_items == 1 76 | 77 | 78 | @gen_cluster( 79 | client=True, 80 | cluster_dump_directory=False, 81 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 82 | ) 83 | async def test_consummer_get_items(c, s, a, b): 84 | async with Client(s.address, asynchronous=True) as c: 85 | queue_pool = await c.submit(QueuePoolActor, 1, actor=True) 86 | await queue_pool.put(1) 87 | await queue_pool.put(1) 88 | consumer = await c.submit( 89 | DummyConsumer, 1, "test-consumer", queue_pool, 1, 1000, 1, True, actor=True 90 | ) 91 | await consumer.start() 92 | await consumer.cancel() 93 | assert await consumer.is_consumming() == False 94 | assert await consumer.done() == True 95 | -------------------------------------------------------------------------------- /tests/test_consumer_pool.py: -------------------------------------------------------------------------------- 1 | from time import perf_counter 2 | 3 | from distributed.utils_test import cleanup, client, cluster_fixture, gen_cluster, loop 4 | 5 | from conftest import sleep_func 6 | from daskqueue.ConsumerPool import ConsumerPool 7 | from daskqueue.QueuePool import QueuePool 8 | 9 | add = lambda x, y: x + y 10 | func_no_return = lambda: None 11 | 12 | 13 | def test_consumer_pool_create(client): 14 | n_queues = 1 15 | queue_pool = QueuePool(client, n_queues) 16 | n_consumers = 2 17 | consumer_pool = ConsumerPool(client, queue_pool=queue_pool, n_consumers=n_consumers) 18 | 19 | assert hasattr(consumer_pool, "start") 20 | assert hasattr(consumer_pool, "join") 21 | assert hasattr(consumer_pool, "cancel") 22 | assert hasattr(consumer_pool, "results") 23 | assert hasattr(consumer_pool, "nb_consumed") 24 | 25 | assert 2 == len(consumer_pool) 26 | 27 | 28 | def test_consumer_pool_submit_pure(client): 29 | n_queues = 1 30 | queue_pool = QueuePool(client, n_queues) 31 | 32 | n_consumers = 2 33 | consumer_pool = ConsumerPool(client, queue_pool=queue_pool, n_consumers=n_consumers) 34 | 35 | for _ in range(10): 36 | queue_pool.submit(add, 1, 1) 37 | 38 | consumer_pool.start() 39 | consumer_pool.join() 40 | res = consumer_pool.results() 41 | assert 10 * [2] == [val for k in res for val in res[k].values()] 42 | assert sum(queue_pool.get_queue_size().values()) <= 0 43 | 44 | 45 | def test_consumer_pool_submit_noreturn(client): 46 | n_queues = 1 47 | queue_pool = QueuePool(client, n_queues) 48 | n_consumers = 10 49 | 50 | consumer_pool = ConsumerPool( 51 | client, queue_pool=queue_pool, n_consumers=n_consumers, batch_size=1 52 | ) 53 | for _ in range(10): 54 | queue_pool.submit(func_no_return) 55 | 56 | consumer_pool.start() 57 | consumer_pool.join() 58 | res = consumer_pool.results() 59 | assert 10 * [None] == [val for k in res for val in res[k].values()] 60 | 61 | 62 | def test_consumer_pool_ack_late(client): 63 | n_queues = 1 64 | n_consumers = 1 65 | 
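    # This test builds the ConsumerPool below with early_ack=False: judging
    # from the flag and test names, messages are presumably acknowledged only
    # after processing finishes rather than on delivery (assumption; the
    # consumer implementation is outside this file).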
queue_pool = QueuePool(client, n_queues) 66 | 67 | consumer_pool = ConsumerPool( 68 | client, 69 | queue_pool=queue_pool, 70 | n_consumers=n_consumers, 71 | batch_size=1, 72 | early_ack=False, 73 | ) 74 | for _ in range(10): 75 | queue_pool.submit(func_no_return) 76 | 77 | consumer_pool.start() 78 | consumer_pool.join(0.1) 79 | res = consumer_pool.results() 80 | 81 | assert 10 * [None] == [val for k in res for val in res[k].values()] 82 | 83 | 84 | def test_consumer_pool_join(client): 85 | n_queues = 1 86 | n_consumers = 1 87 | queue_pool = QueuePool(client, n_queues) 88 | 89 | consumer_pool = ConsumerPool( 90 | client, 91 | queue_pool=queue_pool, 92 | n_consumers=n_consumers, 93 | batch_size=1, 94 | early_ack=False, 95 | ) 96 | queue_pool.submit(sleep_func, 1) 97 | 98 | s = perf_counter() 99 | consumer_pool.start() 100 | consumer_pool.join(0.1) 101 | e = perf_counter() 102 | res = consumer_pool.results() 103 | 104 | assert [1] == [val for k in res for val in res[k].values()] 105 | assert e - s >= 1 106 | -------------------------------------------------------------------------------- /tests/test_durable_queue.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | 3 | from daskqueue.Protocol import Message 4 | from daskqueue.queue.durable_queue import DurableQueue 5 | 6 | 7 | def test_init_durable_queue(tmp_path): 8 | queue = DurableQueue(name="queue-0", dirpath=str(tmp_path)) 9 | 10 | # segment name 11 | assert int(queue.active_segment.name) == 0 12 | assert queue.index_segment.name == "default-queue-0" 13 | 14 | # Mem structure 15 | assert len(queue.ro_segments) == 0 16 | assert len(queue.index_segment) == 0 17 | 18 | 19 | def test_queue_push_sync(durable_queue: DurableQueue, msg: Message): 20 | rec = durable_queue.put_sync(msg) 21 | 22 | assert rec.msg_id == msg.id 23 | assert len(durable_queue.index_segment) == 1 24 | assert durable_queue.active_segment.w_cursor == rec.offset.offset + rec.offset.size 25 | 26 | 27 | def test_queue_get_sync(durable_queue: DurableQueue, msg: Message): 28 | [durable_queue.put_sync(msg) for _ in range(4)] 29 | pop_msg = durable_queue.get_sync() 30 | 31 | assert msg.id == pop_msg.id 32 | assert msg.data() == pop_msg.data() 33 | -------------------------------------------------------------------------------- /tests/test_generalconsumer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDiro/daskqueue/106fdcfd68db763bbadaa5f95f961552dcc3a219/tests/test_generalconsumer.py -------------------------------------------------------------------------------- /tests/test_index_segment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import struct 4 | import tempfile 5 | import time 6 | 7 | import pytest 8 | 9 | from conftest import func, index_segment 10 | from daskqueue.Protocol import Message 11 | from daskqueue.segment.log_record import RecordOffset 12 | from daskqueue.segment.log_segment import LogSegment 13 | 14 | logging.basicConfig( 15 | level=logging.INFO, 16 | format="%(asctime)s,%(msecs)d %(levelname)s: %(message)s", 17 | datefmt="%H:%M:%S", 18 | ) 19 | 20 | from daskqueue.segment import FORMAT_VERSION, HEADER_SIZE, INDEX_FILE_IDENTIFIER 21 | from daskqueue.segment.index_record import IdxRecord, IdxRecordProcessor, MessageStatus 22 | from daskqueue.segment.index_segment import IndexSegment 23 | 24 | 25 | def test_index_segment(tmp_path): 26 | name = 
str(1).rjust(10, "0") + ".index" 27 | index_path = tmp_path / name 28 | 29 | seg = IndexSegment(index_path) 30 | assert seg._mm_obj.closed == False 31 | assert seg._mm_obj.tell() == 8 32 | 33 | 34 | def test_check_index_file(tmpdir): 35 | p = tmpdir.join("bad.index") 36 | p.write(b"test") 37 | 38 | with pytest.raises(Exception) as e_info: 39 | seg = IndexSegment(p) 40 | 41 | p = tmpdir.join("good.index") 42 | 43 | p.write(struct.pack("!HH", *FORMAT_VERSION) + INDEX_FILE_IDENTIFIER) 44 | 45 | seg = IndexSegment(p) 46 | assert seg._mm_obj.tell() == 8 47 | 48 | 49 | def test_index_segment_append(msg, log_segment, index_segment): 50 | offset: RecordOffset = log_segment.append(msg) 51 | 52 | assert log_segment.w_cursor == HEADER_SIZE + offset.size 53 | 54 | # Record the msg to index 55 | index_segment.push(msg.id, offset) 56 | # idx_record = index_segment.pop(msg.id) 57 | 58 | assert msg.id in index_segment.ready 59 | assert index_segment.ready[msg.id].offset.offset == offset.offset 60 | assert index_segment.ready[msg.id].offset.size == offset.size 61 | 62 | 63 | def test_index_segment_close(index_segment, msg): 64 | index_segment.close() 65 | assert index_segment.closed 66 | 67 | 68 | def test_index_segment_read(msg, index_segment, log_segment): 69 | from conftest import func 70 | 71 | N = 10 72 | 73 | for _ in range(N): 74 | msg = Message(func, 1) 75 | offset = log_segment.append(msg) 76 | index_segment.append(msg.id, MessageStatus.READY, offset) 77 | 78 | index_segment.close() 79 | assert len(index_segment) == N 80 | 81 | 82 | def test_index_segment_pop(msg, index_segment: IndexSegment, log_segment): 83 | from conftest import func 84 | 85 | N = 10 86 | M = 3 87 | for _ in range(N): 88 | msg = Message(func, 1) 89 | offset = log_segment.append(msg) 90 | index_segment.append(msg.id, MessageStatus.READY, offset) 91 | 92 | for _ in range(M): 93 | rec = index_segment.pop() 94 | assert len(index_segment.delivered) == M 95 | assert len(index_segment.ready) == N - M 96 | 97 | 98 | def test_index_segment_ack(msg, index_segment: IndexSegment, log_segment): 99 | 100 | N = 10 101 | for _ in range(N): 102 | msg = Message(func, 1) 103 | offset = log_segment.append(msg) 104 | index_segment.append(msg.id, MessageStatus.READY, offset) 105 | 106 | rec = index_segment.pop() 107 | assert len(index_segment.delivered) == 1 108 | 109 | assert index_segment.delivered.keys()[0] == rec.timestamp 110 | 111 | delivered_rec = index_segment.ack(rec.timestamp, rec.msg_id) 112 | assert len(index_segment.delivered) == 0 113 | assert len(index_segment.ready) == N - 1 114 | assert delivered_rec.timestamp == rec.timestamp 115 | assert delivered_rec.status == MessageStatus.ACKED 116 | 117 | 118 | def test_index_processor(log_segment, index_segment, msg): 119 | offset = log_segment.append(msg) 120 | index_record = index_segment.push(msg.id, offset) 121 | processor = IdxRecordProcessor() 122 | 123 | buffer = processor.serialize_idx_record(index_record) 124 | index_record_bis = processor.parse_bytes(buffer) 125 | assert index_record == index_record_bis 126 | 127 | 128 | def test_load_index(msg: Message, log_segment: LogSegment): 129 | with tempfile.TemporaryDirectory() as tmpdirname: 130 | name = f"default-queue-0.index" 131 | index_path = os.path.join(tmpdirname, name) 132 | index_segment = IndexSegment(index_path) 133 | 134 | for _ in range(10): 135 | msg = Message(func, 1) 136 | offset = log_segment.append(msg) 137 | index_segment.push(msg.id, offset) 138 | 139 | index_segment.close() 140 | assert index_segment.closed 141 | 142 
| ready = index_segment.ready 143 | delivered = index_segment.delivered 144 | 145 | index_segment = IndexSegment(index_path) 146 | assert ready == index_segment.ready 147 | assert delivered == index_segment.delivered 148 | 149 | 150 | def test_index_segment_gc(msg, tmp_path, log_segment: LogSegment): 151 | name = str(1).rjust(10, "0") + ".index" 152 | index_path = tmp_path / name 153 | 154 | index_segment = IndexSegment(index_path, ack_timeout=0.1, retry=False) 155 | N = 10 156 | M = 4 157 | 158 | for _ in range(N): 159 | msg = Message(func, 1) 160 | offset = log_segment.append(msg) 161 | index_segment.append(msg.id, MessageStatus.READY, offset) 162 | 163 | for _ in range(M): 164 | _ = index_segment.pop() 165 | assert len(index_segment.delivered) == M 166 | assert len(index_segment.ready) == N - M 167 | 168 | time.sleep(0.5) 169 | assert len(index_segment.delivered) == 0 170 | assert len(index_segment.ready) == N - M 171 | 172 | 173 | def test_index_segment_gc_reschedule(msg, tmp_path, log_segment: LogSegment): 174 | name = str(1).rjust(10, "0") + ".index" 175 | index_path = tmp_path / name 176 | 177 | index_segment = IndexSegment(index_path, ack_timeout=0.1, retry=True) 178 | N = 2 179 | 180 | for _ in range(N): 181 | msg = Message(func, 1) 182 | offset = log_segment.append(msg) 183 | index_segment.append(msg.id, MessageStatus.READY, offset) 184 | 185 | for _ in range(N): 186 | _ = index_segment.pop() 187 | 188 | assert len(index_segment.delivered) == N 189 | assert len(index_segment.ready) == 0 190 | 191 | time.sleep(0.2) 192 | assert len(index_segment.ready) == N 193 | assert len(index_segment.delivered) == 0 194 | 195 | 196 | def test_index_segment_stop_gc(tmp_path): 197 | name = str(1).rjust(10, "0") + ".index" 198 | index_path = tmp_path / name 199 | 200 | index_segment = IndexSegment(index_path, ack_timeout=0.1, retry=True) 201 | 202 | index_segment.stop_gc_event.set() 203 | 204 | time.sleep(0.1) 205 | 206 | assert index_segment._gc_thread.is_alive() == False 207 | -------------------------------------------------------------------------------- /tests/test_log_segment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import struct 4 | from re import I 5 | 6 | import pytest 7 | 8 | logging.basicConfig( 9 | level=logging.INFO, 10 | format="%(asctime)s,%(msecs)d %(levelname)s: %(message)s", 11 | datefmt="%H:%M:%S", 12 | ) 13 | 14 | from daskqueue.segment import FILE_IDENTIFIER, HEADER_SIZE 15 | from daskqueue.segment.log_segment import FullSegment, LogAccess, LogSegment 16 | 17 | 18 | def test_logsegment(tmp_path): 19 | seg_name = str(0).rjust(20, "0") + ".log" 20 | seg_path = tmp_path / seg_name 21 | 22 | seg = LogSegment(seg_path, LogAccess.RW, 1024) 23 | assert seg._mm_obj.closed == False 24 | assert seg.w_cursor == 8 25 | assert seg._mm_obj.tell() == 8 26 | 27 | seg_name = str(1).rjust(20, "0") + ".log" 28 | seg_path = tmp_path / seg_name 29 | 30 | seg = LogSegment(seg_path, LogAccess.RO, 1024) 31 | assert seg.w_cursor == 8 32 | 33 | 34 | def test_check_segfile(tmpdir): 35 | p = tmpdir.join("bad.log") 36 | p.write(b"test") 37 | 38 | with pytest.raises(Exception) as e_info: 39 | seg = LogSegment(p, LogAccess.RW, 1024) 40 | 41 | p = tmpdir.join("good.log") 42 | 43 | _FORMAT_VERSION = (0, 1) 44 | p.write(struct.pack("!HH", *_FORMAT_VERSION) + FILE_IDENTIFIER) 45 | 46 | seg = LogSegment(p, LogAccess.RW, 1024) 47 | assert seg._mm_obj.tell() == 8 48 | assert seg.w_cursor == 8 49 | 50 | 51 | def 
test_logsegment_append(log_segment, msg): 52 | offset = log_segment.append(msg) 53 | 54 | assert log_segment.w_cursor == HEADER_SIZE + offset.size 55 | 56 | # Can't write 57 | with pytest.raises(FullSegment) as e_info: 58 | [log_segment.append(msg) for _ in range(1000)] 59 | 60 | log_segment.close_file() 61 | 62 | with open(log_segment.path, "r+b") as f: 63 | f.seek(offset.offset) 64 | blob = f.read(offset.size) 65 | record = log_segment.processor.parse_bytes(blob) 66 | assert msg.data() == record.msg.data() 67 | assert msg.timestamp == record.msg.timestamp 68 | 69 | 70 | def test_logsegment_close(log_segment, msg): 71 | offset = log_segment.append(msg) 72 | log_segment.close_file() 73 | assert log_segment.closed 74 | 75 | 76 | def test_logseg_reopen(tmpdir, msg): 77 | p = tmpdir.join("0000.log") 78 | 79 | log_segment = LogSegment(p, LogAccess.RW, 1024) 80 | offset = log_segment.append(msg) 81 | log_segment.read_only() 82 | 83 | assert log_segment.closed 84 | 85 | log_segment = LogSegment(p, LogAccess.RW, 1024) 86 | 87 | assert log_segment.w_cursor == offset.offset + offset.size 88 | -------------------------------------------------------------------------------- /tests/test_protocol.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from conftest import func 4 | from daskqueue.Protocol import Message 5 | 6 | 7 | def test_msg_serialize(): 8 | msg = Message(func, 33) 9 | 10 | b = msg.serialize() 11 | msg_bis = pickle.loads(b) 12 | 13 | assert msg_bis.data() == msg.data() 14 | -------------------------------------------------------------------------------- /tests/test_queue.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from distributed import Client 3 | from distributed.utils_test import gen_cluster 4 | 5 | from daskqueue import QueuePool, TransientQueue 6 | from daskqueue.queue.transient_queue import Full 7 | 8 | 9 | @gen_cluster( 10 | client=True, 11 | cluster_dump_directory=False, 12 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 13 | ) 14 | async def test_create_queue(c, s, a, b): 15 | queue = await c.submit(TransientQueue, actor=True) 16 | assert hasattr(queue, "qsize") 17 | assert hasattr(queue, "empty") 18 | assert hasattr(queue, "full") 19 | assert hasattr(queue, "put_many") 20 | assert hasattr(queue, "put") 21 | assert hasattr(queue, "put_nowait") 22 | assert hasattr(queue, "put_nowait_batch") 23 | assert hasattr(queue, "get") 24 | assert hasattr(queue, "get_nowait") 25 | assert hasattr(queue, "get_nowait_batch") 26 | 27 | 28 | @gen_cluster( 29 | client=True, 30 | cluster_dump_directory=False, 31 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 32 | ) 33 | async def test_get_from_empty_queue(c, s, a, b): 34 | queue = await c.submit(TransientQueue, actor=True) 35 | res = await queue.get(timeout=1) 36 | assert res == None 37 | 38 | 39 | @gen_cluster( 40 | client=True, 41 | cluster_dump_directory=False, 42 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 43 | ) 44 | async def test_put_in_queue(c, s, a, b): 45 | queue = await c.submit(TransientQueue, actor=True) 46 | res = await queue.put(1) 47 | assert res == None 48 | assert await queue.qsize() == 1 49 | 50 | 51 | @gen_cluster( 52 | client=True, 53 | cluster_dump_directory=False, 54 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 55 | ) 56 | async def test_put_get_in_queue(c, s, a, b): 57 | queue = await 
c.submit(TransientQueue, actor=True) 58 | await queue.put(1) 59 | res_get = await queue.get(timeout=1) 60 | assert res_get == 1 61 | 62 | 63 | @gen_cluster( 64 | client=True, 65 | cluster_dump_directory=False, 66 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 67 | ) 68 | async def test_getnowait_in_queue(c, s, a, b): 69 | queue = await c.submit(TransientQueue, actor=True) 70 | await queue.put(1) 71 | res_get = await queue.get_nowait() 72 | assert res_get == 1 73 | 74 | 75 | @gen_cluster( 76 | client=True, 77 | cluster_dump_directory=False, 78 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 79 | ) 80 | async def test_put_limit_queue(c, s, a, b): 81 | queue = await c.submit(TransientQueue, maxsize=1, actor=True) 82 | _ = await queue.put(1, timeout=1) 83 | with pytest.raises(Full) as e_info: 84 | _ = await queue.put(1, timeout=1) 85 | print(f"{e_info}") 86 | -------------------------------------------------------------------------------- /tests/test_queue_pool.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import pytest 5 | from distributed import Client 6 | from distributed.utils_test import cleanup, client, cluster_fixture, gen_cluster, loop 7 | 8 | from daskqueue.Consumer import ConsumerBaseClass 9 | from daskqueue.QueuePool import QueuePool, QueuePoolActor 10 | 11 | 12 | @gen_cluster( 13 | client=True, 14 | cluster_dump_directory=False, 15 | clean_kwargs={"threads": False, "instances": True, "processes": False}, 16 | ) 17 | async def test_putmany_queuepool(c, s, a, b): 18 | n_queues = 2 19 | queue_pool = await c.submit(QueuePoolActor, n_queues, actor=True) 20 | res = await queue_pool.put_many([12, "skdfjs", 1213]) 21 | assert res is None 22 | 23 | 24 | def test_queue_pool_interface_create(client): 25 | n_queues = 2 26 | queue_pool = QueuePool(client, n_queues) 27 | assert n_queues == len(queue_pool) 28 | assert 0 == sum(queue_pool.get_queue_size().values()) 29 | assert queue_pool[0].qsize().result() == 0 30 | assert queue_pool[1].qsize().result() == 0 31 | with pytest.raises(IndexError) as e_info: 32 | _ = queue_pool[3] 33 | 34 | 35 | def test_queuepool_interface_put(client): 36 | n_queues = 1 37 | queue_pool = QueuePool(client, n_queues) 38 | _ = queue_pool.put(1) 39 | assert 1 == sum(queue_pool.get_queue_size().values()) 40 | _ = queue_pool.put(1) 41 | _ = queue_pool.put(1) 42 | assert 3 == sum(queue_pool.get_queue_size().values()) 43 | 44 | 45 | def test_queuepool_interface_put_many(client): 46 | n_queues = 1 47 | queue_pool = QueuePool(client, n_queues) 48 | _ = queue_pool.put_many([1, 2, 3]) 49 | assert 3 == sum(queue_pool.get_queue_size().values()) 50 | 51 | 52 | def test_queuepool_interface_submit(client): 53 | n_queues = 1 54 | queue_pool = QueuePool(client, n_queues) 55 | 56 | def dummy_func(): 57 | pass 58 | 59 | _ = queue_pool.submit(dummy_func) 60 | assert 1 == sum(list(queue_pool.get_queue_size().values())) 61 | 62 | for _ in range(9): 63 | _ = queue_pool.submit(dummy_func) 64 | assert 10 == sum(list(queue_pool.get_queue_size().values())) 65 | 66 | 67 | def test_queuepool_interface_batch_submit(client): 68 | n_queues = 1 69 | queue_pool = QueuePool(client, n_queues) 70 | 71 | def dummy_func(): 72 | pass 73 | 74 | _ = queue_pool.batch_submit([(dummy_func,) for _ in range(10)]) 75 | assert 10 == sum(list(queue_pool.get_queue_size().values())) 76 | 77 | 78 | def test_queuepool_interface_submit_error(client): 79 | n_queues = 1 80 |
queue_pool = QueuePool(client, n_queues) 81 | 82 | def dummy_func(): 83 | pass 84 | 85 | class Worker(ConsumerBaseClass): 86 | pass 87 | 88 | with pytest.raises(RuntimeError) as e_info: 89 | _ = queue_pool.submit(dummy_func, worker_class=Worker) 90 | --------------------------------------------------------------------------------
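A minimal end-to-end sketch of the submit/consume workflow exercised by the tests above, assuming a local Dask cluster created with distributed.Client and the QueuePool/ConsumerPool call signatures used in tests/test_consumer_pool.py; this is an illustrative script, not a file in the repository.

from distributed import Client

from daskqueue.ConsumerPool import ConsumerPool
from daskqueue.QueuePool import QueuePool


def add(x, y):
    return x + y


if __name__ == "__main__":
    # Local Dask cluster; swap in a scheduler address for a real deployment.
    client = Client()

    # One queue actor and two consumer actors, mirroring the test fixtures.
    queue_pool = QueuePool(client, 1)
    consumer_pool = ConsumerPool(client, queue_pool=queue_pool, n_consumers=2)

    # Enqueue ten tasks, start the consumers, then wait for them to finish.
    for _ in range(10):
        queue_pool.submit(add, 1, 1)

    consumer_pool.start()
    consumer_pool.join()

    # results() maps each consumer to a {message id: return value} dict,
    # the same structure the tests flatten when asserting on results.
    results = consumer_pool.results()
    print([val for key in results for val in results[key].values()])

    client.close()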
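A similar sketch for the durable queue covered by tests/test_durable_queue.py, assuming only the synchronous put_sync/get_sync API shown there; messages are written to log and index segments under dirpath (illustrative only).

import tempfile

from daskqueue.Protocol import Message
from daskqueue.queue.durable_queue import DurableQueue


def work(x):
    return x * 2


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as dirpath:
        # The durable queue appends each message to the active on-disk log
        # segment and tracks its offset in the index segment under dirpath.
        queue = DurableQueue(name="queue-0", dirpath=dirpath)

        record = queue.put_sync(Message(work, 21))  # returns the index record
        popped = queue.get_sync()  # pops the oldest ready message

        assert popped.id == record.msg_id
        print(popped.data())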