├── pyrallel
    ├── tests
    │   ├── __init__.py
    │   ├── test_map_reduce.py
    │   ├── test_queue.py
    │   └── test_parallel_processor.py
    ├── __version__.py
    ├── __init__.py
    ├── paralleller.py
    ├── map_reduce.py
    ├── parallel_processor.py
    └── queue.py
├── .coveragerc
├── requirements-dev.txt
├── requirements.txt
├── MANIFEST.in
├── docs
    ├── _static
    │   ├── logo.png
    │   ├── logo_barcode.png
    │   └── style.css
    ├── modules.rst
    ├── queue.rst
    ├── map_reduce.rst
    ├── parallel_processor.rst
    ├── index.rst
    ├── Makefile
    ├── installation.rst
    └── conf.py
├── requirements-docs.txt
├── .readthedocs.yml
├── Makefile
├── .github
    └── workflows
    │   ├── deploy.yml
    │   └── tests.yml
├── LICENSE
├── .travis.yml.bak
├── README.rst
├── .gitignore
└── setup.py


/pyrallel/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = pyrallel/tests/*


--------------------------------------------------------------------------------
/pyrallel/__version__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.0.10'
2 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov<2.6
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | typing>=3.6
2 | multiprocess>=0.70
3 | dill>=0.3
4 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include VERSION
3 | include requirements.txt


--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/usc-isi-i2/pyrallel/HEAD/docs/_static/logo.png


--------------------------------------------------------------------------------
/docs/_static/logo_barcode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/usc-isi-i2/pyrallel/HEAD/docs/_static/logo_barcode.png


--------------------------------------------------------------------------------
/docs/modules.rst:
--------------------------------------------------------------------------------
 1 | Modules
 2 | =======
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 4
 6 | 
 7 |    parallel_processor.rst
 8 |    map_reduce.rst
 9 |    queue.rst
10 | 


--------------------------------------------------------------------------------
/docs/queue.rst:
--------------------------------------------------------------------------------
1 | Queue
2 | =====
3 | 
4 | .. automodule:: pyrallel.queue
5 |     :members:
6 |     :special-members:
7 |     :exclude-members: __dict__, __weakref__, __init__
8 | 


--------------------------------------------------------------------------------
/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | alabaster>=0.7.9
3 | Sphinx>=1.5.6
4 | sphinx-autobuild>=0.6.0
5 | sphinxcontrib-napoleon>=0.6.0
6 | nbsphinx>=0.3.4
7 | pandoc>=1.0.2
8 | 


--------------------------------------------------------------------------------
/docs/map_reduce.rst:
--------------------------------------------------------------------------------
1 | MapReduce
2 | =========
3 | 
4 | .. automodule:: pyrallel.map_reduce
5 |     :members:
6 |     :special-members:
7 |     :exclude-members: __dict__, __weakref__, __init__
8 | 


--------------------------------------------------------------------------------
/docs/parallel_processor.rst:
--------------------------------------------------------------------------------
1 | ParallelProcessor
2 | =================
3 | 
4 | .. automodule:: pyrallel.parallel_processor
5 |     :members:
6 |     :special-members:
7 |     :exclude-members: __dict__, __weakref__, __init__
8 | 


--------------------------------------------------------------------------------
/pyrallel/__init__.py:
--------------------------------------------------------------------------------
1 | from pyrallel.queue import *
2 | from pyrallel.paralleller import Paralleller
3 | from pyrallel.parallel_processor import ParallelProcessor, Mapper, ProgressThread
4 | from pyrallel.map_reduce import MapReduce
5 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | version: 2
 5 | 
 6 | sphinx:
 7 |   configuration: docs/conf.py
 8 | 
 9 | python:
10 |   version: 3.8
11 |   install:
12 |     - requirements: requirements-docs.txt
13 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Pyrallel
 2 | ========
 3 | 
 4 | .. include:: ./../README.rst
 5 |       :start-after: begin-intro
 6 |       :end-before: end-intro
 7 | 
 8 | Installation
 9 | ------------
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    installation.rst
15 | 
16 | Modules
17 | -------
18 | 
19 | .. toctree::
20 |    :maxdepth: 3
21 | 
22 |    modules.rst
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: docs
 2 | 
 3 | docs:
 4 | 	@cd docs && make html
 5 | 
 6 | release:
 7 | 	@VERSION=$$(python -c "from pyrallel.__version__ import __version__;print(__version__)") && git tag $$VERSION
 8 | 
 9 | # locate all the files in this directory or below:
10 | FILES=`find . -name '*.py'`
11 | 
12 | # The command for running mypy:
13 | lint:
14 | 	python3 -m mypy $(FILES)
15 | 
16 | # Run the unit tests.
17 | test:
18 | 	python3 -m pytest -s pyrallel/tests/test_map_reduce.py
19 | 	python3 -m pytest -s pyrallel/tests/test_parallel_processor.py
20 | 	python3 -m pytest -s pyrallel/tests/test_queue.py
21 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = pyrallel
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - '*'
 6 | jobs:
 7 |   deploy-to-pypi:
 8 |     name: Deploy to pypi
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name: checkout code
12 |         uses: actions/checkout@v2
13 |       - name: Set up Python
14 |         uses: actions/setup-python@v2
15 |         with:
16 |           python-version: '3.6'
17 |       - name: Install dependencies
18 |         run: |
19 |           python -m pip install --upgrade pip
20 |           pip install setuptools wheel twine
21 |       - name: Build and publish
22 |         env:
23 |           TWINE_USERNAME: usc_isi_i2_admin
24 |           TWINE_PASSWORD: ${{ secrets.PYPI }}
25 |         run: |
26 |           python setup.py sdist bdist_wheel
27 |           twine upload dist/*
28 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 USC ISI I2
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | on: push
 3 | jobs:
 4 |   run-tests:
 5 |     name: Run pytest
 6 |     runs-on: ubuntu-latest
 7 |     strategy:
 8 |       matrix:
 9 |         python-version: [3.6, 3.7, 3.8, 3.9]
10 |     steps:
11 |       - name: Checkout code
12 |         uses: actions/checkout@v2
13 |       - name: Set up Python ${{ matrix.python-version }}
14 |         uses: actions/setup-python@v2
15 |         with:
16 |           python-version: ${{ matrix.python-version }}
17 |       - name: Install dependencies
18 |         run: |
19 |           python -m pip install --upgrade pip
20 |           pip install -r requirements.txt
21 |           pip install -r requirements-dev.txt
22 |           pip install -e .
23 |           pip install coverage coveralls
24 |       - name: Test with pytest
25 |         run: |
26 |           python -m pytest -v --color=yes --cov pyrallel pyrallel/tests/test_*
27 |       - name: Coverage
28 |         env:
29 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
30 |         run: |
31 |           # coverage run --omit pyrallel/tests/* -m pytest pyrallel/tests/test_*
32 |           # coverage report
33 |           coveralls --service=github
34 | 
35 | 


--------------------------------------------------------------------------------
/pyrallel/paralleller.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | 
 4 | class Paralleller(ABC):
 5 |     """
 6 |     Paralleller is an abstract class defines common methods for concrete Parallellers.
 7 |     """
 8 | 
 9 |     @abstractmethod
10 |     def start(self):
11 |         """
12 |         Start processes and / or threads.
13 |         """
14 |         raise NotImplementedError
15 | 
16 |     @abstractmethod
17 |     def add_task(self, *args, **kwargs):
18 |         """
19 |         Add new task.
20 |         """
21 |         raise NotImplementedError
22 | 
23 |     @abstractmethod
24 |     def task_done(self):
25 |         """
26 |         All tasks are added.
27 |         """
28 |         raise NotImplementedError
29 | 
30 |     @abstractmethod
31 |     def join(self):
32 |         """
33 |         Wait until all processes (threads) finish.
34 |         """
35 |         raise NotImplementedError
36 | 
37 |     def map(self, tasks: iter):
38 |         """
39 |         Syntactic sugar for adding task from an iterable object.
40 | 
41 |         Args:
42 |             tasks (iter): Any iterable object.
43 |         """
44 |         for task in tasks:
45 |             self.add_task(task)
46 | 


--------------------------------------------------------------------------------
/docs/_static/style.css:
--------------------------------------------------------------------------------
 1 | @import url("https://fonts.googleapis.com/css?family=Ubuntu+Mono");
 2 | @import url("https://fonts.googleapis.com/css?family=Open+Sans");
 3 | 
 4 | pre, code {
 5 |   font-family: "Ubuntu Mono", "Consolas", "Menlo", "DejaVu Sans Mono", "Bitstream Vera Sans Mono", monospace;
 6 |   font-size: 15px;
 7 | }
 8 | 
 9 | h1, h2, h3, h4, h5, h6, p.admonition-title, div.sphinxsidebar input, body {
10 |   font-family: "Open Sans", "Helvetica", "Arial", sans-serif;
11 | }
12 | 
13 | div.sphinxsidebar ul li.toctree-l1 > a {
14 |     font-size: 100%;
15 | }
16 | 
17 | div.sphinxsidebar ul li.toctree-l2 > a {
18 |     font-size: 100%;
19 | }
20 | 
21 | div.sphinxsidebar ul li.toctree-l3 > a {
22 |     font-size: 100%;
23 | }
24 | 
25 | div.body {
26 |     max-width: 100%; /* overwrite basic.css */
27 | }
28 | 
29 | table.dataframe {
30 |     border-collapse: collapse;
31 |     /*width: 100%;*/
32 | }
33 | 
34 | table.dataframe th, table.dataframe td {
35 |     text-align: left;
36 |     padding: 8px;
37 | }
38 | 
39 | table.dataframe tr:nth-child(even) {
40 |     background-color: #f2f2f2;
41 | }
42 | 
43 | blockquote {
44 |     border-left: 5px solid #eeeeee;
45 |     padding: 10px 20px;
46 | }
47 | 
48 | div.sphinxsidebarwrapper p.logo {
49 |     margin-bottom: 30px;
50 | }
51 | 


--------------------------------------------------------------------------------
/.travis.yml.bak:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - '3.9'
 5 |   - '3.8'
 6 |   - '3.7'
 7 |   - '3.6'
 8 |   - '3.5'
 9 | 
10 | install:
11 |   - pip install -r requirements.txt
12 |   - pip install -r requirements-dev.txt
13 |   - pip install -e .
14 |   - pip install coveralls
15 | 
16 | script:
17 |   - py.test -v --color=yes --cov pyrallel
18 | 
19 | after_success:
20 |   - coveralls
21 | 
22 | notifications:
23 |   email: false
24 | 
25 | deploy:
26 |   - provider: pypi
27 |     user: usc_isi_i2_admin
28 |     password:
29 |       secure: QwVcmGEN4dJN1vi7HM0E4ZIgGM/kCKWJ323AoXDUtSwbZIUYX5sZsNoh+buJzfJR94geZqckf9ABSk22cazuXzrDBUeh73sOReILcCciEMxYWkrBDrvtr2rBBq2GOC8B8Xc3BzNcZGG1pVhoNFjr6/Co0rOIn6JmxPRBLjCoyT33bQGHchuXPbozhMNDtG1+p+j5+lrGZetdD6sSl8O3BCOkJtfor50LvgxLoYcqcOd6jj9DgY9r6fo7if43xESj07UfneZ+Eo+xVQ9NRsItD4sc2toC5wPcdggqVQ+cy/mc9A3SAbD/Y36Jz6RX1hM5LrnPEDRi/URlmBriwf59VygwSXaypfaex8aEsx5W7CPuexNbRg/qWojoZASE9GzQcAw6aamWIzJy6EOvKI5NmGzVLDUqU3U5Ow/7vLhb/iQi+09Du+5bPSmR2qZvO+uyIHjW8ZGgZQ7Q4uldokYjwca8jUWvx5HrT0B1gJPjV0PjNFrwVuA40TvodoHQJ+Ief0cE9ALsBH6VLEAduC+lWOV1B/bvEBGUgAgD2l1Gb7QKpq1bl1izoGB6H3V0qqXnHdIstMC+0rYwD19FDXPxThrIpuwtsx2KsgXbeFL/wpcABd40rVgLRt6Ol/dBR60DzAGn+L5fV9ahPZY/UNYZwlblPbDsZDP4kUDXYrhW6T4=
30 |     on:
31 |       tags: true
32 |       condition: $TRAVIS_PYTHON_VERSION = 3.6
33 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | Pyrallel
 2 | ========
 3 | 
 4 | .. begin-intro
 5 | 
 6 | Yet another easy-to-use python parallel library for humans.
 7 | 
 8 | .. image:: https://img.shields.io/badge/license-MIT-blue.svg
 9 |     :target: https://raw.githubusercontent.com/usc-isi-i2/pyrallel/master/LICENSE
10 |     :alt: License
11 | 
12 | .. image:: https://github.com/usc-isi-i2/pyrallel/workflows/Tests/badge.svg?branch=master
13 |     :target: https://github.com/usc-isi-i2/pyrallel/actions
14 |     :alt: Github actions
15 | 
16 | .. image:: https://coveralls.io/repos/github/usc-isi-i2/pyrallel/badge.svg?branch=master
17 |     :target: https://coveralls.io/github/usc-isi-i2/pyrallel?branch=master
18 |     :alt: Coveralls
19 | 
20 | .. image:: https://badge.fury.io/py/pyrallel.lib.svg
21 |     :target: https://badge.fury.io/py/pyrallel.lib
22 |     :alt: pypi
23 | 
24 | .. image:: https://readthedocs.org/projects/pyrallel/badge/?version=latest
25 |     :target: http://pyrallel.readthedocs.io/en/latest
26 |     :alt: Documents
27 | 
28 | - ParallelProcessor: Newbie-friendly process-based parallel computing api.
29 | - MapReduce: Ultimately simple map and reduce computing model.
30 | - ShmQueue: Extremely fast shared memory driven general purpose multiprocessing queue.
31 | 
32 | .. end-intro
33 | 
34 | Installation
35 | ------------
36 | ::
37 | 
38 |     pip install pyrallel.lib
39 | 
40 | 
41 | Documentation
42 | -------------
43 | 
44 | `Read the Doc <http://pyrallel.readthedocs.io>`_
45 | 


--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | .. note::
 5 | 
 6 |     Pyrallel supports Python 3 and it's tested under Python 3.4+.
 7 | 
 8 | pip
 9 | ----
10 | 
11 | Using pip to install::
12 | 
13 |     pip install pyrallel.lib
14 | 
15 | If you want to update installed Pyrallel::
16 | 
17 |     pip install -U pyrallel.lib
18 | 
19 | Development installation
20 | ------------------------
21 | 
22 | Install from source
23 | ```````````````````
24 | 
25 | ::
26 | 
27 |     git clone https://github.com/usc-isi-i2/pyrallel.git
28 |     cd paraly
29 | 
30 |     virtualenv pyrallel_env
31 |     source activate pyrallel_env
32 |     pip install -r requirements.txt
33 |     pip install -r requreiments-dev.txt
34 |     pip install -e .
35 | 
36 | Run tests
37 | `````````
38 | 
39 | Pyrallel uses `pytest <https://pytest.org/>`_ for unit tests. To run them, simply do following command from the root of Pyrallel package::
40 | 
41 |     pytest
42 | 
43 | If you need more detailed information, do::
44 | 
45 |     pytest -v --color=yes
46 | 
47 | Build documentation
48 | -------------------
49 | 
50 | Additional dependencies for building documentation should be installed first::
51 | 
52 |     pip install -r requirements-docs.txt
53 | 
54 | Documentation is powered by `Sphinx <http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_ , to generate it on your local, please run::
55 | 
56 |     cd docs
57 |     make html # the generated doc is located at _build/html/index.html
58 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | .idea/
107 | local/


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from distutils.core import setup
 3 | from setuptools import find_packages
 4 | 
 5 | 
 6 | with open('README.rst', 'r') as fh:
 7 |     long_description = fh.read()
 8 | 
 9 | with open('requirements.txt', 'r') as f:
10 |     install_requires = list()
11 |     dependency_links = list()
12 |     for line in f:
13 |         re = line.strip()
14 |         if re:
15 |             if re.startswith('git+') or re.startswith('svn+') or re.startswith('hg+'):
16 |                 dependency_links.append(re)
17 |             else:
18 |                 install_requires.append(re)
19 | 
20 | about = {}
21 | with open(
22 |         os.path.join(os.path.abspath(os.path.dirname(__file__)), 'pyrallel', '__version__.py'),
23 |         'r', encoding='utf-8') as f:
24 |     exec(f.read(), about)
25 | 
26 | packages = find_packages()
27 | 
28 | setup(
29 |     name='pyrallel.lib',
30 |     version=about['__version__'],
31 |     packages=packages,
32 |     url='https://github.com/usc-isi-i2/pyrallel',
33 |     project_urls={
34 |         "Bug Tracker": "https://github.com/usc-isi-i2/pyrallel/issues",
35 |         "Documentation": "https://pyrallel.readthedocs.io",
36 |         "Source Code": "https://github.com/usc-isi-i2/pyrallel",
37 |     },
38 |     license='MIT',
39 |     author='USC/ISI',
40 |     author_email='yixiangy@isi.edu',
41 |     description='Yet another easy-to-use python3 parallel library for humans.',
42 |     long_description=long_description,
43 |     long_description_content_type='text/x-rst',
44 |     include_package_data=True,
45 |     install_requires=install_requires,
46 |     dependency_links=dependency_links,
47 |     classifiers=(
48 |         "Programming Language :: Python :: 3",
49 |         "Natural Language :: English",
50 |         "License :: OSI Approved :: MIT License",
51 |         "Operating System :: OS Independent",
52 |         "Topic :: Software Development :: Libraries",
53 |         "Topic :: Software Development :: Libraries :: Python Modules",
54 |     )
55 | )
56 | 


--------------------------------------------------------------------------------
/pyrallel/tests/test_map_reduce.py:
--------------------------------------------------------------------------------
 1 | import multiprocessing as mp
 2 | 
 3 | from pyrallel.map_reduce import MapReduce
 4 | 
 5 | 
 6 | NUM_OF_PROCESSOR = max(2, int(mp.cpu_count() / 2))
 7 | 
 8 | 
 9 | def test_map_reduce_number():
10 | 
11 |     def mapper(x):
12 |         return x
13 | 
14 |     def reducer(r1, r2):
15 |         return r1 + r2
16 | 
17 |     mr = MapReduce(3, mapper, reducer)
18 |     mr.start()
19 |     mr.add_task(1)
20 |     mr.task_done()
21 |     assert mr.join() == 1
22 | 
23 |     mr = MapReduce(NUM_OF_PROCESSOR, mapper, reducer)
24 |     mr.start()
25 |     mr.add_task(1)
26 |     mr.task_done()
27 |     assert mr.join() == 1
28 | 
29 |     mr = MapReduce(1, mapper, reducer)
30 |     mr.start()
31 |     for i in range(1, 101):
32 |         mr.add_task(i)
33 |     mr.task_done()
34 |     assert mr.join() == 5050
35 | 
36 |     mr = MapReduce(NUM_OF_PROCESSOR, mapper, reducer)
37 |     mr.start()
38 |     for i in range(1, 101):
39 |         mr.add_task(i)
40 |     mr.task_done()
41 |     assert mr.join() == 5050
42 | 
43 |     mr = MapReduce(NUM_OF_PROCESSOR, mapper, reducer)
44 |     mr.start()
45 |     for i in range(1, 100001):
46 |         mr.add_task(i)
47 |     mr.task_done()
48 |     assert mr.join() == 5000050000
49 | 
50 | 
51 | def test_map_reduce_object():
52 | 
53 |     def mapper(k, v):
54 |         return {k: v}
55 | 
56 |     def reducer(r1, r2):
57 |         for k1, v1 in r1.items():
58 |             if k1 in r2:
59 |                 r2[k1] += v1
60 |             else:
61 |                 r2[k1] = v1
62 |         return r2
63 | 
64 |     mr = MapReduce(1, mapper, reducer)
65 |     mr.start()
66 |     for i in range(100):
67 |         if i % 2 == 0:
68 |             mr.add_task('a', i)
69 |         else:
70 |             mr.add_task('b', i)
71 |     mr.task_done()
72 |     assert mr.join() == {'a': 2450, 'b': 2500}
73 | 
74 |     mr = MapReduce(NUM_OF_PROCESSOR, mapper, reducer)
75 |     mr.start()
76 |     for i in range(100):
77 |         if i % 2 == 0:
78 |             mr.add_task('a', i)
79 |         else:
80 |             mr.add_task('b', i)
81 |     mr.task_done()
82 |     assert mr.join() == {'a': 2450, 'b': 2500}
83 | 


--------------------------------------------------------------------------------
/pyrallel/tests/test_queue.py:
--------------------------------------------------------------------------------
 1 | import multiprocessing as mp
 2 | import queue
 3 | import pyrallel
 4 | import os
 5 | 
 6 | 
 7 | # 30 bytes each
 8 | # CONTENT = os.urandom(30)
 9 | CONTENT = b'\xaa' * 10 + b'\xbb' * 10 + b'\xcc' * 10
10 | 
11 | 
12 | def sender(sq):
13 |     for _ in range(10):
14 |         sq.put(CONTENT)
15 | 
16 | 
17 | def receiver(sq, q):
18 |     try:
19 |         while True:
20 |             content = sq.get(timeout=2)
21 |             q.put(content)
22 |     except queue.Empty:
23 |         return
24 | 
25 | 
26 | class DummySerializer(object):
27 |     def dumps(self, o):
28 |         return o
29 | 
30 |     def loads(self, d):
31 |         return d
32 | 
33 | 
34 | def test_shmqueue():
35 |     if not hasattr(pyrallel, 'ShmQueue'):
36 |         return
37 | 
38 |     params = [  # chunk size, maxsize
39 |         [50, 100],  # chunk size > content, maxsize is enough
40 |         [10, 100],  # chunk size < content, maxsize is enough
41 |         # [50, 1],  # chunk size > content, maxsize is limited
42 |         # [10, 1],  # chunk size < content, maxsize is limited
43 |     ]
44 | 
45 |     for mode in ['fork', 'spawn']:
46 |         mp.set_start_method(mode, force=True)
47 |         ShmQueueCls = getattr(pyrallel, 'ShmQueue')
48 |         for param in params:
49 |             sq = ShmQueueCls(chunk_size=param[0], maxsize=param[1], serializer=DummySerializer())
50 |             q = mp.Queue()
51 |             # 3 senders and 2 receivers
52 |             # each sender process add 10 content, in total 30 * 10 = 300 bytes
53 |             p_senders = [mp.Process(target=sender, args=(sq,)) for _ in range(3)]
54 |             p_receivers = [mp.Process(target=receiver, args=(sq, q)) for _ in range(2)]
55 | 
56 |             for p in p_senders:
57 |                 p.start()
58 |             for p in p_receivers:
59 |                 p.start()
60 | 
61 |             for p in p_senders:
62 |                 p.join()
63 |             for p in p_receivers:
64 |                 p.join()
65 |             sq.close()
66 | 
67 |             total_put = 30  # there should be in total 30 elements
68 |             while True:
69 |                 try:
70 |                     r = q.get(timeout=2)
71 |                     total_put -= 1
72 |                     assert r == CONTENT
73 |                 except queue.Empty:
74 |                     break
75 | 
76 |             assert total_put == 0
77 | 


--------------------------------------------------------------------------------
/pyrallel/tests/test_parallel_processor.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import multiprocessing as mp
  3 | 
  4 | from pyrallel.parallel_processor import ParallelProcessor, Mapper
  5 | 
  6 | 
  7 | NUM_OF_PROCESSOR = max(2, int(mp.cpu_count() / 2))
  8 | 
  9 | 
 10 | def test_basic():
 11 |     def dummy_computation():
 12 |         time.sleep(0.0001)
 13 | 
 14 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, dummy_computation)
 15 |     pp.start()
 16 | 
 17 |     for i in range(1000):
 18 |         pp.add_task()
 19 | 
 20 |     pp.task_done()
 21 |     pp.join()
 22 | 
 23 |     class MyMapper(Mapper):
 24 |         def enter(self):
 25 |             self.i = 0
 26 | 
 27 |         def process(self):
 28 |             dummy_computation()
 29 |             self.i += 1
 30 | 
 31 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, MyMapper)
 32 |     pp.start()
 33 | 
 34 |     for i in range(1000):
 35 |         pp.add_task()
 36 | 
 37 |     pp.task_done()
 38 |     pp.join()
 39 | 
 40 | 
 41 | def test_with_input():
 42 |     def dummy_computation_with_input(x, _idx):
 43 |         time.sleep(0.0001)
 44 | 
 45 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, dummy_computation_with_input, enable_process_id=True)
 46 |     pp.start()
 47 | 
 48 |     for i in range(1000):
 49 |         pp.add_task(i)
 50 | 
 51 |     pp.map(range(1000))
 52 | 
 53 |     pp.task_done()
 54 |     pp.join()
 55 | 
 56 |     class MyMapper(Mapper):
 57 |         def process(self, x):
 58 |             dummy_computation_with_input(x, _idx=self._idx)
 59 | 
 60 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, MyMapper)
 61 |     pp.start()
 62 | 
 63 |     for i in range(1000):
 64 |         pp.add_task(i)
 65 | 
 66 |     pp.task_done()
 67 |     pp.join()
 68 | 
 69 | 
 70 | def test_with_multiple_input():
 71 |     def dummy_computation_with_input(x, y):
 72 |         assert x * 2 == y
 73 |         time.sleep(0.0001)
 74 | 
 75 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, dummy_computation_with_input)
 76 |     pp.start()
 77 | 
 78 |     for i in range(1000):
 79 |         pp.add_task(i, y=i*2)
 80 | 
 81 |     pp.map([(i, i*2) for i in range(1000)])
 82 | 
 83 |     pp.task_done()
 84 |     pp.join()
 85 | 
 86 | 
 87 | def test_with_output():
 88 |     result = []
 89 | 
 90 |     def dummy_computation_with_input(x):
 91 |         time.sleep(0.0001)
 92 |         return x * x
 93 | 
 94 |     def collector(r):
 95 |         result.append(r)
 96 | 
 97 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, dummy_computation_with_input, collector=collector)
 98 |     pp.start()
 99 | 
100 |     for i in range(8):
101 |         pp.add_task(i)
102 | 
103 |     pp.task_done()
104 |     pp.join()
105 | 
106 |     for i in [0, 1, 4, 9, 16, 25, 36, 49]:
107 |         assert i in result
108 | 
109 | 
110 | def test_with_multiple_output():
111 |     result = []
112 | 
113 |     def dummy_computation_with_input(x):
114 |         time.sleep(0.0001)
115 |         return x * x, x * x
116 | 
117 |     def collector(r1, r2):
118 |         result.append(r1)
119 | 
120 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, dummy_computation_with_input, collector=collector)
121 |     pp.start()
122 | 
123 |     for i in range(8):
124 |         pp.add_task(i)
125 | 
126 |     pp.task_done()
127 |     pp.join()
128 | 
129 |     for i in [0, 1, 4, 9, 16, 25, 36, 49]:
130 |         assert i in result
131 | 
132 | 
133 | def test_with_progress():
134 |     def dummy_computation():
135 |         time.sleep(0.0001)
136 | 
137 |     def progress(p):
138 |         assert p['total'] >= p['added'] >= p['loaded'] >= p['processed']
139 | 
140 |     pp = ParallelProcessor(NUM_OF_PROCESSOR, dummy_computation, progress=progress, progress_total=10)
141 |     pp.start()
142 |     for i in range(10):
143 |         pp.add_task()
144 |     pp.task_done()
145 |     pp.join()
146 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # rltk documentation build configuration file, created by
  4 | # sphinx-quickstart on Thu Feb 23 13:46:31 2017.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | # If extensions (or modules to document with autodoc) are in another directory,
 16 | # add these directories to sys.path here. If the directory is relative to the
 17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 18 | #
 19 | import os
 20 | import sys
 21 | import datetime
 22 | sys.path.insert(0, os.path.abspath('../pyrallel'))
 23 | sys.path.insert(0, os.path.abspath('../'))
 24 | 
 25 | 
 26 | # -- General configuration ------------------------------------------------
 27 | 
 28 | # If your documentation needs a minimal Sphinx version, state it here.
 29 | #
 30 | # needs_sphinx = '1.0'
 31 | 
 32 | # Add any Sphinx extension module names here, as strings. They can be
 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 34 | # ones.
 35 | extensions = ['sphinx.ext.autodoc', 'sphinxcontrib.napoleon', 'nbsphinx']
 36 | 
 37 | # Add any paths that contain templates here, relative to this directory.
 38 | templates_path = ['_templates']
 39 | 
 40 | # The suffix(es) of source filenames.
 41 | # You can specify multiple suffix as a list of string:
 42 | #
 43 | source_suffix = '.rst'
 44 | 
 45 | # The master toctree document.
 46 | master_doc = 'index'
 47 | 
 48 | # General information about the project.
 49 | project = 'Pyrallel'
 50 | copyright = '{}, USC/ISI'.format(datetime.datetime.now().year)
 51 | author = 'USC/ISI'
 52 | 
 53 | # The version info for the project you're documenting, acts as replacement for
 54 | # |version| and |release|, also used in various other places throughout the
 55 | # built documents.
 56 | #
 57 | from pyrallel.__version__ import __version__
 58 | # The short X.Y version.
 59 | version = '.'.join(__version__.split('.')[:2])
 60 | # The full version, including alpha/beta/rc tags.
 61 | release = __version__
 62 | 
 63 | # The language for content autogenerated by Sphinx. Refer to documentation
 64 | # for a list of supported languages.
 65 | #
 66 | # This is also used if you do content translation via gettext catalogs.
 67 | # Usually you set "language" from the command line for these cases.
 68 | language = None
 69 | 
 70 | # List of patterns, relative to source directory, that match files and
 71 | # directories to ignore when looking for source files.
 72 | # This patterns also effect to html_static_path and html_extra_path
 73 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
 74 | 
 75 | # The name of the Pygments (syntax highlighting) style to use.
 76 | pygments_style = 'sphinx'
 77 | 
 78 | # If true, `todo` and `todoList` produce output, else they produce nothing.
 79 | todo_include_todos = False
 80 | 
 81 | 
 82 | # -- Options for HTML output ----------------------------------------------
 83 | 
 84 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 85 | # a list of builtin themes.
 86 | #
 87 | html_theme = 'alabaster' # default, alabaster, pyramid, bizstyle
 88 | 
 89 | # Theme options are theme-specific and customize the look and feel of a theme
 90 | # further.  For a list of options available for each theme, see the
 91 | # documentation.
 92 | #
 93 | html_theme_options = {
 94 |     'logo': 'logo.png',
 95 |     'page_width': '1380px',
 96 |     'sidebar_width': '220px',
 97 |     'github_user': 'usc-isi-i2',
 98 |     'github_repo': 'pyrallel',
 99 |     'github_banner': 'true',
100 |     'github_type': 'star',
101 |     'extra_nav_links': {
102 |         'pyrallel @ GitHub': 'https://github.com/usc-isi-i2/pyrallel',
103 |         'pyrallel @ PyPI': 'https://pypi.org/project/pyrallel',
104 |         'Issue Tracker': 'https://github.com/usc-isi-i2/pyrallel/issues',
105 |         'USC/ISI CKG': 'http://usc-isi-i2.github.io/'
106 |     },
107 |     'show_powered_by': False
108 | }
109 | 
110 | html_show_sourcelink = False
111 | 
112 | html_sidebars = {
113 |     '**': [
114 |         'about.html',
115 |         'localtoc.html',
116 |         'navigation.html',
117 |         # 'relations.html',
118 |         'searchbox.html',
119 |         # 'donate.html',
120 |     ]
121 | }
122 | 
123 | # Add any paths that contain custom static files (such as style sheets) here,
124 | # relative to this directory. They are copied after the builtin static files,
125 | # so a file named "default.css" will overwrite the builtin "default.css".
126 | html_static_path = ['_static']
127 | 
128 | 
129 | # -- Options for HTMLHelp output ------------------------------------------
130 | 
131 | # Output file base name for HTML help builder.
132 | htmlhelp_basename = 'pyralleldoc'
133 | 
134 | 
135 | # -- Options for LaTeX output ---------------------------------------------
136 | 
137 | latex_elements = {
138 |     # The paper size ('letterpaper' or 'a4paper').
139 |     #
140 |     # 'papersize': 'letterpaper',
141 | 
142 |     # The font size ('10pt', '11pt' or '12pt').
143 |     #
144 |     # 'pointsize': '10pt',
145 | 
146 |     # Additional stuff for the LaTeX preamble.
147 |     #
148 |     # 'preamble': '',
149 | 
150 |     # Latex figure (float) alignment
151 |     #
152 |     # 'figure_align': 'htbp',
153 | }
154 | 
155 | # Grouping the document tree into LaTeX files. List of tuples
156 | # (source start file, target name, title,
157 | #  author, documentclass [howto, manual, or own class]).
158 | latex_documents = [
159 |     (master_doc, 'pyrallel.tex', 'Pyrallel Documentation',
160 |      u'USC/ISI', 'manual'),
161 | ]
162 | 
163 | 
164 | # -- Options for manual page output ---------------------------------------
165 | 
166 | # One entry per manual page. List of tuples
167 | # (source start file, name, description, authors, manual section).
168 | man_pages = [
169 |     (master_doc, 'Pyrallel', 'Pyrallel Documentation',
170 |      [author], 1)
171 | ]
172 | 
173 | 
174 | # -- Options for Texinfo output -------------------------------------------
175 | 
176 | # Grouping the document tree into Texinfo files. List of tuples
177 | # (source start file, target name, title, author,
178 | #  dir menu entry, description, category)
179 | texinfo_documents = [
180 |     (master_doc, 'Pyrallel', 'Pyrallel Documentation',
181 |      author, 'Pyrallel', 'Yet another easy-to-use python3 parallel library for humans.',
182 |      'Miscellaneous'),
183 | ]
184 | 
185 | 
186 | def setup(app):
187 |     app.add_stylesheet('style.css')


--------------------------------------------------------------------------------
/pyrallel/map_reduce.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This is a multiprocessing-based map reduce computing model.
  3 | 
  4 | It's different from normal MapReduce model:
  5 | 
  6 | - Manager fires up mapper and reducer processes simultaneously: Output of mapper is identical to reducer, \
  7 |     so reducers don't need to wait until all mappers finish.
  8 | - Data can be passed to mapper gradually: Mappers are waiting to consume data until user tells them no more new data \
  9 |     will be added.
 10 | - Reducing is not between two mapper's output (though the api to user is as this) \
 11 |     but output and context: Data pickling (serialization) and unpickling \
 12 |     (unserialization) for IPC are time consuming. As an alternation, each reducer process holds a context \
 13 |     which aggregates output in reducing step. \
 14 |     Once all output is reduced, reducing will be among contexts.
 15 | - It doesn't support shuffling and reduce-by-key.
 16 | 
 17 | Example::
 18 | 
 19 |     def mapper(x):
 20 |         time.sleep(0.0001)
 21 |         return x
 22 | 
 23 |     def reducer(r1, r2):
 24 |         return r1 + r2
 25 | 
 26 |     mr = MapReduce(8, mapper, reducer)
 27 |     mr.start()
 28 | 
 29 |     for i in range(10000):
 30 |         mr.add_task(i)
 31 | 
 32 |     mr.task_done()
 33 |     result = mr.join()
 34 | 
 35 |     print(result)
 36 | 
 37 | """
 38 | __all__ = ['MapReduce']
 39 | 
 40 | import multiprocess as mp
 41 | import multiprocess.queues as mpq
 42 | import queue
 43 | from typing import Callable
 44 | import sys
 45 | import logging
 46 | import uuid
 47 | import pickle
 48 | import math
 49 | 
 50 | from pyrallel import Paralleller
 51 | 
 52 | 
 53 | logger = logging.getLogger('MapReduce')
 54 | logger.setLevel(logging.ERROR)
 55 | stdout_handler = logging.StreamHandler(sys.stdout)
 56 | stdout_handler.setFormatter(logging.Formatter('%(asctime)-15s %(name)s [%(levelname)s] %(message)s'))
 57 | logger.addHandler(stdout_handler)
 58 | 
 59 | 
 60 | class ChunkedQueue(mpq.Queue):
 61 |     CHUNK_SIZE = 512 * 1024 * 1024
 62 | 
 63 |     def __init__(self, *args, **kwargs):
 64 |         ctx = mp.get_context()
 65 |         super().__init__(*args, **kwargs, ctx=ctx)
 66 |         self.buff = {}
 67 | 
 68 |     def put(self, obj, block=True, timeout=None):
 69 |         if not block:
 70 |             return super().put(obj=obj, block=False, timeout=timeout)
 71 | 
 72 |         chunk_size = self.__class__.CHUNK_SIZE
 73 |         msg_id = uuid.uuid4()
 74 |         msg_bytes = pickle.dumps(obj)
 75 |         num_of_chunks = math.ceil(len(msg_bytes) / chunk_size)
 76 |         logger.debug('putting data: #%s [%d], size: %d', msg_id, num_of_chunks, len(msg_bytes))
 77 |         for i in range(num_of_chunks):
 78 |             msg_obj = {
 79 |                 'b': msg_bytes[i * chunk_size : (i + 1) * chunk_size],  # body
 80 |                 'u': msg_id,  # msg id
 81 |                 'i': i + 1,  # chunk id
 82 |                 'n': num_of_chunks  # total number of chunks
 83 |             }
 84 |             super().put(obj=msg_obj, block=block, timeout=timeout)
 85 | 
 86 |     def get(self, block=True, timeout=None):
 87 |         if not block:
 88 |             return super().get(block=False, timeout=timeout)
 89 | 
 90 |         while True:
 91 |             msg_obj = super().get(block=block, timeout=timeout)
 92 |             logger.debug('getting data: #%s [%d/%d]', msg_obj['u'], msg_obj['i'], msg_obj['n'])
 93 |             # small message
 94 |             if msg_obj['u'] not in self.buff and msg_obj['i'] == msg_obj['n']:
 95 |                 return pickle.loads(msg_obj['b'])
 96 | 
 97 |             # chunked message
 98 |             if msg_obj['u'] not in self.buff:
 99 |                 self.buff[msg_obj['u']] = [None] * msg_obj['n']
100 |             self.buff[msg_obj['u']][msg_obj['i']-1] = msg_obj['b']
101 |             if msg_obj['i'] == msg_obj['n']:
102 |                 msg = pickle.loads(b''.join(self.buff[msg_obj['u']]))
103 |                 del self.buff[msg_obj['u']]
104 |                 return msg
105 | 
106 | 
107 | class MapReduce(Paralleller):
108 |     """
109 |     Args:
110 |         num_of_process (int): Number of process for both mappers and reducers.
111 |         mapper (Callable): Mapper function. The signature is `mapper(*args, **kwargs) -> object`.
112 |         reducer (Callable): Reducer function. The signature is `reduce(object, object) -> object`.
113 |                         `object` arguments are the returns from `mapper` s.
114 |         mapper_queue_size (int, optional): Maximum size of mapper queue, 0 by default means unlimited.
115 |         reducer_queue_size (int, optional): Maximum size of reduce queue, 0 by default means unlimited.
116 |     """
117 | 
118 |     CMD_NO_NEW_DATA = 1  # no more new user data
119 |     CMD_MAPPER_FINISH = 2  # mapper finished
120 |     CMD_REDUCER_WAITING = 3  # reducer is waiting
121 |     CMD_NO_RUNNING_MAPPER = 4  # no mapper is running
122 |     CMD_REDUCER_AWAKE = 5  # awake a reducer
123 |     CMD_REDUCER_KILL = 6  # kill a reducer
124 |     CMD_REDUCER_FINISH = 7  # reducer finished
125 | 
126 |     def __init__(self, num_of_process: int, mapper: Callable, reducer: Callable,
127 |                  mapper_queue_size: int = 0, reducer_queue_size: int = 0):
128 |         self._mapper_queue = mp.Queue(maxsize=mapper_queue_size)
129 |         self._reducer_queue = ChunkedQueue(maxsize=reducer_queue_size)
130 |         self._result_queue = ChunkedQueue()
131 |         self._mapper_cmd_queue = [mp.Queue() for _ in range(num_of_process)]
132 |         self._reducer_cmd_queue = [mp.Queue() for _ in range(num_of_process)]
133 |         self._manager_cmd_queue = mp.Queue()
134 | 
135 |         self._manager_process = mp.Process(target=self._run_manager)
136 |         self._mapper_process = [mp.Process(target=self._run_mapper, args=(i, ))
137 |                           for i in range(num_of_process)]
138 |         self._reducer_process = [mp.Process(target=self._run_reducer, args=(i, ))
139 |                           for i in range(num_of_process)]
140 | 
141 |         self._mapper = mapper
142 |         self._reducer = reducer
143 |         self._num_of_process = num_of_process
144 | 
145 |     def start(self):
146 |         """
147 |         Start all child processes.
148 |         """
149 |         # start manager, mapper and reducer processes
150 |         self._manager_process.start()
151 |         for m in self._mapper_process:
152 |             m.start()
153 |         for r in self._reducer_process:
154 |             r.start()
155 | 
156 |     def add_task(self, *args, **kwargs):
157 |         """
158 |         Add data.
159 | 
160 |         Args:
161 |             args: Same to args in `mapper` function.
162 |             kwargs: Same to kwargs in `mapper` function.
163 |         """
164 |         self._mapper_queue.put( (args, kwargs) )
165 | 
166 |     def task_done(self):
167 |         """
168 |         No more new task.
169 |         """
170 |         # no more user data
171 |         self._manager_cmd_queue.put( (self.__class__.CMD_NO_NEW_DATA,) )
172 | 
173 |     def join(self):
174 |         """
175 |         This method blocks until all mappers and reducers finish.
176 | 
177 |         Returns:
178 |             object: The final reduced object.
179 |         """
180 |         # reduced result
181 |         result = self._result_queue.get()
182 | 
183 |         # make sure all child processes exited
184 |         # (do this after clean up all queues to avoid deadlock
185 |         # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#all-start-methods
186 |         # "Joining processes that use queues")
187 |         for m in self._mapper_process:
188 |             m.join()
189 |         for r in self._reducer_process:
190 |             r.join()
191 |         self._manager_process.join()
192 | 
193 |         return result
194 | 
195 |     def _run_manager(self):
196 |         running_mapper = [1 for _ in range(self._num_of_process)]  # running mappers, 1 is running
197 |         running_reducer = [1 for _ in range(self._num_of_process)]  # running reducers, 1 is running
198 |         waiting_reducer = [0 for _ in range(self._num_of_process)]  # waiting reducers, 1 is waiting
199 |         killing_reducer = [0 for _ in range(self._num_of_process)]  # killing reducers, 1 is asked to kill
200 | 
201 |         # only return the index where mask shows 1
202 |         def apply_mask(mask):
203 |             for idx, m in enumerate(mask):
204 |                 if m == 1:
205 |                     yield idx
206 | 
207 |         while True:
208 |             try:
209 |                 cmd = self._manager_cmd_queue.get(timeout=0.1)
210 | 
211 |                 # no more user data, notify all mappers
212 |                 if cmd[0] == self.__class__.CMD_NO_NEW_DATA:
213 |                     for q in self._mapper_cmd_queue:
214 |                         q.put( (self.__class__.CMD_NO_NEW_DATA,) )
215 | 
216 |                 # a mapper finished
217 |                 elif cmd[0] == self.__class__.CMD_MAPPER_FINISH:
218 |                     idx = cmd[1]
219 |                     running_mapper[idx] = 0
220 |                     # notify reducers if all mappers are finished
221 |                     if sum(running_mapper) == 0:
222 |                         for r in self._reducer_cmd_queue:
223 |                             r.put( (self.__class__.CMD_NO_RUNNING_MAPPER,) )
224 | 
225 |                 # a reducer is waiting
226 |                 # if all reducers are waiting,
227 |                 # ask half of them to kill themselves and release held resources (context),
228 |                 # after being killed, wake up rest of the reducers
229 |                 elif cmd[0] == self.__class__.CMD_REDUCER_WAITING:
230 |                     idx = cmd[1]
231 |                     waiting_reducer[idx] = 1
232 |                     logger.info('waiting reducer #%d', idx)
233 | 
234 |                     # total num of running reducers
235 |                     running_reducer_num = len(list(apply_mask(running_reducer)))
236 |                     logger.info('running reducer num %d', running_reducer_num)
237 | 
238 |                     # only one reducer and nothing to reduce anymore
239 |                     if running_reducer_num == 1:
240 |                         # kill last reducer
241 |                         idx = next(apply_mask(running_reducer))
242 |                         self._reducer_cmd_queue[idx].put( (self.__class__.CMD_REDUCER_KILL,) )
243 |                         # return result to main process
244 |                         self._result_queue.put(self._reducer_queue.get())
245 |                         return
246 | 
247 |                     # total num of waiting reducers
248 |                     waiting_reducer_num = len(list(filter(lambda x: x > 0,
249 |                                     [waiting_reducer[idx] for idx in apply_mask(running_reducer)])))
250 |                     logger.info('waiting reducer num %d', waiting_reducer_num)
251 |                     logger.info('waiting reducer status %s', str(waiting_reducer))
252 | 
253 |                     # need to kill half of the reducers and release resources
254 |                     if running_reducer_num == waiting_reducer_num:
255 |                         # reset waiting reducer (for next round)
256 |                         waiting_reducer = [0 for _ in range(self._num_of_process)]
257 |                         # pick half of them to kill, notify these reducers
258 |                         kill_reducer_num = running_reducer_num - int(running_reducer_num / 2)
259 |                         notified_kill_reducer_num = 0
260 |                         for idx in apply_mask(running_reducer):
261 |                             self._reducer_cmd_queue[idx].put( (self.__class__.CMD_REDUCER_KILL,) )
262 |                             killing_reducer[idx] = 1
263 |                             notified_kill_reducer_num += 1
264 |                             logging.info('killing reducer #%d', idx)
265 |                             if kill_reducer_num == notified_kill_reducer_num:
266 |                                 break
267 | 
268 |                         # make sure these reducers are killed
269 |                         while True:
270 |                             cmd = self._manager_cmd_queue.get()
271 |                             # other command, put it back
272 |                             if cmd[0] != self.__class__.CMD_REDUCER_FINISH:
273 |                                 self._manager_cmd_queue.put(cmd)
274 |                             else:
275 |                                 idx = cmd[1]
276 |                                 # reset state for killed reducer
277 |                                 running_reducer[idx] = 0
278 |                                 killing_reducer[idx] = 0
279 |                                 logger.info('reducer killed #%d', idx)
280 | 
281 |                                 # all killed, wake up rest of the reducers
282 |                                 if sum(killing_reducer) == 0:
283 |                                     for idx in apply_mask(running_reducer):
284 |                                         logger.info('awaking reducer #%d', idx)
285 |                                         self._reducer_cmd_queue[idx].put( (self.__class__.CMD_REDUCER_AWAKE,) )
286 |                                     break
287 | 
288 |             except queue.Empty:
289 |                 continue
290 | 
291 |     def _run_mapper(self, idx):
292 |         no_new_data = False
293 | 
294 |         while True:
295 |             # cmd
296 |             try:
297 |                 cmd = self._mapper_cmd_queue[idx].get_nowait()
298 |                 if cmd[0] == self.__class__.CMD_NO_NEW_DATA:
299 |                     no_new_data = True
300 |             except queue.Empty:
301 |                 pass
302 | 
303 |             # data
304 |             try:
305 |                 data = self._mapper_queue.get(timeout=0.1)
306 |                 args, kwargs = data[0], data[1]
307 |                 result = self._mapper(*args, **kwargs)
308 |                 self._reducer_queue.put(result)
309 |             except queue.Empty:
310 |                 # no more new data, mapper finishes
311 |                 if no_new_data:
312 |                     self._manager_cmd_queue.put( (self.__class__.CMD_MAPPER_FINISH, idx) )
313 |                     return
314 |                 continue
315 | 
316 |     def _run_reducer(self, idx):
317 |         no_running_mapper = False
318 |         context = None  # it holds result of last reducing, and can be used in next reducing
319 | 
320 |         while True:
321 |             # cmd
322 |             try:
323 |                 cmd = self._reducer_cmd_queue[idx].get_nowait()
324 |                 if cmd[0] == self.__class__.CMD_NO_RUNNING_MAPPER:
325 |                     no_running_mapper = True
326 |             except queue.Empty:
327 |                 pass
328 | 
329 |             # data
330 |             try:
331 |                 if context is None:  # can't use "not" operator here, context could be empty object (list, dict, ...)
332 |                     context = self._reducer_queue.get(timeout=0.1)
333 | 
334 |                 m = self._reducer_queue.get(timeout=0.1)
335 |                 context = self._reducer(context, m)
336 |             except queue.Empty:
337 |                 # there are still some alive mappers, wait for their output
338 |                 if not no_running_mapper:
339 |                     continue
340 | 
341 |                 # no data in reducer queue, ask manager and wait for further action
342 |                 self._manager_cmd_queue.put( (self.__class__.CMD_REDUCER_WAITING, idx) )
343 |                 cmd = self._reducer_cmd_queue[idx].get()
344 |                 # awake
345 |                 if cmd[0] == self.__class__.CMD_REDUCER_AWAKE:
346 |                     continue
347 |                 # kill itself, put context back to reducer queue
348 |                 elif cmd[0] == self.__class__.CMD_REDUCER_KILL:
349 |                     if context is not None:
350 |                         self._reducer_queue.put(context)
351 |                     self._manager_cmd_queue.put( (self.__class__.CMD_REDUCER_FINISH, idx) )
352 |                     return
353 | 


--------------------------------------------------------------------------------
/pyrallel/parallel_processor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ParallelProcessor utilizes multiple CPU cores to process compute-intensive tasks.
  3 | 
  4 | 
  5 | If you have a some time-consuming statements in a for-loop and no state is shared among loops, you can map these
  6 | statements to different processes. Assume you need to process a couple of files, you can do this in parallel::
  7 | 
  8 |     def mapper(filename):
  9 |         with open(filename) as f_in, open(filename + '.out') as f_out:
 10 |             f_out.write(process_a_file(f_in.read()))
 11 | 
 12 |     pp = ParallelProcessor(2, mapper)
 13 |     pp.start()
 14 | 
 15 |     for fname in ['file1', 'file2', 'file3', 'file4']:
 16 |         pp.add_task(fname)
 17 | 
 18 |     pp.task_done()
 19 |     pp.join()
 20 | 
 21 | It's not required to write a cumbersome loop statement if you have iterable object or type (list, generator, etc).
 22 | Instead, you could use `map`::
 23 | 
 24 |     pp = ParallelProcessor(2, mapper)
 25 |     pp.start()
 26 | 
 27 |     pp.map(['file1', 'file2', 'file3', 'file4'])
 28 | 
 29 |     pp.task_done()
 30 |     pp.join()
 31 | 
 32 | Usually, some files are small and some are big, it would be better if it can keep all cores busy.
 33 | One way is to send content line by line to each process (assume content is line-separated)::
 34 | 
 35 |     def mapper(line, _idx):
 36 |         with open('processed_{}.out'.format(_idx), 'a') as f_out:
 37 |             f_out.write(process_a_line(line))
 38 | 
 39 |     pp = ParallelProcessor(2, mapper, enable_process_id=True)
 40 |     pp.start()
 41 | 
 42 |     for fname in ['file1', 'file2', 'file3', 'file4']:
 43 |         with open(fname) as f_in:
 44 |             for line in f_in:
 45 |                 pp.add_task(line)
 46 | 
 47 |     pp.task_done()
 48 |     pp.join()
 49 | 
 50 | One problem here is you need to acquire file descriptor every time the mapper is called.
 51 | To avoid this, use Mapper class to replace mapper function.
 52 | It allows user to define how the process is constructed and deconstructed::
 53 | 
 54 |     class MyMapper(Mapper):
 55 |         def enter(self):
 56 |             self.f = open('processed_{}.out'.format(self._idx), 'w')
 57 | 
 58 |         def exit(self, *args, **kwargs):
 59 |             self.f.close()
 60 | 
 61 |         def process(self, line):
 62 |             self.f.write(process_a_line(line))
 63 | 
 64 |     pp = ParallelProcessor(..., mapper=MyMapper, ...)
 65 | 
 66 | In some situations, you may need to use `collector` to collect data back from child processes to main process::
 67 | 
 68 |     processed = []
 69 | 
 70 |     def mapper(line):
 71 |         return process_a_line(line)
 72 | 
 73 |     def collector(data):
 74 |         processed.append(data)
 75 | 
 76 |     pp = ParallelProcessor(2, mapper, collector=collector)
 77 |     pp.start()
 78 | 
 79 |     for fname in ['file1', 'file2', 'file3', 'file4']:
 80 |         with open(fname) as f_in:
 81 |             for line in f_in:
 82 |                 pp.add_task(line)
 83 | 
 84 |     pp.task_done()
 85 |     pp.join()
 86 | 
 87 |     print(processed)
 88 | 
 89 | You can count the executions in `collector` to estimate the progress. To get the progress of mapper, \
 90 | create a progress function and set it in `ParallelProcessor`::
 91 | 
 92 |     def progress(p):
 93 | 
 94 |         # print('Total task: {}, Added to queue: {}, Mapper Loaded: {}, Mapper Processed {}'.format(
 95 |         #    p['total'], p['added'], p['loaded'], p['processed']))
 96 |         if p['processed'] % 10 == 0:
 97 |             print('Progress: {}%'.format(100.0 * p['processed'] / p['total']))
 98 | 
 99 |     pp = ParallelProcessor(8, mapper=mapper, progress=progress, progress_total=len(tasks))
100 |     pp.start()
101 | 
102 |     for t in tasks:
103 |         pp.add_task(t)
104 | 
105 | """
106 | 
107 | import multiprocess as mp
108 | import threading
109 | import queue
110 | import inspect
111 | import sys
112 | import typing
113 | from typing import Callable, Iterable
114 | 
115 | from pyrallel import Paralleller
116 | 
117 | if sys.version_info >= (3, 8):
118 |     from pyrallel import ShmQueue
119 | 
120 | 
121 | class Mapper(object):
122 |     """
123 |     Mapper class.
124 | 
125 |     This defines how mapper works.
126 | 
127 |     The methods will be called in following order::
128 | 
129 |         enter (one time) -> process (many times) -> exit (one time)
130 |     """
131 |     def __init__(self, idx):
132 |         self._idx = idx
133 |         self._progress_info = ProgressThread.init_mapper_progress_info()
134 | 
135 |     def __enter__(self):
136 |         self.enter()
137 |         return self
138 | 
139 |     def __exit__(self, exc_type, exc_val, exc_tb):
140 |         self.exit(exc_type, exc_val, exc_tb)
141 | 
142 |     def enter(self):
143 |         """
144 |         Invoked when subprocess is created and listening the queue.
145 |         """
146 |         pass
147 | 
148 |     def exit(self, *args, **kwargs):
149 |         """
150 |         Invoked when subprocess is going to exit. Arguments will be set if exception occurred.
151 |         """
152 |         pass
153 | 
154 |     def process(self, *args, **kwargs):
155 |         """
156 |         Same as mapper function, but `self` argument can provide additional context (e.g., `self._idx`).
157 |         """
158 |         raise NotImplementedError
159 | 
160 | 
161 | class CollectorThread(threading.Thread):
162 |     """
163 |     Handle collector in main process.
164 |     Create a thread and call ParallelProcessor.collect().
165 |     """
166 | 
167 |     def __init__(self, instance, collector):
168 |         super(CollectorThread, self).__init__()
169 |         self.collector = collector
170 |         self.instance = instance
171 | 
172 |     def run(self):
173 |         for batched_collector in self.instance.collect():
174 |             for o in batched_collector:
175 |                 self.collector(*o)
176 | 
177 | 
178 | class ProgressThread(threading.Thread):
179 |     """
180 |     Progress information in main process.
181 |     """
182 | 
183 |     P_ADDED = 0
184 |     P_LOADED = 1
185 |     P_PROCESSED = 2
186 |     P_TOTAL = 3
187 | 
188 |     def __init__(self, instance, progress, progress_total, num_of_processor):
189 |         super(ProgressThread, self).__init__()
190 |         self.progress_info = {
191 |             ProgressThread.P_ADDED: 0,
192 |             ProgressThread.P_LOADED: 0,
193 |             ProgressThread.P_PROCESSED: 0,
194 |             ProgressThread.P_TOTAL: progress_total
195 |         }
196 |         self.mapper_progress_info = [ProgressThread.init_mapper_progress_info() for _ in range(num_of_processor)]
197 |         self.instance = instance
198 |         self.progress = progress
199 | 
200 |     @staticmethod
201 |     def init_mapper_progress_info():
202 |         return {ProgressThread.P_LOADED: 0, ProgressThread.P_PROCESSED: 0}
203 | 
204 |     def refresh_progress_info(self):
205 |         self.progress_info[ProgressThread.P_LOADED] \
206 |             = sum([p[ProgressThread.P_LOADED] for p in self.mapper_progress_info])
207 |         self.progress_info[ProgressThread.P_PROCESSED] \
208 |             = sum([p[ProgressThread.P_PROCESSED] for p in self.mapper_progress_info])
209 | 
210 |     def run(self):
211 |         for idx, mapper_progress_info in self.instance.get_progress():
212 |             self.mapper_progress_info[idx] = mapper_progress_info
213 |             self.refresh_progress_info()
214 |             progress_info = {
215 |                 'added': self.progress_info[ProgressThread.P_ADDED],
216 |                 'loaded': self.progress_info[ProgressThread.P_LOADED],
217 |                 'processed': self.progress_info[ProgressThread.P_PROCESSED],
218 |                 'total': self.progress_info[ProgressThread.P_TOTAL],
219 |             }
220 |             self.progress(progress_info)
221 | 
222 | 
223 | class ParallelProcessor(Paralleller):
224 |     """
225 |     Args:
226 |         num_of_processor (int): Number of processes to use.
227 |         mapper (Callable / Mapper): Function or subclass of `Mapper` class.
228 |         max_size_per_mapper_queue (int, optional): Maximum size of mapper queue for one process.
229 |                                     If it's full, the corresponding process will be blocked.
230 |                                     0 by default means unlimited.
231 |         collector (Callable, optional): If the collector data needs to be get in main process (another thread),
232 |                                 set this handler, the arguments are same to the return from mapper.
233 |                                 The return result is one by one, order is arbitrary.
234 |         max_size_per_collector_queue (int, optional): Maximum size of collector queue for one process.
235 |                                     If it's full, the corresponding process will be blocked.
236 |                                     0 by default means unlimited.
237 |         enable_process_id (bool, optional): If it's true, an additional argument `_idx` (process id) will be
238 |                                 passed to `mapper` function. This has no effect for `Mapper` class.
239 |                                 It defaults to False.
240 |         batch_size (int, optional): Batch size, defaults to 1.
241 |         progress (Callable, optional): Progress function, which takes a dictionary as input.
242 |                                 The dictionary contains following keys: `total` can be set by `progress_total`,
243 |                                 `added` indicates the number of tasks has been added to the queue,
244 |                                 `loaded` indicates the number of tasks has been loaded to worker processes,
245 |                                 `processed` indicates the number of tasks has been processed by worker processes.
246 |                                 Defaults to None.
247 |         progress_total (int, optional): Total number of tasks. Defaults to None.
248 |         use_shm (bool, optional): When True, and when running on Python version 3.8 or later,
249 |                                 use ShmQueue for higher performance.  Defaults to False.
250 |         enable_collector_queues (bool, optional): When True, create a collector queue for each
251 |                                 processor.  When False, do not allocate collector queues, saving
252 |                                 resources.  Defaults to True.
253 |         single_mapper_queue (bool, optional): When True, allocate a single mapper queue that will
254 |                                 be shared between the worker processes.  Sending processes can
255 |                                 go to sleep when the mapper queue is full.  When False, each process
256 |                                 gets its own mapper queue, and CPU-intensive polling may be needed to
257 |                                 find a mapper queue which can accept a new request.
258 | 
259 |     Note:
260 |         - Do NOT implement heavy compute-intensive operations in collector, they should be in mapper.
261 |         - Tune the value for queue size and batch size will optimize performance a lot.
262 |         - `collector` only collects returns from `mapper` or `Mapper.process`.
263 |         - The frequency of executing `progress` function depends on CPU.
264 |     """
265 | 
266 |     # Command format in queue. Represent in tuple.
267 |     # The first element of tuple will be command, the rests are arguments or data.
268 |     # (CMD_XXX, args...)
269 |     CMD_DATA = 0
270 |     CMD_STOP = 1
271 | 
272 |     QSTATS_ON = 0
273 |     QSTATS_OFF = 1
274 | 
275 |     def __init__(self, num_of_processor: int, mapper: Callable, max_size_per_mapper_queue: int = 0,
276 |                  collector: Callable = None, max_size_per_collector_queue: int = 0,
277 |                  enable_process_id: bool = False, batch_size: int = 1, progress=None, progress_total = None,
278 |                  use_shm=False, enable_collector_queues=True,
279 |                  single_mapper_queue: bool = False):
280 |         self.num_of_processor = num_of_processor
281 |         self.single_mapper_queue = single_mapper_queue
282 |         if sys.version_info >= (3, 8):
283 |             self.collector_queues: typing.Optional[typing.Union[ShmQueue, mp.Queue]]
284 |         else:
285 |             self.collector_queues: typing.Optional[mp.Queue]
286 |         if use_shm:
287 |             if sys.version_info >= (3, 8):
288 |                 if single_mapper_queue:
289 |                     self.mapper_queues = [ShmQueue(maxsize=max_size_per_mapper_queue * num_of_processor)]
290 |                 else:
291 |                     self.mapper_queues = [ShmQueue(maxsize=max_size_per_mapper_queue) for _ in range(num_of_processor)]
292 |                 if enable_collector_queues:
293 |                     self.collector_queues = [ShmQueue(maxsize=max_size_per_collector_queue) for _ in range(num_of_processor)]
294 |                 else:
295 |                     self.collector_queues = None
296 |             else:
297 |                 raise ValueError("shm not available in this version of Python.")
298 |         else:
299 |             if single_mapper_queue:
300 |                 self.mapper_queues = [mp.Queue(maxsize=max_size_per_mapper_queue * num_of_processor)]
301 |             else:
302 |                 self.mapper_queues = [mp.Queue(maxsize=max_size_per_mapper_queue) for _ in range(num_of_processor)]
303 |             if enable_collector_queues:
304 |                 self.collector_queues = [mp.Queue(maxsize=max_size_per_collector_queue) for _ in range(num_of_processor)]
305 |                 self.collector_qstats = [self.QSTATS_ON for _ in range(num_of_processor)]
306 |             else:
307 |                 self.collector_queues = None
308 |                 
309 |         if self.collector_queues is not None:
310 |             if single_mapper_queue:
311 |                 self.processes = [mp.Process(target=self._run, args=(i, self.mapper_queues[0], self.collector_queues[i]))
312 |                                   for i in range(num_of_processor)]
313 |             else:
314 |                 self.processes = [mp.Process(target=self._run, args=(i, self.mapper_queues[i], self.collector_queues[i]))
315 |                                   for i in range(num_of_processor)]
316 |         else:
317 |             if single_mapper_queue:
318 |                 self.processes = [mp.Process(target=self._run, args=(i, self.mapper_queues[0], None))
319 |                                   for i in range(num_of_processor)]
320 |             else:
321 |                 self.processes = [mp.Process(target=self._run, args=(i, self.mapper_queues[i], None))
322 |                                   for i in range(num_of_processor)]
323 |         if progress is not None:
324 |             if sys.version_info >= (3, 8):
325 |                 self.progress_queues: typing.Optional[typing.Union[ShmQueue, mp.Queue]]
326 |             else:
327 |                 self.progress_queues: typing.Optional[mp.Queue]
328 |             if use_shm:
329 |                 if sys.version_info >= (3, 8):
330 |                     self.progress_queues = [ShmQueue(maxsize=1) for _ in range(num_of_processor)]
331 |                 else:
332 |                     raise ValueError("shm not available in this version of Python.")
333 |             else:
334 |                 self.progress_queues = [mp.Queue(maxsize=1) for _ in range(num_of_processor)]
335 |             self.progress_qstats = [self.QSTATS_ON for _ in range(num_of_processor)]
336 |         else:
337 |             self.progress_queues = None
338 |         self.progress = progress
339 | 
340 |         ctx = self
341 |         if not inspect.isclass(mapper) or not issubclass(mapper, Mapper):
342 |             class DefaultMapper(Mapper):
343 |                 def process(self, *args, **kwargs):
344 |                     if ctx.enable_process_id:
345 |                         kwargs['_idx'] = self._idx
346 |                     return mapper(*args, **kwargs)
347 |             self.mapper = DefaultMapper
348 |         else:
349 |             self.mapper = mapper
350 | 
351 |         self.collector = collector
352 |         self.mapper_queue_index = 0
353 |         self.enable_process_id = enable_process_id
354 |         self.batch_size = batch_size
355 |         self.batch_data = []
356 | 
357 |         # collector can be handled in each process or in main process after merging (collector needs to be set)
358 |         # if collector is set, it needs to be handled in main process;
359 |         # otherwise, it assumes there's no collector.
360 |         if collector:
361 |             self.collector_thread = CollectorThread(self, collector)
362 | 
363 |         if progress:
364 |             self.progress_thread = ProgressThread(self, progress, progress_total, num_of_processor)
365 | 
366 |     def start(self):
367 |         """
368 |         Start processes and threads.
369 |         """
370 |         if self.collector:
371 |             self.collector_thread.start()
372 |         if self.progress:
373 |             self.progress_thread.start()
374 |         for p in self.processes:
375 |             p.start()
376 | 
377 |     def join(self):
378 |         """
379 |         Block until processes and threads return.
380 |         """
381 |         if self.collector:
382 |             self.collector_thread.join()
383 |         if self.progress:
384 |             self.progress_thread.join()
385 |         for p in self.processes:
386 |             p.join()
387 |         for q in self.mapper_queues:
388 |             q.close()
389 |             q.join_thread()
390 |         if self.collector_queues is not None:
391 |             for q in self.collector_queues:
392 |                 q.close()
393 |                 q.join_thread()
394 |         if self.progress_queues is not None:
395 |             for q in self.progress_queues:
396 |                 q.close()
397 |                 q.join_thread()
398 |                 pass
399 | 
400 |     def task_done(self):
401 |         """
402 |         Indicate that all resources which need to add_task are added to processes.
403 |         (main process, blocked)
404 |         """
405 |         if len(self.batch_data) > 0:
406 |             self._add_task(self.batch_data)
407 |             self.batch_data = []
408 | 
409 |         for i in range(self.num_of_processor):
410 |             if self.single_mapper_queue:
411 |                 self.mapper_queues[0].put((ParallelProcessor.CMD_STOP,))
412 |             else:
413 |                 self.mapper_queues[i].put((ParallelProcessor.CMD_STOP,))
414 | 
415 |     def add_task(self, *args, **kwargs):
416 |         """
417 |         Add data to one a mapper queue.
418 | 
419 |         When a single mapper queue is in use, put the process to sleep if the
420 |         queue is full.  When multiple mapper queues are in use (one per process),
421 |         use CPU-intensive polling (round-robin processing) to find the next available
422 |         queue. (main process, blocked or unblocked depending upon single_mapper_queue)
423 |         """
424 |         self.batch_data.append((args, kwargs))
425 |         if self.progress:
426 |             self.progress_thread.progress_info[ProgressThread.P_ADDED] += 1
427 | 
428 |         if len(self.batch_data) == self.batch_size:
429 |             self._add_task(self.batch_data)
430 |             self.batch_data = []  # reset buffer
431 | 
432 |     def _add_task(self, batched_args):
433 |         if self.single_mapper_queue:
434 |             self.mapper_queues[0].put((ParallelProcessor.CMD_DATA, batched_args))
435 |         else:
436 |             while True:
437 |                 q = self.mapper_queues[self.mapper_queue_index]
438 |                 self.mapper_queue_index = (self.mapper_queue_index + 1) % self.num_of_processor
439 |                 try:
440 |                     q.put_nowait((ParallelProcessor.CMD_DATA, batched_args))
441 |                     return  # put in
442 |                 except queue.Full:
443 |                     continue  # find next available
444 | 
445 |     def _run(self, idx: int, mapper_queue: mp.Queue, collector_queue: typing.Optional[mp.Queue]):
446 |         """
447 |         Process's activity. It handles queue IO and invokes user's mapper handler.
448 |         (subprocess, blocked, only two queues can be used to communicate with main process)
449 |         """
450 |         with self.mapper(idx) as mapper:
451 |             while True:
452 |                 data = mapper_queue.get()
453 |                 if data[0] == ParallelProcessor.CMD_STOP:
454 |                     # print(idx, 'stop')
455 |                     self._update_progress(mapper, finish=True)
456 |                     if self.collector and collector_queue is not None:
457 |                         collector_queue.put((ParallelProcessor.CMD_STOP,))
458 |                     return
459 |                 elif data[0] == ParallelProcessor.CMD_DATA:
460 |                     batch_result = []
461 |                     for d in data[1]:
462 |                         args, kwargs = d[0], d[1]
463 |                         # print(idx, 'data')
464 |                         self._update_progress(mapper, type_=ProgressThread.P_LOADED)
465 |                         result = mapper.process(*args, **kwargs)
466 |                         self._update_progress(mapper, type_=ProgressThread.P_PROCESSED)
467 |                         if collector_queue is not None:
468 |                             if self.collector:
469 |                                 if not isinstance(result, tuple):  # collector must represent as tuple
470 |                                     result = (result,)
471 |                             batch_result.append(result)
472 |                     if collector_queue is not None and len(batch_result) > 0:
473 |                         collector_queue.put((ParallelProcessor.CMD_DATA, batch_result))
474 |                         batch_result = []  # reset buffer
475 | 
476 |     def _update_progress(self, mapper, type_=None, finish=False):
477 |         if self.progress:
478 |             try:
479 |                 if not finish:
480 |                     # No need to ensure the status will be pulled from main process
481 |                     # so if queue is full just skip this update
482 |                     mapper._progress_info[type_] += 1
483 |                     self.progress_queues[mapper._idx].put_nowait( (ParallelProcessor.CMD_DATA, mapper._progress_info) )
484 |                 else:
485 |                     # update the last progress of each mapper
486 |                     self.progress_queues[mapper._idx].put( (ParallelProcessor.CMD_STOP, mapper._progress_info) )
487 |             except queue.Full:
488 |                 pass
489 | 
490 |     def collect(self):
491 |         """
492 |         Get data from collector queue sequentially.
493 |         (main process, unblocked, using round robin to find next available queue)
494 |         """
495 |         if not self.collector:
496 |             return
497 | 
498 |         idx = 0
499 |         while True:
500 |             # all queues finished
501 |             if sum([int(s == self.QSTATS_OFF) for s in self.collector_qstats]) == self.num_of_processor:
502 |                 return
503 | 
504 |             # get next unfinished queue
505 |             while self.collector_qstats[idx] == self.QSTATS_OFF:
506 |                 idx = (idx + 1) % self.num_of_processor
507 |             q = self.collector_queues[idx]
508 | 
509 |             try:
510 |                 data = q.get_nowait()  # get out
511 |                 if data[0] == ParallelProcessor.CMD_STOP:
512 |                     self.collector_qstats[idx] = self.QSTATS_OFF
513 |                 elif data[0] == ParallelProcessor.CMD_DATA:
514 |                     yield data[1]
515 |             except queue.Empty:
516 |                 continue  # find next available
517 |             finally:
518 |                 idx = (idx + 1) % self.num_of_processor
519 | 
520 |     def get_progress(self):
521 |         """
522 |         Get progress information from each mapper.
523 |         (main process)
524 |         """
525 |         if not self.progress:
526 |             return
527 | 
528 |         idx = 0
529 |         while True:
530 |             # all queues finished
531 |             if sum([int(s == self.QSTATS_OFF) for s in self.progress_qstats]) == self.num_of_processor:
532 |                 return
533 | 
534 |             # get next unfinished queue
535 |             while self.progress_qstats[idx] == self.QSTATS_OFF:
536 |                 idx = (idx + 1) % self.num_of_processor
537 |             q = self.progress_queues[idx]
538 | 
539 |             try:
540 |                 data = q.get_nowait()
541 |                 if data[0] == ParallelProcessor.CMD_STOP:
542 |                     self.progress_qstats[idx] = self.QSTATS_OFF
543 |                 elif data[0] == ParallelProcessor.CMD_DATA:
544 |                     pass
545 |                 yield idx, data[1]
546 |             except queue.Empty:
547 |                 continue  # find next available
548 |             finally:
549 |                 idx = (idx + 1) % self.num_of_processor
550 | 


--------------------------------------------------------------------------------
/pyrallel/queue.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import multiprocessing as mp
  3 | import multiprocessing.queues as mpq
  4 | from queue import Full, Empty
  5 | import pickle
  6 | import math
  7 | # import uuid
  8 | import os
  9 | import struct
 10 | import sys
 11 | import time
 12 | import typing
 13 | import dill  # type: ignore
 14 | import zlib
 15 | 
 16 | 
 17 | if sys.version_info >= (3, 8):
 18 |     from multiprocessing.shared_memory import SharedMemory
 19 |     __all__ = ['ShmQueue']
 20 | else:
 21 |     from typing import TypeVar
 22 |     SharedMemory = TypeVar('SharedMemory')
 23 |     __all__ = []
 24 | 
 25 | 
 26 | class ShmQueue(mpq.Queue):
 27 |     """ShmQueue depends on shared memory instead of pipe to efficiently exchange data among processes.
 28 |     Shared memory is "System V style" memory blocks which can be shared and accessed directly by processes.
 29 |     This implementation is based on `multiprocessing.shared_memory.SharedMemory` hence requires Python >= 3.8.
 30 |     Its interface is almost identical to `multiprocessing.queue <https://docs.python.org/3.8/library/multiprocessing.html#multiprocessing.Queue>`_.
 31 |     But it allows one to specify the serializer, which by default is pickle.
 32 | 
 33 |     This implementation maintains two lists:  a free buffer list, and a ready message list.
 34 |     The list heads for both lists are stored in a single shared memory area.
 35 | 
 36 |     The free buffer list is linked by the next_block_id field in each shared
 37 |     buffer's metadata area.
 38 | 
 39 |     Messages are built out of chunks.  Each chunk occupies a single buffer.
 40 |     Each chunk contains a pointer (an integer identifier) to the next chunk's
 41 |     buffer using the next_chunk_block_id field in the shared buffer's metadata
 42 |     area. The list of ready messages links the first chunk of each ready
 43 |     message using the next_block_id field in the shared buffer's metadata
 44 |     area.
 45 | 
 46 |     Messages are serialized for transfer from the sender to the receiver.
 47 |     The serialized size of a message may not exceed the chunk size times
 48 |     the maximum queue size.  If the deadlock_immanent_check is enabled
 49 |     (which is True by default), a ValueError will be raised on an attempt
 50 |     to put a message that is too large.
 51 | 
 52 |     Args:
 53 |         chunk_size (int, optional): Size of each chunk. By default, it is `ShmQueue.DEFAULT_CHUNK_SIZE` (1*1024*1024). \
 54 |                                 If it is 0, it will be set to `ShmQueue.MAX_CHUNK_SIZE` (512*1024*1024).
 55 |         maxsize (int, optional): Maximum queue size, e.g. the maximum number of chunks available to a queue. \
 56 |                                 If it is 0 (default), it will be set to `ShmQueue.DEFAULT_MAXSIZE` (2).
 57 |         serializer (obj, optional): Serializer to serialize and deserialize data. \
 58 |                                 If it is None (default), pickle will be used. \
 59 |                                 The serializer should implement `loads(bytes data) -> object` \
 60 |                                 and `dumps(object obj) -> bytes`.
 61 |         integrity_check (bool, optional): When True, perform certain integrity checks on messages.
 62 |                                 1) After serializing a message, immediately deserialize it to check for validity.
 63 |                                 2) Save the length of a message after serialization.
 64 |                                 3) Compute a checksum of each chunk of the message.
 65 |                                 4) Include the total message size and chunk checksum in the metadata for each chunk.
 66 |                                 5) When pulling a chunk from the queue, verify the chunk checksum.
 67 |                                 6) After reassembling a message out of chunks, verify the total message size.
 68 |         deadlock_check (bool, optional): When fetching a writable block, print a message if two or more
 69 |                                 loops are needed to get a free block. (default is False)
 70 |         deadlock_immanent_check (bool, optional): Raise a ValueError if a message submitted to
 71 |                                 put(...) is too large to process.  (Default is True)
 72 |         watermark_check (bool, optional): When true, prit a mesage with the largest message size so far in chunks.
 73 |         use_semaphores (bool, optional): When true, use semaphores to control access to the free list and the 
 74 |                                 message list. The system will sleep when accessing these shared resources,
 75 |                                 instead of entering a polling loop.
 76 | 
 77 |     Note:
 78 |         - `close` needs to be invoked once to release memory and avoid a memory leak.
 79 |         - `qsize`, `empty` and `full` are implemented but may block.
 80 |         - Each shared queue consumes one shared memory area for the shared list heads
 81 |           and one shared memory area for each shared buffer.  The underlying code in
 82 |           multiprocessing.shared_memory.SharedMemory consumes one process file descriptor
 83 |           for each shared memory area.  There is a limit on the number of file descriptors
 84 |           that a process may have open.
 85 |         - Thus, there is a tradeoff between the chunk_size and maxsize:  smaller chunks
 86 |           use memory more effectively with some overhead cost, but may run into the limit
 87 |           on the number of open file descriptors to process large messages and avoid blocking.
 88 |           Larger chunks waste unused space, but are less likely to run into the open file descriptor
 89 |           limit or to block waiting for a free buffer.
 90 | 
 91 |     Example::
 92 | 
 93 |         def run(q):
 94 |             e = q.get()
 95 |             print(e)
 96 | 
 97 |         if __name__ == '__main__':
 98 |             q = ShmQueue(chunk_size=1024 * 4, maxsize=10)
 99 |             p = Process(target=run, args=(q,))
100 |             p.start()
101 |             q.put(100)
102 |             p.join()
103 |             q.close()
104 | 
105 |     """
106 | 
107 |     MAX_CHUNK_SIZE: int = 512 * 1024 * 1024
108 |     """int: The maximum allowable size for a buffer chunk.  512MB should be a large enough
109 |     value."""
110 | 
111 |     DEFAULT_CHUNK_SIZE: int = 1 * 1024 * 1024
112 |     """int: The default size for a buffer chunk."""
113 | 
114 |     DEFAULT_MAXSIZE: int = 2
115 |     """int: The default maximum size for a queue."""
116 | 
117 |     RESERVED_BLOCK_ID: int = 0xffffffff
118 |     """int: RESERVED_BLOCK_ID is stored in the list head pointer and next chunk
119 |     block id fields to indicate that thee is no next block.  This value is intended
120 |     to simplify debugging by removing stale next-block values.  It is not used to
121 |     test for blok chain termination;  counters are used for that purpose, instead."""
122 | 
123 |     META_STRUCT: typing.Mapping[str, typing.Tuple[int, int, str]] = {
124 |         'msg_id': (0, 12, '12s'),
125 |         'msg_size': (12, 16, 'I'),
126 |         'chunk_id': (16, 20, 'I'),
127 |         'total_chunks': (20, 24, 'I'),
128 |         'total_msg_size': (24, 28, 'I'),
129 |         'checksum': (28, 32, 'I'),
130 |         'src_pid': (32, 36, 'I'),
131 |         'next_chunk_block_id': (36, 40, 'I'),
132 |         'next_block_id': (40, 44, 'I')
133 |     }
134 |     """The per-buffer metadata structure parameters for struct.pack(...) and
135 |     struct.unpack(...)."""
136 |     
137 |     META_BLOCK_SIZE: int = 44
138 |     """int: The length of the buffer metadata structure in bytes."""
139 | 
140 |     LIST_HEAD_STRUCT: typing.Mapping[str, typing.Tuple[int, int, str]] = {
141 |         'first_block': (0, 4, 'I'),
142 |         'last_block': (4, 8, 'I'),
143 |         'block_count': (8, 12, 'I')
144 |     }
145 |     """The list head structure parameters for struct.pack(...) and
146 |     struct.unpack(...). The list header structure maintains a block
147 |     count in addition to first_block and last_block pointers."""
148 | 
149 |     LIST_HEAD_SIZE: int = 12
150 |     """int: The length of a list head structure in bytes."""
151 | 
152 |     FREE_LIST_HEAD: int = 0
153 |     """int: The index of the free buffer list head in the SharedMemory segment for
154 |     sharing message queue list heads between processes."""
155 |     
156 |     MSG_LIST_HEAD: int = 1
157 |     """int: The index of the queued message list head in the SharedMemory segment for
158 |     sharing message queue list heads between processes."""
159 | 
160 |     qid_counter: int = 0
161 |     """int: Each message queue has a queue ID (qid) that identifies the queue for
162 |     debugging messages. This mutable class counter is used to create new queue ID
163 |     values for newly-created queue. Implicitly, this assumes that message queues
164 |     will be created by a single initialization process, then distributed to worker
165 |     process.  If shared message queues will be created by multiple processes, then
166 |     the queue ID should be altered to incorporate the process ID (pid) of the
167 |     process that created the shared message queue, or an additional field should
168 |     be created and presented with the shared message queue's creator's pid.."""
169 | 
170 |     def __init__(self,
171 |                  chunk_size: int=DEFAULT_CHUNK_SIZE,
172 |                  maxsize: int=DEFAULT_MAXSIZE,
173 |                  serializer=None,
174 |                  integrity_check: bool=False,
175 |                  deadlock_check: bool=False,
176 |                  deadlock_immanent_check: bool=True,
177 |                  watermark_check: bool = False,
178 |                  use_semaphores: bool = True,
179 |                  verbose: bool=False):
180 |         ctx = mp.get_context() # TODO: What is the proper type hint here?
181 | 
182 |         super().__init__(maxsize, ctx=ctx)
183 | 
184 |         self.qid: int = self.__class__.qid_counter
185 |         self.__class__.qid_counter += 1
186 | 
187 |         self.verbose: bool = verbose
188 |         if self.verbose:
189 |             print("Starting ShmQueue qid=%d pid=%d chunk_size=%d maxsize=%d." % (self.qid, os.getpid(), chunk_size, maxsize), file=sys.stderr, flush=True) # ***
190 | 
191 |         self.chunk_size: int = min(chunk_size, self.__class__.MAX_CHUNK_SIZE) \
192 |             if chunk_size > 0 else self.__class__.MAX_CHUNK_SIZE
193 | 
194 |         self.maxsize: int = maxsize if maxsize > 0 else self.__class__.DEFAULT_MAXSIZE
195 | 
196 |         self.serializer = serializer or pickle
197 | 
198 |         self.integrity_check: bool = integrity_check
199 |         self.deadlock_check: bool = deadlock_check
200 |         self.deadlock_immanent_check: bool = deadlock_immanent_check
201 |         self.watermark_check: bool = watermark_check
202 |         self.chunk_watermark: int = 0
203 | 
204 |         self.mid_counter: int = 0
205 | 
206 |         self.producer_lock = ctx.Lock()
207 |         self.free_list_lock = ctx.Lock()
208 |         self.msg_list_lock = ctx.Lock()
209 | 
210 |         self.use_semaphores: bool = use_semaphores
211 |         if not use_semaphores:
212 |             # Put the None case first to make mypy happier.
213 |             self.free_list_semaphore: typing.Optional[typing.Any] = None # TODO: what is the type returned by ctx.Semaphore(0)?
214 |             self.msg_list_semaphore: typing.Optional[typing.Any] = None
215 |         else:
216 |             self.free_list_semaphore = ctx.Semaphore(0)
217 |             self.msg_list_semaphore = ctx.Semaphore(0)
218 |         
219 |         self.list_heads: SharedMemory = SharedMemory(create=True, size=self.__class__.LIST_HEAD_SIZE * 2)
220 |         self.init_list_head(self.__class__.FREE_LIST_HEAD)
221 |         self.init_list_head(self.__class__.MSG_LIST_HEAD)
222 | 
223 |         self.block_locks: typing.List[typing.Any] = [ctx.Lock()] * maxsize # TODO: what is the type returned by ctx.Lock()?
224 |         self.data_blocks: typing.List[SharedMemory] = []
225 |         block_id: int
226 |         for block_id in range(maxsize):
227 |             self.data_blocks.append(SharedMemory(create=True, size=self.__class__.META_BLOCK_SIZE + self.chunk_size))
228 |             self.add_free_block(block_id)
229 | 
230 |     def __getstate__(self):
231 |         """This routine retrieves queue information when forking a new process."""
232 |         return (self.qid,
233 |                 self.verbose,
234 |                 self.chunk_size,
235 |                 self.maxsize,
236 |                 dill.dumps(self.serializer),
237 |                 self.integrity_check,
238 |                 self.deadlock_check,
239 |                 self.deadlock_immanent_check,
240 |                 self.watermark_check,
241 |                 self.chunk_watermark,
242 |                 self.mid_counter,
243 |                 self.producer_lock,
244 |                 self.free_list_lock,
245 |                 self.msg_list_lock,
246 |                 self.use_semaphores,
247 |                 self.free_list_semaphore,
248 |                 self.msg_list_semaphore,
249 |                 dill.dumps(self.list_heads),
250 |                 self.block_locks,
251 |                 dill.dumps(self.data_blocks))
252 | 
253 |     def __setstate__(self, state):
254 |         """This routine saves queue information when forking a new process."""
255 |         (self.qid,
256 |          self.verbose,
257 |          self.chunk_size,
258 |          self.maxsize,
259 |          self.serializer,
260 |          self.integrity_check,
261 |          self.deadlock_check,
262 |          self.deadlock_immanent_check,
263 |          self.watermark_check,
264 |          self.chunk_watermark,
265 |          self.mid_counter,
266 |          self.producer_lock,
267 |          self.free_list_lock,
268 |          self.msg_list_lock,
269 |          self.use_semaphores,
270 |          self.free_list_semaphore,
271 |          self.msg_list_semaphore,
272 |          self.list_heads,
273 |          self.block_locks,
274 |          self.data_blocks) = state
275 | 
276 |         self.list_heads = dill.loads(self.list_heads)
277 |         self.data_blocks = dill.loads(self.data_blocks)
278 |         self.serializer = dill.loads(self.serializer)
279 | 
280 |     def get_list_head_field(self, lh: int, type_: str)->int:
281 |         """int: Get a field from a list head.
282 | 
283 |         Args:
284 |             lh (int): The index of the list head in the list head shared memory.
285 |             type (str): The name of the list head field."""
286 |         addr_s: typing.Optional[int]
287 |         addr_e: typing.Optional[int]
288 |         ctype: typing.Optional[str]
289 |         addr_s, addr_e, ctype = self.__class__.LIST_HEAD_STRUCT.get(type_, (None, None, None))
290 |         if addr_s is None or addr_e is None or ctype is None:
291 |             raise ValueError("get_list_head_field: unrecognized %s" % repr(type_))
292 |         return struct.unpack(ctype, self.list_heads.buf[(self.__class__.LIST_HEAD_SIZE * lh) + addr_s : (self.__class__.LIST_HEAD_SIZE * lh) + addr_e])[0]
293 | 
294 |     def set_list_head_field(self, lh: int, data: int, type_: str):
295 |         addr_s: typing.Optional[int]
296 |         addr_e: typing.Optional[int]
297 |         ctype: typing.Optional[str]
298 |         addr_s, addr_e, ctype = self.__class__.LIST_HEAD_STRUCT.get(type_, (None, None, None))
299 |         if addr_s is None or addr_e is None or ctype is None:
300 |             raise ValueError("get_list_head_field: unrecognized %s" % repr(type_))
301 | 
302 |         # TODO: find a better way to calm mypy's annoyance at the following:
303 |         self.list_heads.buf[(self.__class__.LIST_HEAD_SIZE * lh) + addr_s : (self.__class__.LIST_HEAD_SIZE * lh) + addr_e] = struct.pack(ctype, data) #type: ignore
304 | 
305 |     def get_meta(self, block: SharedMemory, type_: str)->typing.Union[bytes, int]:
306 |         """typing.Union[bytes, int]: Get a field from a block's metadata area in shared memory.
307 | 
308 |         Args:
309 |             block (SharedMemory): The shared memory for the data block.
310 |             type_ (str): The name of the metadata field to extract."""
311 |         addr_s: typing.Optional[int]
312 |         addr_e: typing.Optional[int]
313 |         ctype: typing.Optional[str]
314 |         addr_s, addr_e, ctype = self.__class__.META_STRUCT.get(type_, (None, None, None))
315 |         if addr_s is None or addr_e is None or ctype is None:
316 |             raise ValueError("get_meta: unrecognized %s" % repr(type_))
317 |         return struct.unpack(ctype, block.buf[addr_s : addr_e])[0]
318 | 
319 |     def set_meta(self, block: SharedMemory, data, type_: str):
320 |         addr_s: typing.Optional[int]
321 |         addr_e: typing.Optional[int]
322 |         ctype: typing.Optional[str]
323 |         addr_s, addr_e, ctype = self.__class__.META_STRUCT.get(type_, (None, None, None))
324 |         if addr_s is None or addr_e is None or ctype is None:
325 |             raise ValueError("set_meta: unrecognized %s" % repr(type_))
326 | 
327 |         # TODO: find a better way to calm mypy's annoyance at the following:
328 |         block.buf[addr_s : addr_e] = struct.pack(ctype, data) #type: ignore
329 | 
330 |     def get_data(self, block: SharedMemory, data_size: int)->bytes:
331 |         """bytes: Get a memoryview of the a shared memory data block.
332 | 
333 |         Args:
334 |             block (SharedMemory): The chared memory block.
335 |             data_size (int): The number of bytes in the returned memoryview slice."""
336 |         return block.buf[self.__class__.META_BLOCK_SIZE:self.__class__.META_BLOCK_SIZE+data_size]
337 | 
338 |     def set_data(self, block: SharedMemory, data: bytes, data_size: int):
339 |         # TODO: find a better way to calm mypy's annoyance at the following:
340 |         block.buf[self.__class__.META_BLOCK_SIZE:self.__class__.META_BLOCK_SIZE+data_size] = data # type: ignore
341 | 
342 |     def init_list_head(self, lh: int):
343 |         """Initialize a block list, clearing the block count and setting the first_block
344 |            and last_block fields to the reserved value that indicates that they are
345 |            void pointers.
346 | 
347 |         Args:
348 |             lh (int): The index of the list head in the list head shared memory area."""
349 |         self.set_list_head_field(lh, 0, 'block_count')
350 |         self.set_list_head_field(lh, self.__class__.RESERVED_BLOCK_ID, 'first_block')
351 |         self.set_list_head_field(lh, self.__class__.RESERVED_BLOCK_ID, 'last_block')
352 | 
353 |     def get_block_count(self, lh: int)->int:
354 |         """int: Get the count of blocks queued in a block list.
355 | 
356 |         Args:
357 |             lh (int): The index of the list head in the list head shared memory area.
358 |         """
359 |         return self.get_list_head_field(lh, 'block_count')
360 | 
361 |     def get_first_block(self, lh: int)->typing.Optional[int]:
362 |         """Get the first block on a block list, updating the list head fields.
363 | 
364 |         Args:
365 |             lh (int): The index of the list head in the list head shared memory area.
366 | 
367 |         Returns:
368 |             None: No block is available
369 |             int: The block_id of the first available block.
370 |         """
371 | 
372 |         block_count: int = self.get_block_count(lh)
373 |         if block_count == 0:
374 |             return None
375 | 
376 |         block_id: int = self.get_list_head_field(lh, 'first_block')
377 | 
378 |         block_count -= 1
379 |         if block_count == 0:
380 |             self.init_list_head(lh)
381 |         else:
382 |             with self.block_locks[block_id]:
383 |                 maybe_next_block_id: typing.Union[bytes, int] = self.get_meta(self.data_blocks[block_id], 'next_block_id')
384 |                 if isinstance(maybe_next_block_id, int):
385 |                     next_block_id: int = maybe_next_block_id
386 |                 else:
387 |                     raise ValueError("get_first_block internal error: next_block_id is not int.")
388 |             self.set_list_head_field(lh, next_block_id, 'first_block')
389 |             self.set_list_head_field(lh, block_count, 'block_count')
390 |         return block_id
391 | 
392 |     def add_block(self, lh: int, block_id: int):
393 |         """Add a block to a block list.
394 | 
395 |         Args:
396 |             lh (int): The index of the list head in the list head shared memory area.
397 |         """
398 |         block_count: int = self.get_list_head_field(lh, 'block_count')
399 |         if block_count == 0:
400 |             self.set_list_head_field(lh, block_id, 'first_block')
401 |             self.set_list_head_field(lh, block_id, 'last_block')
402 |             self.set_list_head_field(lh, 1, 'block_count')
403 |         
404 |         else:
405 |             last_block: int = self.get_list_head_field(lh, 'last_block')
406 |             with self.block_locks[last_block]:
407 |                 self.set_meta(self.data_blocks[last_block], block_id, 'next_block_id')
408 |             self.set_list_head_field(lh, block_id, 'last_block')
409 |             self.set_list_head_field(lh, block_count + 1, 'block_count')
410 |                 
411 |     def get_free_block_count(self)->int:
412 |         """int: Get the number of free blocks."""
413 |         with self.free_list_lock:
414 |             return self.get_block_count(self.__class__.FREE_LIST_HEAD)
415 | 
416 |     def get_first_free_block(self, block: bool, timeout: typing.Optional[float])->typing.Optional[int]:
417 |         """Get the first free block.
418 | 
419 |            When using semaphores, optionally block with an optional timeout.  If
420 |            you choose to block without a timeout, the method will not return until
421 |            a free block is available.
422 | 
423 |         Args:
424 |             block (bool): When True, and when using semaphores, wait until an
425 |                free block is available or a timeout occurs.
426 |             timeout (typing.Optional[float]): When block is True and timeout is
427 |                positive, block for at most timeout seconds attempting to acquire
428 |                the free block.
429 | 
430 |         Returns:
431 |             None: No block is available
432 |             int: The block_id of the first available block.
433 |         """
434 |         if self.free_list_semaphore is not None:
435 |             self.free_list_semaphore.acquire(block=block, timeout=timeout)
436 |         with self.free_list_lock:
437 |             return self.get_first_block(self.__class__.FREE_LIST_HEAD)
438 | 
439 |     def add_free_block(self, block_id: int):
440 |         """Return a block to the free block list.
441 | 
442 |         Args:
443 |             block_id (int): The identifier of the block being returned.
444 |         """
445 |         with self.free_list_lock:
446 |             self.add_block(self.__class__.FREE_LIST_HEAD, block_id)
447 |         if self.free_list_semaphore is not None:
448 |             self.free_list_semaphore.release()
449 | 
450 |     def get_msg_count(self)->int:
451 |         """int: Get the number of messages on the message list."""
452 |         with self.msg_list_lock:
453 |             return self.get_block_count(self.__class__.MSG_LIST_HEAD)
454 | 
455 |     def get_first_msg(self, block: bool, timeout: typing.Optional[float])->typing.Optional[int]:
456 |         """Take the first available message, if any, from the available message list.
457 | 
458 |            When using semaphores, optionally block with an optional timeout.  If
459 |            you choose to block without a timeout, the method will not return until
460 |            a free block is available.
461 | 
462 |         Args:
463 |             block (bool): When True, and when using semaphores, wait until an
464 |                message is available or a timeout occurs.
465 |             timeout (typing.Optional[float]): When block is True and timeout is
466 |                positive, block for at most timeout seconds attempting to acquire
467 |                the message.
468 | 
469 |         Returns:
470 |             None: No message is available
471 |             int: The block_id of the first chunk of the first available message.
472 |         """
473 |         if self.msg_list_semaphore is not None:
474 |             self.msg_list_semaphore.acquire(block=block, timeout=timeout)
475 |         with self.msg_list_lock:
476 |             return self.get_first_block(self.__class__.MSG_LIST_HEAD)
477 | 
478 |     def add_msg(self, block_id: int):
479 |         """Add a message to the available message list
480 | 
481 |         Args:
482 |             block_id (int): The block identifier of the first chunk of the message.
483 |         """
484 |         with self.msg_list_lock:
485 |             self.add_block(self.__class__.MSG_LIST_HEAD, block_id)
486 |         if self.msg_list_semaphore is not None:
487 |             self.msg_list_semaphore.release()
488 |         
489 |     def generate_msg_id(self)->bytes:
490 |         """bytes: Generate the next message identifier, but do not consume it.
491 | 
492 |         Note:
493 |             Message IDs are assigned independenyly by each process using the queue.
494 |             They need to be paired with the source process ID to be used to identify
495 |             a message for debugging.
496 |         """
497 |         return ("%012x" % (self.mid_counter + 1)).encode('utf-8')
498 | 
499 |     def consume_msg_id(self):
500 |         """Consume a message identifier.
501 | 
502 |         Note:
503 |             Message identifiers are consumed when we are certain that we can process
504 |             the message.  They will not be consumed if we start to process a message
505 |             but fail due to a conition such as insufficient free buffers.
506 |         """
507 |         self.mid_counter += 1
508 | 
509 |     def next_writable_block_id(self, block: bool, timeout: typing.Optional[float], msg_id: bytes, src_pid: int)->int:
510 |         """int: Get the block ID of the first free block.
511 | 
512 |         Get the block ID of the first free block, supporting
513 |         blocking/nonblocking modes and timeouts when blocking, even when
514 |         semaphores are not being used.  Store int he block's metadata area the
515 |         message ID for the message we are building and the pid of the process
516 |         acquiring the block.
517 | 
518 |         Args:
519 |             block (bool): When True, and when using semaphores, wait until an
520 |                free block is available or a timeout occurs.
521 |             timeout (typing.Optional[float]): When block is True and timeout is
522 |                positive, block for at most timeout seconds attempting to acquire
523 |                the free block.
524 |             msg_id (bytes): The message ID assigned to the message being built.
525 |             src_pid: The process ID (pid) of the process that is acquiring the block.
526 | 
527 |         Raises:
528 |             queue.Full: No block is available.  Full is raised immediately in nonblocking
529 |                mode, or after the timeout in blocking mode when a timeout is specified.
530 | 
531 |         """
532 |         looped: bool = False
533 |         loop_cnt: int = 0
534 |         time_start = time.time()
535 |         while True:
536 |             remaining_timeout: typing.Optional[float] = timeout
537 |             if remaining_timeout is not None:
538 |                 remaining_timeout -= (time.time() - time_start)
539 |                 if remaining_timeout <= 0:
540 |                     if self.verbose:
541 |                         print("next_writable_block_id: qid=%d src_pid=%d: queue FULL (timeout)" % (self.qid, src_pid), file=sys.stderr, flush=True) # ***
542 |                     raise Full
543 | 
544 |             block_id: typing.Optional[int] = self.get_first_free_block(block, remaining_timeout)
545 |             if block_id is not None:
546 |                 break
547 | 
548 |             if not block:
549 |                 if self.verbose:
550 |                     print("next_writable_block_id: qid=%d src_pid=%d: FULL (nonblocking)" % (self.qid, src_pid), file=sys.stderr, flush=True) # ***
551 |                 raise Full
552 | 
553 |             if self.deadlock_check or self.verbose:
554 |                 loop_cnt += 1
555 |                 if (self.verbose and loop_cnt == 2) or (self.deadlock_check and loop_cnt % 10000 == 0):
556 |                     looped = True
557 |                     print("next_writable_block_id: qid=%d src_pid=%d: looping (%d loops)" % (self.qid, src_pid, loop_cnt), file=sys.stderr, flush=True) # ***
558 | 
559 |         if looped:
560 |             print("next_writable_block_id: qid=%d src_pid=%d: looping ended after %d loops." % (self.qid, src_pid, loop_cnt), file=sys.stderr, flush=True) # ***
561 | 
562 |         with self.block_locks[block_id]:
563 |             data_block = self.data_blocks[block_id]
564 |             self.set_meta(data_block, msg_id, 'msg_id')
565 |             self.set_meta(data_block, src_pid, 'src_pid')
566 | 
567 |         return block_id
568 | 
569 |     def next_readable_msg(self, block: bool, timeout: typing.Optional[float]=None)->typing.Tuple[int, bytes, int, int, int]:
570 |         """Get the next available message, with blocking and timeouts.
571 | 
572 |         This method returns a 5-tuple: the data block and certain metadata.
573 |         The reason for this complexity is to
574 |         retrieve the metadata under a single access lock.
575 | 
576 |         Args:
577 |             block (bool): When True, and when using semaphores, wait until an
578 |                free block is available or a timeout occurs.
579 |             timeout (typing.Optional[float]): When block is True and timeout is
580 |                positive, block for at most timeout seconds attempting to acquire
581 |                the free block.
582 | 
583 |         Returns:
584 |             src_pid (int): The process iodentifier of the process that originated the message.
585 |             msg_id (bytes): The messag identifier.
586 |             block_id (int): The identifier for the first chunk in the message.
587 |             total_chunks (int): The total number of chunks in the message.
588 |             next_chunk_block_id (int): The identifier for the next chunk in the message.
589 | 
590 |         Raises:
591 |             queue.Empty: no messages are available and either nonblocking mode or a timeout occured.
592 |             ValueError: An internal error occured in accessing the message's metadata.
593 |         """
594 |         i = 0
595 |         time_start = time.time()
596 |         while True:
597 |             remaining_timeout: typing.Optional[float] = timeout
598 |             if remaining_timeout is not None:
599 |                 remaining_timeout -= (time.time() - time_start)
600 |                 if remaining_timeout <= 0:
601 |                     raise Empty
602 |             block_id: typing.Optional[int] = self.get_first_msg(block=block, timeout=remaining_timeout)
603 |             if block_id is not None:
604 |                 break
605 | 
606 |             if not block:
607 |                 raise Empty
608 |             
609 |         with self.block_locks[block_id]:
610 |             data_block = self.data_blocks[block_id]
611 |             src_pid: typing.Union[bytes, int] = self.get_meta(data_block, 'src_pid')
612 |             msg_id: typing.Union[bytes, int] = self.get_meta(data_block, 'msg_id')
613 |             total_chunks: typing.Union[bytes, int] = self.get_meta(data_block, 'total_chunks')
614 |             next_chunk_block_id: typing.Union[bytes, int] = self.get_meta(data_block, 'next_chunk_block_id')
615 |             if isinstance(src_pid, int) and isinstance (msg_id, bytes) and isinstance(total_chunks, int) and isinstance(next_chunk_block_id, int):
616 |                 return src_pid, msg_id, block_id, total_chunks, next_chunk_block_id
617 |             else:
618 |                 raise ValueError("next_readable_msg: internal error extracting data block metadata.")
619 | 
620 |     # def debug_data_block(self):
621 |     #     for b in self.data_blocks:
622 |     #         print(bytes(b.buf[0:24]))
623 | 
624 |     def put(self, msg: typing.Any, block: bool=True, timeout: typing.Optional[float]=None):
625 | 
626 |         """
627 |         Put an object into a shared memory queue.
628 | 
629 |         Args:
630 |             msg (obj): The object which is to be put into queue.
631 |             block (bool, optional): If it is set to True (default), it will return after an item is put into queue.
632 |             timeout (int, optional): A positive integer for the timeout duration in seconds, which is only effective when `block` is set to True.
633 | 
634 |         Raises:
635 |             queue.Full: Raised if the call times out or the queue is full when `block` is False.
636 |             ValueError: An internal error occured in accessing the message's metadata.
637 |             ValueError: A request was made to send a message that, when serialized, exceeds the capacity of the queue.
638 |             PicklingError: This exception is raised when the serializer is pickle and
639 |                 an error occured in serializing the message.
640 |             UnpicklingError: This exception is raised when the serializer is pickle and
641 |                 an error occured in deserializing the message for an integrity check.
642 | 
643 |         Note:
644 |             - Errors other then PicklingError might be raised if a serialized other then
645 |               pickle is specified.
646 |         """
647 |         if timeout is not None:
648 |             if not block:
649 |                 raise ValueError("A timeout is allowed only when not blocking.")
650 |             if timeout < 0:
651 |                 raise Full
652 | 
653 |         msg_id: bytes = self.generate_msg_id()
654 |         src_pid: int = os.getpid()
655 |         msg_body: bytes = self.serializer.dumps(msg) # type: ignore[union-attr]
656 |         if self.integrity_check:
657 |             total_msg_size: int = len(msg_body)
658 |             msg2: typing.Any = self.serializer.loads(msg_body) # type: ignore[union-attr]
659 |             if self.verbose:
660 |                 print("put: qid=%d src_pid=%d msg_id=%r: serialization integrity check is OK." % (self.qid, src_pid, msg_id), file=sys.stderr, flush=True) # ***
661 |             
662 |         total_chunks: int = math.ceil(len(msg_body) / self.chunk_size)
663 |         if self.verbose:
664 |             print("put: qid=%d src_pid=%d msg_id=%r: total_chunks=%d len(msg_body)=%d chunk_size=%d" % (self.qid, src_pid, msg_id, total_chunks, len(msg_body), self.chunk_size), file=sys.stderr, flush=True) # ***
665 |         if self.watermark_check or self.verbose:
666 |             if total_chunks > self.chunk_watermark:
667 |                 print("put: qid=%d src_pid=%d msg_id=%r: total_chunks=%d maxsize=%d new watermark" % (self.qid, src_pid, msg_id, total_chunks, self.maxsize), file=sys.stderr, flush=True) # ***
668 |                 self.chunk_watermark = total_chunks
669 | 
670 |         if self.deadlock_immanent_check and total_chunks > self.maxsize:
671 |             raise ValueError("DEADLOCK IMMANENT: qid=%d src_pid=%d: total_chunks=%d > maxsize=%d" % (self.qid, src_pid, total_chunks, self.maxsize))
672 |         
673 |         time_start: float = time.time()
674 | 
675 |         # We acquire the producer lock to avoid deadlock if multiple
676 |         # producers need multiple chunks each.
677 |         lock_acquired: bool = self.producer_lock.acquire(timeout=timeout)
678 |         if not lock_acquired:
679 |             # We must have timed out.
680 |             if self.verbose:
681 |                 print("put: qid=%d src_pid=%d msg_id=%r: queue FULL" % (self.qid, src_pid, msg_id), file=sys.stderr, flush=True) # ***
682 |             raise Full
683 | 
684 |         block_id: int
685 |         block_id_list: typing.List[int] = [ ]
686 |         try:
687 |             # In case we will process more than one chunk and this is a
688 |             # nonblocking or timed out request, start by reserving all the
689 |             # blocks that we will need.
690 |             i: int
691 |             for i in range(total_chunks):
692 |                 try:
693 |                     remaining_timeout: typing.Optional[float] = timeout
694 |                     if remaining_timeout is not None:
695 |                         remaining_timeout -= (time.time() - time_start)
696 |                         if remaining_timeout <= 0:
697 |                             if self.verbose:
698 |                                 print("put: qid=%d src_pid=%d msg_id=%r: queue FULL" % (self.qid, src_pid, msg_id), file=sys.stderr, flush=True) # ***
699 |                             raise Full
700 | 
701 |                     block_id = self.next_writable_block_id(block, remaining_timeout, msg_id, src_pid)
702 |                     block_id_list.append(block_id)
703 | 
704 |                 except Full:
705 |                     # We failed to find a free block and/or a timeout occured.
706 |                     # Release the reserved blocks.
707 |                     if self.verbose:
708 |                         print("put: qid=%d src_pid=%d msg_id=%r: releasing %d blocks" % (self.qid, src_pid, msg_id, len(block_id_list)), file=sys.stderr, flush=True) # ***
709 |                     for block_id in block_id_list:
710 |                         self.add_free_block(block_id)
711 |                     raise
712 | 
713 |         finally:
714 |             # Now that we have acquired the full set of chunks, we can release
715 |             # the producer lock.  We don't want to hold it while we transfer
716 |             # data into the blocks.
717 |             if self.verbose:
718 |                 print("put: qid=%d src_pid=%d msg_id=%r: releasing producer lock" % (self.qid, src_pid, msg_id), file=sys.stderr, flush=True) # *** 
719 |             self.producer_lock.release()
720 | 
721 |             # Consume this message ID.
722 |             self.consume_msg_id()
723 | 
724 |         if self.verbose:
725 |             print("put: qid=%d src_pid=%d msg_id=%r: acquired %d blocks" % (self.qid, src_pid, msg_id, total_chunks), file=sys.stderr, flush=True) # *** 
726 | 
727 |         # Now that we have a full set of blocks, build the
728 |         # chunks:
729 |         block_idx: int
730 |         for block_idx, block_id in enumerate(block_id_list):
731 |             chunk_id = block_idx + 1
732 |             if self.verbose:
733 |                 print("put: qid=%d src_pid=%d msg_id=%r: chunk_id=%d of total_chunks=%d" % (self.qid, src_pid, msg_id, chunk_id, total_chunks), file=sys.stderr, flush=True) # *** 
734 |                
735 |             data_block: SharedMemory = self.data_blocks[block_id]
736 |             chunk_data: bytes = msg_body[block_idx * self.chunk_size: (block_idx + 1) * self.chunk_size]
737 |             msg_size: int = len(chunk_data)
738 |             if self.verbose:
739 |                 print("put: qid=%d src_pid=%d msg_id=%r: chunk_id=%d: block_id=%d msg_size=%d." % (self.qid, src_pid, msg_id, chunk_id, block_id, msg_size), file=sys.stderr, flush=True) # ***
740 |             if self.integrity_check:
741 |                 checksum: int = zlib.adler32(chunk_data)
742 |                 if self.verbose:
743 |                     print("put: qid=%d src_pid=%d msg_id=%r: chunk_id=%d: checksum=%x total_msg_size=%d" % (self.qid, src_pid, msg_id, chunk_id, checksum, total_msg_size), file=sys.stderr, flush=True) # ***
744 | 
745 |             with self.block_locks[block_id]:
746 |                 self.set_meta(data_block, msg_id, 'msg_id')
747 |                 self.set_meta(data_block, msg_size, 'msg_size')
748 |                 self.set_meta(data_block, chunk_id, 'chunk_id')
749 |                 self.set_meta(data_block, total_chunks, 'total_chunks')
750 |                 if self.integrity_check:
751 |                     self.set_meta(data_block, total_msg_size, 'total_msg_size')
752 |                     self.set_meta(data_block, checksum, 'checksum')
753 |                 if chunk_id == total_chunks:
754 |                     # No more chunks, store a reserved value to simplify debugging.
755 |                     self.set_meta(data_block, self.__class__.RESERVED_BLOCK_ID, 'next_chunk_block_id')
756 |                 else:
757 |                     # Store the block ID of the next chunk.
758 |                     self.set_meta(data_block, block_id_list[block_idx + 1], 'next_chunk_block_id')
759 |                 self.set_data(data_block, chunk_data, msg_size)
760 | 
761 |         # Now that the entire message has built, queue it:
762 |         self.add_msg(block_id_list[0])
763 |         if self.verbose:
764 |             print("put: qid=%d src_pid=%d msg_id=%r: message sent" % (self.qid, src_pid, msg_id), file=sys.stderr, flush=True) # *** 
765 | 
766 |     def get(self, block: bool=True, timeout: typing.Optional[float]=None)->typing.Any:
767 |         """
768 |         Get the next available message from the queue.
769 | 
770 |         Args:
771 |             block (bool, optional): If it is set to True (default), it will only return when an item is available.
772 |             timeout (int, optional): A positive integer for the timeout duration in seconds, which is only effective when `block` is set to True.
773 | 
774 |         Returns:
775 |             object: A message object retrieved from the queue.
776 | 
777 |         Raises:
778 |             queue.Empty: This exception will be raised if it times out or queue is empty when `block` is False.
779 |             ValueError: An internal error occured in accessing the message's metadata.
780 |             UnpicklingError: This exception is raised when the serializer is pickle and
781 |                 an error occured in deserializing the message.
782 | 
783 |         Note:
784 |             - Errors other then UnpicklingError might be raised if a serialized other then
785 |               pickle is specified.
786 |         """
787 |         time_start: float = time.time()
788 | 
789 |         # We will build a list of message chunks.  We can't
790 |         # release them until after we deserialize the data.
791 |         block_id: int
792 |         chunk_id: int
793 |         msg_block_ids: typing.List[int] = [ ]
794 |         data_block: SharedMemory
795 |         
796 |         try:
797 |             remaining_timeout: typing.Optional[float] = timeout
798 |             if remaining_timeout is not None:
799 |                 remaining_timeout -= (time.time() - time_start)
800 |                 if remaining_timeout <= 0:
801 |                     if self.verbose:
802 |                         print("put: qid=%d src_pid=%d msg_id=%r: queue EMPTY" % (self.qid, src_pid, msg_id), file=sys.stderr, flush=True) # ***
803 |                     raise Empty
804 | 
805 |             src_pid: int
806 |             msg_id: bytes
807 |             total_chunks: int
808 |             next_chunk_block_id: int
809 |             src_pid, msg_id, block_id, total_chunks, next_chunk_block_id = self.next_readable_msg(block, remaining_timeout) # This call might raise Empty.
810 |             if self.verbose:
811 |                 print("get: qid=%d src_pid=%d msg_id=%r: total_chunks=%d next_chunk_block_id=%d." % (self.qid, src_pid, msg_id, total_chunks, next_chunk_block_id), file=sys.stderr, flush=True) # ***
812 |             msg_block_ids.append(block_id)
813 | 
814 |             # Acquire the chunks for the rest of the message:
815 |             i: int
816 |             for i in range(1, total_chunks):
817 |                 chunk_id = i + 1
818 |                 if self.verbose:
819 |                     print("get: qid=%d src_pid=%d msg_id=%r: chunk_id=%d: block_id=%d." % (self.qid, src_pid, msg_id, chunk_id, next_chunk_block_id), file=sys.stderr, flush=True) # ***
820 |                 msg_block_ids.append(next_chunk_block_id)
821 |                 data_block = self.data_blocks[next_chunk_block_id]
822 |                 with self.block_locks[next_chunk_block_id]:
823 |                     maybe_next_chunk_block_id: typing.Union[bytes, int] = self.get_meta(data_block, 'next_chunk_block_id')
824 |                     if isinstance(maybe_next_chunk_block_id, int):
825 |                         next_chunk_block_id = maybe_next_chunk_block_id
826 |                     else:
827 |                         raise ValueError("get: internal error getting next_chunk_block_id")
828 | 
829 |         except Exception:
830 |             # Release the data blocks (losing the message) if we get an
831 |             # unexpected exception:
832 |             if self.verbose:
833 |                 print("put: qid=%d: releasing data blocks due to Exception" % self.qid, file=sys.stderr, flush=True) # *** 
834 |             for block_id in msg_block_ids:
835 |                 self.add_free_block(block_id)
836 |             msg_block_ids.clear()
837 |             raise
838 | 
839 |         buf_msg_body: typing.List[bytes] = []
840 |         try:
841 |             block_idx: int
842 |             for block_idx, block_id in enumerate(msg_block_ids):
843 |                 chunk_id = block_idx + 1
844 |                 data_block = self.data_blocks[block_id]
845 |                 with self.block_locks[block_id]:
846 |                     maybe_msg_size: typing.Union[bytes, int] = self.get_meta(data_block, 'msg_size')
847 |                     if isinstance(maybe_msg_size, int):
848 |                         msg_size: int = maybe_msg_size
849 |                     else:
850 |                         raise ValueError("get: internal error getting msg_size")
851 |                     if self.integrity_check:
852 |                         if block_idx == 0:
853 |                             maybe_total_msg_size: typing.Union[bytes, int] = self.get_meta(data_block, 'total_msg_size')
854 |                             if isinstance(maybe_total_msg_size, int):
855 |                                 total_msg_size: int = maybe_total_msg_size
856 |                             else:
857 |                                 raise ValueError("set: internal errpor getting total_msg_size")
858 |                         maybe_checksum: typing.Union[bytes, int] = self.get_meta(data_block, 'checksum')
859 |                         if isinstance(maybe_checksum, int):
860 |                             checksum: int = maybe_checksum
861 |                         else:
862 |                             raise ValueError("get: internal error getting checksum")
863 |                     chunk_data: bytes = self.get_data(data_block, msg_size) # This may make a reference, not a deep copy.
864 |                 if self.verbose:
865 |                     print("get: qid=%d src_pid=%d msg_id=%r: chunk_id=%d: block_id=%d msg_size=%d total_chunks=%d." % (self.qid, src_pid, msg_id, chunk_id, block_id, msg_size, total_chunks), file=sys.stderr, flush=True) # ***
866 |                 if self.integrity_check:
867 |                     checksum2: int = zlib.adler32(chunk_data)
868 |                     if checksum == checksum2:
869 |                         if self.verbose:
870 |                             print("get: qid=%d src_pid=%d msg_id=%r: chunk_id=%d: checksum=%x is OK" % (self.qid, src_pid, msg_id, chunk_id, checksum), file=sys.stderr, flush=True) # ***
871 |                     else:
872 |                         raise ValueError("ShmQueue.get: qid=%d src_pid=%d msg_id=%r: chunk_id=%d: block_id=%d checksum=%x != checksum2=%x -- FAIL!" % (self.qid, src_pid, msg_id, chunk_id, block_id, checksum, checksum2)) # TODO: use a better exception
873 | 
874 |                 buf_msg_body.append(chunk_data) # This may copy the reference.
875 | 
876 |             msg_body: bytes = b''.join(buf_msg_body) # Even this might copy the references.
877 |             if self.integrity_check:
878 |                 if total_msg_size == len(msg_body):
879 |                     if self.verbose:
880 |                         print("get: qid=%d src_pid=%d msg_id=%r: total_msg_size=%d is OK" % (self.qid, src_pid, msg_id, total_msg_size), file=sys.stderr, flush=True) # ***
881 |                 else:
882 |                     raise ValueError("get: qid=%d src_pid=%d msg_id=%r: total_msg_size=%d != len(msg_body)=%d -- FAIL!" % (self.qid, src_pid, msg_id, total_msg_size, len(msg_body))) # TODO: use a beter exception.
883 | 
884 |             try:
885 |                 # Finally, we are guaranteed to copy the data.
886 |                 msg: typing.Any = self.serializer.loads(msg_body)  # type: ignore[union-attr]
887 | 
888 |                 # We could release the blocks here, but then we'd have to
889 |                 # release them in the except clause, too.
890 | 
891 |                 return msg
892 | 
893 |             except pickle.UnpicklingError as e:
894 |                 print("get: Fail: qid=%d src_pid=%d msg_id=%r: msg_size=%d chunk_id=%d total_chunks=%d." % (self.qid, src_pid, msg_id, msg_size, chunk_id, total_chunks), file=sys.stderr, flush=True) # ***
895 |                 if self.integrity_check:
896 |                     print("get: Fail: qid=%d src_pid=%d msg_id=%r: total_msg_size=%d checksum=%x" % (self.qid, src_pid, msg_id, total_msg_size, checksum), file=sys.stderr, flush=True) # ***
897 |                 raise
898 |     
899 |         finally:
900 |             # It is now safe to release the data blocks.  This is a good place
901 |             # to release them, because it covers error paths as well as the main return.
902 |             if self.verbose:
903 |                 print("get: qid=%d src_pid=%d msg_id=%r: releasing %d blocks." % (self.qid, src_pid, msg_id, len(msg_block_ids)), file=sys.stderr, flush=True) # ***
904 |             for block_id in msg_block_ids:
905 |                 self.add_free_block(block_id)
906 |             msg_block_ids.clear()
907 |             buf_msg_body.clear()
908 | 
909 |     def get_nowait(self)->typing.Any:
910 |         """
911 |         Equivalent to `get(False)`.
912 |         """
913 |         return self.get(False)
914 | 
915 |     def put_nowait(self, msg: typing.Any):
916 |         """
917 |         Equivalent to `put(obj, False)`.
918 |         """
919 |         return self.put(msg, False)
920 | 
921 |     def qsize(self)->int:
922 |         """int: Return the number of ready messages."""
923 |         return self.get_msg_count()
924 | 
925 |     def empty(self)->bool:
926 |         """bool: True when no messages are ready."""
927 |         return self.get_msg_count() == 0
928 | 
929 |     def full(self)->bool:
930 |         """bool: True when no free blocks are available."""
931 |         return self.get_free_block_count() == 0
932 | 
933 |     def close(self):
934 |         """
935 |         Indicate no more new data will be added and release the shared memory areas.
936 |         """
937 |         block: SharedMemory
938 |         for block in self.data_blocks:
939 |             block.close()
940 |             block.unlink()
941 | 
942 |         self.list_heads.close()
943 |         self.list_heads.unlink()
944 | 
945 |     def __del__(self):
946 |         pass
947 | 


--------------------------------------------------------------------------------