├── .github └── workflows │ ├── github-pages.yml │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bin └── mpire-dashboard ├── docs ├── Makefile ├── _static │ └── css │ │ └── custom.css ├── changelog.rst ├── conf.py ├── contributing.rst ├── getting_started.rst ├── index.rst ├── install.rst ├── mpire.rst ├── reference │ └── index.rst ├── troubleshooting.rst └── usage │ ├── apply.rst │ ├── dashboard.rst │ ├── index.rst │ ├── map │ ├── index.rst │ ├── map.rst │ ├── max_tasks_active.rst │ ├── numpy.rst │ ├── progress_bar.rst │ ├── task_chunking.rst │ ├── timeouts.rst │ ├── worker_init_exit.rst │ └── worker_lifespan.rst │ ├── mpire_dashboard.png │ ├── mpire_dashboard_error.png │ ├── mpire_dashboard_insights.png │ ├── mpire_dashboard_keyboard_interrupt.png │ └── workerpool │ ├── cpu_pinning.rst │ ├── dill.rst │ ├── index.rst │ ├── keep_alive.rst │ ├── order_tasks.rst │ ├── setup.rst │ ├── shared_objects.rst │ ├── start_method.rst │ ├── worker_id.rst │ ├── worker_insights.rst │ └── worker_state.rst ├── images └── benchmarks_averaged.png ├── mpire ├── __init__.py ├── async_result.py ├── comms.py ├── context.py ├── dashboard │ ├── __init__.py │ ├── connection_classes.py │ ├── connection_utils.py │ ├── dashboard.py │ ├── manager.py │ ├── static │ │ ├── bootstrap.bundle.min.js │ │ ├── bootstrap.min.css │ │ ├── fonts │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ ├── glyphicons-halflings-regular.svg │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ ├── glyphicons-halflings-regular.woff │ │ │ └── glyphicons-halflings-regular.woff2 │ │ ├── glyphicons.css │ │ ├── jquery-ui.min.js │ │ ├── jquery.min.js │ │ ├── refresh.js │ │ └── style.css │ ├── templates │ │ ├── index.html │ │ ├── menu_top_right.html │ │ ├── mpire.html │ │ └── progress_bar.html │ └── utils.py ├── exception.py ├── insights.py ├── params.py ├── pool.py ├── progress_bar.py ├── py.typed ├── signal.py ├── tqdm_utils.py ├── utils.py └── worker.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── test_async_result.py ├── test_comms.py ├── test_insights.py ├── test_params.py ├── test_pool.py ├── test_signal.py ├── test_utils.py └── utils.py /.github/workflows/github-pages.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: push 4 | 5 | jobs: 6 | build-n-publish: 7 | name: Build and publish documentation to Github 8 | runs-on: ubuntu-20.04 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | - name: Set up Python 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: "3.6" 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools wheel twine rich 20 | pip install .[dashboard] 21 | pip install .[dill] 22 | pip install .[docs] 23 | - name: Build documentation 24 | run: | 25 | sphinx-versioning build -r master ./docs/ ./docs/_build/html/ 26 | - name: Publish documentation to Github 27 | if: startsWith(github.ref, 'refs/tags') 28 | uses: peaceiris/actions-gh-pages@v3.8.0 29 | with: 30 | deploy_key: ${{ secrets.DEPLOY_GITHUB_PAGES_KEY }} 31 | external_repository: sybrenjansen/sybrenjansen.github.io 32 | publish_branch: main 33 | publish_dir: ./docs/_build/html/ 34 | destination_dir: mpire 35 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python 
dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Build 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-20.04, windows-latest, macos-latest] 19 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install flake8 pytest 31 | pip install .[dashboard] 32 | pip install .[dill] 33 | pip install .[testing] 34 | - name: Set ulimit for macOS 35 | if: matrix.os == 'macos-latest' 36 | run: | 37 | ulimit -a 38 | ulimit -n 1024 39 | - name: Lint with flake8 40 | run: | 41 | # stop the build if there are Python syntax errors or undefined names 42 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 43 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 44 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 45 | - name: Test with pytest 46 | timeout-minutes: 30 47 | run: | 48 | pytest -v -o log_cli=true -s 49 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: push 4 | 5 | jobs: 6 | build-n-publish: 7 | name: Build and publish Python distributions to PyPI 8 | runs-on: ubuntu-20.04 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | - name: Set up Python 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: "3.6" 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools wheel twine 20 | - name: Build a binary wheel and a source tarball 21 | run: | 22 | python setup.py sdist 23 | python setup.py bdist_wheel 24 | - name: Publish distribution to PyPI 25 | if: startsWith(github.ref, 'refs/tags') 26 | uses: pypa/gh-action-pypi-publish@master 27 | with: 28 | user: __token__ 29 | password: ${{ secrets.PYPI_PASSWORD }} 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | build 4 | _build 5 | dist 6 | *.egg-info 7 | .eggs 8 | .pytest_cache 9 | 10 | # MacOS 11 | .DS_store 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Sybren Jansen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice 
shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | recursive-include mpire/dashboard/static *.eot *.svg *.ttf *.woff *.woff2 *.js *.css 4 | recursive-include mpire/dashboard/templates *.html 5 | include requirements.txt 6 | include setup.cfg 7 | include MANIFEST.in 8 | include mpire/py.typed 9 | -------------------------------------------------------------------------------- /bin/mpire-dashboard: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import signal 4 | from typing import Sequence 5 | 6 | from mpire.dashboard import start_dashboard 7 | 8 | 9 | def get_port_range() -> Sequence: 10 | """ 11 | :return: port range 12 | """ 13 | def _port_range(range_str) -> Sequence: 14 | n1, n2 = map(int, range_str.split('-')) 15 | if len(range(n1, n2)) < 2: 16 | raise ValueError 17 | return range(n1, n2) 18 | 19 | parser = argparse.ArgumentParser(description='MPIRE Dashboard') 20 | parser.add_argument('--port-range', dest='port_range', required=False, default=range(8080, 8100), type=_port_range, 21 | help='Port range for starting a dashboard. The range should accommodate at least two ports: ' 22 | 'one for the webserver and one for the Python Manager server. Example: 6060-6080 will be ' 23 | 'converted to `range(6060, 6080)`. Default: `range(8080, 8100)`.') 24 | return parser.parse_args().port_range 25 | 26 | 27 | if __name__ == '__main__': 28 | # Obtain port range 29 | port_range = get_port_range() 30 | 31 | # Start a dashboard 32 | print("Starting MPIRE dashboard...") 33 | dashboard_details = start_dashboard(port_range) 34 | 35 | # Print some details on how to connect 36 | print() 37 | print("MPIRE dashboard started on http://localhost:{}".format(dashboard_details['dashboard_port_nr'])) 38 | print("Server is listening on {}:{}".format(dashboard_details['manager_host'], 39 | dashboard_details['manager_port_nr'])) 40 | print("-" * 50) 41 | signal.pause() 42 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lexsys.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lexsys.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. 
You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/lexsys" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lexsys" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 
196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .strike { 2 | text-decoration: line-through; 3 | } 4 | 5 | /* From theme.css, but .section has been replaced by section to work around the new change of
<div class="section"> to 6 | <section>
, which for some reason happened 7 | */ 8 | .rst-content section ul { 9 | list-style:disc; 10 | line-height:24px; 11 | margin-bottom:24px 12 | } 13 | .rst-content section ul li { 14 | list-style:disc; 15 | margin-left:24px 16 | } 17 | .rst-content section ul li p:last-child, 18 | .rst-content section ul li ul { 19 | margin-top:0; 20 | margin-bottom:0 21 | } 22 | .rst-content section ul li li { 23 | list-style:circle 24 | } 25 | .rst-content section ul li li li { 26 | list-style:square 27 | } 28 | .rst-content section ul li ol li { 29 | list-style:decimal 30 | } 31 | .rst-content section ol { 32 | list-style:decimal; 33 | line-height:24px; 34 | margin-bottom:24px 35 | } 36 | .rst-content section ol li { 37 | list-style:decimal; 38 | margin-left:24px 39 | } 40 | .rst-content section ol li p:last-child, 41 | .rst-content section ol li ul { 42 | margin-bottom:0 43 | } 44 | .rst-content section ol li ul li { 45 | list-style:disc 46 | } 47 | .rst-content section ol.loweralpha, 48 | .rst-content section ol.loweralpha>li { 49 | list-style:lower-alpha 50 | } 51 | .rst-content section ol.upperalpha, 52 | .rst-content section ol.upperalpha>li { 53 | list-style:upper-alpha 54 | } 55 | .rst-content section ol li>*, 56 | .rst-content section ul li>* { 57 | margin-top:12px; 58 | margin-bottom:12px 59 | } 60 | .rst-content section ol li>:first-child, 61 | .rst-content section ul li>:first-child { 62 | margin-top:0 63 | } 64 | .rst-content section ol li>p, 65 | .rst-content section ol li>p:last-child, 66 | .rst-content section ul li>p, 67 | .rst-content section ul li>p:last-child { 68 | margin-bottom:12px 69 | } 70 | .rst-content section ol li>p:only-child, 71 | .rst-content section ol li>p:only-child:last-child, 72 | .rst-content section ul li>p:only-child, 73 | .rst-content section ul li>p:only-child:last-child { 74 | margin-bottom:0 75 | } 76 | .rst-content section ol li>ol, 77 | .rst-content section ol li>ul, 78 | .rst-content section ul li>ol, 79 | .rst-content section ul li>ul { 80 | margin-bottom:12px 81 | } 82 | .rst-content section ol.simple li>*, 83 | .rst-content section ol.simple li ol, 84 | .rst-content section ol.simple li ul, 85 | .rst-content section ul.simple li>*, 86 | .rst-content section ul.simple li ol, 87 | .rst-content section ul.simple li ul { 88 | margin-top:0; 89 | margin-bottom:0 90 | } -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contribution guidelines 2 | ======================= 3 | 4 | If you want to contribute to MPIRE, great! Please follow the steps below to ensure a smooth process: 5 | 6 | 1. Clone the project. 7 | 2. Create a new branch for your feature or bug fix. Give you branch a meaningful name. 8 | 3. Make your feature addition or bug fix. 9 | 4. Add tests for it and test it yourself. Make sure it both works for Unix and Windows based systems, or make sure to 10 | document why it doesn't work for one of the platforms. 11 | 5. Add documentation for it. Don't forget about the changelog: 12 | 13 | - Reference the issue number from GitHub in the changelog, if applicable (see current changelog for examples). 14 | - Don't mention a date or a version number here, but use ``Unreleased`` instead. 15 | 16 | 6. Commit with a meaningful commit message (e.g. the changelog). 17 | 7. Open a pull request. 18 | 8. Resolve any issues or comments by the reviewer. 19 | 9. Merge PR by squashing all your individual commits. 
20 | 21 | Making a release 22 | ---------------- 23 | 24 | A release is only made by the project maintainer. The following steps are required: 25 | 26 | 1. Update the changelog with the release date and version number. Version numbers follow the `Semantic Versioning`_ 27 | guidelines 28 | 2. Update the version number in ``setup.py`` and ``docs/conf.py``. 29 | 3. Commit and push the changes. 30 | 4. Make sure the tests pass on GitHub Actions. 31 | 5. Create a tag for the release by using ``git tag -a vX.Y.Z -m "vX.Y.Z"``. 32 | 6. Push the tag to GitHub by using ``git push origin vX.Y.Z``. 33 | 34 | .. _Semantic Versioning: https://semver.org/ 35 | -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | Getting started 2 | =============== 3 | 4 | Suppose you have a time consuming function that receives some input and returns its results. This could look like the 5 | following: 6 | 7 | .. code-block:: python 8 | 9 | import time 10 | 11 | def time_consuming_function(x): 12 | time.sleep(1) # Simulate that this function takes long to complete 13 | return ... 14 | 15 | results = [time_consuming_function(x) for x in range(10)] 16 | 17 | Running this function takes about 10 seconds to complete. 18 | 19 | Functions like these are known as `embarrassingly parallel`_ problems, functions that require little to no effort to 20 | turn into a parallel task. Parallelizing a simple function as this can be as easy as importing ``multiprocessing`` and 21 | using the ``multiprocessing.Pool`` class: 22 | 23 | .. _embarrassingly parallel: https://en.wikipedia.org/wiki/Embarrassingly_parallel 24 | 25 | .. code-block:: python 26 | 27 | from multiprocessing import Pool 28 | 29 | with Pool(processes=5) as pool: 30 | results = pool.map(time_consuming_function, range(10)) 31 | 32 | We configured to have 5 workers, so we can handle 5 tasks in parallel. As a result, this function will complete in about 33 | 2 seconds. 34 | 35 | MPIRE can be used almost as a drop-in replacement to ``multiprocessing``. We use the :obj:`mpire.WorkerPool` class and 36 | call one of the available ``map`` functions: 37 | 38 | .. code-block:: python 39 | 40 | from mpire import WorkerPool 41 | 42 | with WorkerPool(n_jobs=5) as pool: 43 | results = pool.map(time_consuming_function, range(10)) 44 | 45 | Similarly, this will complete in about 2 seconds. The differences in code are small: there's no need to learn a 46 | completely new multiprocessing syntax, if you're used to vanilla ``multiprocessing``. The additional available 47 | functionality, though, is what sets MPIRE apart. 48 | 49 | Progress bar 50 | ------------ 51 | 52 | Suppose we want to know the status of the current task: how many tasks are completed, how long before the work is ready? 53 | It's as simple as setting the ``progress_bar`` parameter to ``True``: 54 | 55 | .. code-block:: python 56 | 57 | with WorkerPool(n_jobs=5) as pool: 58 | results = pool.map(time_consuming_function, range(10), progress_bar=True) 59 | 60 | And it will output a nicely formatted tqdm_ progress bar. 61 | 62 | MPIRE also offers a dashboard, for which you need to install additional :ref:`dependencies `. See 63 | :ref:`Dashboard` for more information. 64 | 65 | .. 
_tqdm: https://tqdm.github.io/ 66 | 67 | 68 | Shared objects 69 | -------------- 70 | 71 | If you have one or more objects that you want to share between all workers you can make use of the copy-on-write 72 | ``shared_objects`` option of MPIRE. MPIRE will pass on these objects only once for each worker without 73 | copying/serialization. Only when the object is altered in the worker function it will start copying it for that worker. 74 | 75 | .. note:: 76 | 77 | Copy-on-write is not available on Windows, as it requires the start method ``fork``. 78 | 79 | .. code-block:: python 80 | 81 | def time_consuming_function(some_object, x): 82 | time.sleep(1) # Simulate that this function takes long to complete 83 | return ... 84 | 85 | def main(): 86 | some_object = ... 87 | with WorkerPool(n_jobs=5, shared_objects=some_object, start_method='fork') as pool: 88 | results = pool.map(time_consuming_function, range(10), progress_bar=True) 89 | 90 | See :ref:`shared_objects` for more details. 91 | 92 | Worker initialization 93 | --------------------- 94 | 95 | Need to initialize each worker before starting the work? Have a look at the ``worker_state`` and ``worker_init`` 96 | functionality: 97 | 98 | .. code-block:: python 99 | 100 | def init(worker_state): 101 | # Load a big dataset or model and store it in a worker specific worker_state 102 | worker_state['dataset'] = ... 103 | worker_state['model'] = ... 104 | 105 | def task(worker_state, idx): 106 | # Let the model predict a specific instance of the dataset 107 | return worker_state['model'].predict(worker_state['dataset'][idx]) 108 | 109 | with WorkerPool(n_jobs=5, use_worker_state=True) as pool: 110 | results = pool.map(task, range(10), worker_init=init) 111 | 112 | Similarly, you can use the ``worker_exit`` parameter to let MPIRE call a function whenever a worker terminates. You can 113 | even let this exit function return results, which can be obtained later on. See the :ref:`worker_init_exit` section for 114 | more information. 115 | 116 | 117 | Worker insights 118 | --------------- 119 | 120 | When your multiprocessing setup isn't performing as you want it to and you have no clue what's causing it, there's the 121 | worker insights functionality. This will give you some insight in your setup, but it will not profile the function 122 | you're running (there are other libraries for that). Instead, it profiles the worker start up time, waiting time and 123 | working time. When worker init and exit functions are provided it will time those as well. 124 | 125 | Perhaps you're sending a lot of data over the task queue, which makes the waiting time go up. Whatever the case, you 126 | can enable and grab the insights using the ``enable_insights`` flag and :meth:`mpire.WorkerPool.get_insights` function, 127 | respectively: 128 | 129 | .. code-block:: python 130 | 131 | with WorkerPool(n_jobs=5, enable_insights=True) as pool: 132 | results = pool.map(time_consuming_function, range(10)) 133 | insights = pool.get_insights() 134 | 135 | See :ref:`worker insights` for a more detailed example and expected output. 136 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to the MPIRE documentation! 2 | =================================== 3 | 4 | MPIRE, short for MultiProcessing Is Really Easy, is a Python package for multiprocessing. 
MPIRE is faster in 5 | most scenarios, packs more features, and is generally more user-friendly than the default multiprocessing package. It 6 | combines the convenient map like functions of ``multiprocessing.Pool`` with the benefits of using copy-on-write shared 7 | objects of ``multiprocessing.Process``, together with easy-to-use worker state, worker insights, worker init and exit 8 | functions, timeouts, and progress bar functionality. 9 | 10 | Features 11 | -------- 12 | 13 | - Faster execution than other multiprocessing libraries. See benchmarks_. 14 | - Intuitive, Pythonic syntax 15 | - Multiprocessing with ``map``/``map_unordered``/``imap``/``imap_unordered``/``apply``/``apply_async`` functions 16 | - Easy use of copy-on-write shared objects with a pool of workers (copy-on-write is only available for start method 17 | ``fork``, so it's not supported on Windows) 18 | - Each worker can have its own state and with convenient worker init and exit functionality this state can be easily 19 | manipulated (e.g., to load a memory-intensive model only once for each worker without the need of sending it through a 20 | queue) 21 | - Progress bar support using tqdm_ (``rich`` and notebook widgets are supported) 22 | - Progress dashboard support 23 | - Worker insights to provide insight into your multiprocessing efficiency 24 | - Graceful and user-friendly exception handling 25 | - Timeouts, including for worker init and exit functions 26 | - Automatic task chunking for all available map functions to speed up processing of small task queues (including numpy 27 | arrays) 28 | - Adjustable maximum number of active tasks to avoid memory problems 29 | - Automatic restarting of workers after a specified number of tasks to reduce memory footprint 30 | - Nested pool of workers are allowed when setting the ``daemon`` option 31 | - Child processes can be pinned to specific or a range of CPUs 32 | - Optionally utilizes dill_ as serialization backend through multiprocess_, enabling parallelizing more exotic objects, 33 | lambdas, and functions in iPython and Jupyter notebooks. 34 | 35 | MPIRE has been tested on Linux, macOS, and Windows. There are a few minor known caveats for Windows and macOS users, 36 | which can be found at :ref:`troubleshooting_windows`. 37 | 38 | .. _benchmarks: https://towardsdatascience.com/mpire-for-python-multiprocessing-is-really-easy-d2ae7999a3e9 39 | .. _dill: https://pypi.org/project/dill/ 40 | .. _multiprocess: https://github.com/uqfoundation/multiprocess 41 | .. _tqdm: https://tqdm.github.io/ 42 | 43 | Contents 44 | -------- 45 | 46 | .. toctree:: 47 | :hidden: 48 | 49 | self 50 | 51 | .. toctree:: 52 | :maxdepth: 3 53 | :titlesonly: 54 | 55 | install 56 | getting_started 57 | usage/index 58 | troubleshooting 59 | reference/index 60 | contributing 61 | changelog 62 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | :ref:`MPIRE ` builds are distributed through PyPi_. 5 | 6 | .. _PyPi: https://pypi.org/ 7 | 8 | MPIRE can be installed through pip: 9 | 10 | .. code-block:: bash 11 | 12 | pip install mpire 13 | 14 | and is available through conda-forge: 15 | 16 | .. 
code-block:: bash 17 | 18 | conda install -c conda-forge mpire 19 | 20 | 21 | Dependencies 22 | ------------ 23 | 24 | - Python >= 3.8 25 | 26 | Python packages (installed automatically when installing MPIRE): 27 | 28 | - tqdm 29 | - pygments 30 | - pywin32 (Windows only) 31 | - importlib_resources (Python < 3.9 only) 32 | 33 | .. note:: 34 | 35 | When using MPIRE on Windows with conda, you might need to install ``pywin32`` using ``conda install pywin32`` when 36 | encountering a ``DLL failed to load`` error. 37 | 38 | .. _dilldep: 39 | 40 | Dill 41 | ~~~~ 42 | 43 | For some functions or tasks it can be useful to not rely on pickle, but on some more powerful serialization backend, 44 | like dill_. ``dill`` isn't installed by default as it has a BSD license, while MPIRE has an MIT license. If you want 45 | to use it, the license of MPIRE will change to a BSD license as well, as required by the original BSD license. See the 46 | `BSD license of multiprocess`_ for more information. 47 | 48 | You can enable ``dill`` by executing: 49 | 50 | .. code-block:: bash 51 | 52 | pip install mpire[dill] 53 | 54 | This will install multiprocess_, which uses ``dill`` under the hood. You can enable the use of ``dill`` by setting 55 | ``use_dill=True`` in the :obj:`mpire.WorkerPool` constructor. 56 | 57 | .. _dill: https://pypi.org/project/dill/ 58 | .. _multiprocess: https://github.com/uqfoundation/multiprocess 59 | .. _BSD license of multiprocess: https://github.com/uqfoundation/multiprocess/blob/master/LICENSE 60 | 61 | 62 | .. _richdep: 63 | 64 | Rich progress bars 65 | ~~~~~~~~~~~~~~~~~~ 66 | 67 | If you want to use rich_ progress bars, you have to install the dependencies for it manually: 68 | 69 | .. code-block:: bash 70 | 71 | pip install rich 72 | 73 | 74 | .. _rich: https://github.com/Textualize/rich 75 | 76 | 77 | .. _dashboarddep: 78 | 79 | Dashboard 80 | ~~~~~~~~~ 81 | 82 | Optionally, you can install the dependencies for the MPIRE dashboard, which depends on Flask_. Similarly as with 83 | ``dill``, ``Flask`` has a BSD-license. Installing these dependencies will change the license of MPIRE to BSD as well. 84 | See the `BSD license of Flask`_ for more information. 85 | 86 | The dashboard allows you to see progress information from a browser. This is convenient when running scripts in a 87 | notebook or screen, or want to share the progress information with others. Install the appropriate dependencies to 88 | enable this: 89 | 90 | .. code-block:: bash 91 | 92 | pip install mpire[dashboard] 93 | 94 | .. _Flask: https://flask.palletsprojects.com/en/1.1.x/ 95 | .. _BSD license of Flask: https://github.com/pallets/flask/blob/main/LICENSE.rst 96 | -------------------------------------------------------------------------------- /docs/mpire.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. _secret: 4 | 5 | "The Empire" 6 | ============ 7 | 8 | .. 
code-block:: none 9 | 10 | ,ooo888888888888888oooo, 11 | o8888YYYYYY77iiiiooo8888888o 12 | 8888YYYY77iiYY8888888888888888 13 | [88YYY77iiY88888888888888888888] 14 | 88YY7iYY888888888888888888888888 15 | [88YYi 88888888888888888888888888] 16 | i88Yo8888888888888888888888888888i 17 | i] ^^^88888888^^^ o [i 18 | oi8 i o8o i 8io 19 | ,77788o ^^ ,oooo8888888ooo, ^ o88777, 20 | 7777788888888888888888888888888888877777 21 | 77777888888888888888888888888888877777 22 | 77777788888888^7777777^8888888777777 23 | ,oooo888 ooo 88888778888^7777ooooo7777^8887788888 ,o88^^^^888oo 24 | o8888777788[];78 88888888888888888888888888888888888887 7;8^ 888888888oo^88 25 | o888888iii788 ]; o 78888887788788888^;;^888878877888887 o7;[]88888888888888o 26 | 88888877 ii78[]8;7o 7888878^ ^8788^;;;;;;^878^ ^878877 o7;8 ]878888888888888 27 | [88888888887888 87;7oo 777888o8888^;ii;;ii;^888o87777 oo7;7[]8778888888888888 28 | 88888888888888[]87;777oooooooooooooo888888oooooooooooo77;78]88877i78888888888 29 | o88888888888888 877;7877788777iiiiiii;;;;;iiiiiiiii77877i;78] 88877i;788888888 30 | 88^;iiii^88888 o87;78888888888888888888888888888888888887;778] 88877ii;7788888 31 | ;;;iiiii7iiii^ 87;;888888888888888888888888888888888888887;778] 888777ii;78888 32 | ;iiiii7iiiii7iiii77;i88888888888888888888i7888888888888888877;77i 888877777ii78 33 | iiiiiiiiiii7iiii7iii;;;i7778888888888888ii7788888888888777i;;;;iiii 88888888888 34 | i;iiiiiiiiiiii7iiiiiiiiiiiiiiiiiiiiiiiiii8877iiiiiiiiiiiiiiiiiii877 88888 35 | ii;;iiiiiiiiiiiiii;;;ii^^^;;;ii77777788888888888887777iii;; 77777 78 36 | 77iii;;iiiiiiiiii;;;ii;;;;;;;;;^^^^8888888888888888888777ii;; ii7 ;i78 37 | ^ii;8iiiiiiii ';;;;ii;;;;;;;;;;;;;;;;;;^^oo ooooo^^^88888888;;i7 7;788 38 | o ^;;^^88888^ 'i;;;;;;;;;;;;;;;;;;;;;;;;;;;^^^88oo^^^^888ii7 7;i788 39 | 88ooooooooo ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 788oo^;; 7;i888 40 | 887ii8788888 ;;;;;;;ii;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;^87 7;788 41 | 887i8788888^ ;;;;;;;ii;;;;;;;oo;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;,,, ;;888 42 | 87787888888 ;;;;;;;ii;;;;;;;888888oo;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;,,;i788 43 | 87i8788888^ ';;;ii;;;;;;;8888878777ii8ooo;;;;;;;;;;;;;;;;;;;;;;;;;;i788 7 44 | 77i8788888 ioo;;;;;;oo^^ooooo ^7i88^ooooo;;;;;;;;;;;;;;;;;;;;i7888 78 45 | 7i87788888o 7;ii788887i7;7;788888ooooo7888888ooo;;;;;;;;;;;;;;oo ^^^ 78 46 | i; 7888888^ 8888^o;ii778877;7;7888887;;7;7788878;878;; ;;;;;;;i78888o ^ 47 | i8 788888 [88888^^ ooo ^^^^^;;77888^^^^;;7787^^^^ ^^;;;; iiii;i78888888 48 | ^8 7888^ [87888 87 ^877i;i8ooooooo8778oooooo888877ii; iiiiiiii788888888 49 | ^^^ [7i888 87;; ^8i;;i7888888888888888887888888 i7iiiiiii88888^^ 50 | 87;88 o87;;;;o 87i;;;78888788888888888888^^ o 8ii7iiiiii;; 51 | 87;i8 877;77888o ^877;;;i7888888888888^^ 7888 78iii7iii7iiii 52 | ^87; 877;778888887o 877;;88888888888^ 7ii7888 788oiiiiiiiii 53 | ^ 877;7 7888888887 877i;;8888887ii 87i78888 7888888888 54 | [87;;7 78888888887 87i;;888887i 87ii78888 7888888888] 55 | 877;7 7788888888887 887i;887i^ 87ii788888 78888888888 56 | 87;i8 788888888888887 887ii;;^ 87ii7888888 78888888888 57 | [87;i8 7888888888888887 ^^^^ 87ii77888888 78888888888 58 | 87;;78 7888888888888887ii 87i78888888 778888888888 59 | 87;788 7888888888888887i] 87i78888888 788888888888 60 | [87;88 778888888888888887 7ii78888888 788888888888 61 | 87;;88 78888888888888887] ii778888888 78888888888] 62 | 7;;788 7888888888888888] i7888888888 78888888888' 63 | 7;;788 7888888888888888 'i788888888 78888888888 64 | 7;i788 788888888888888] 788888888 77888888888] 65 | '7;788 778888888888888] [788888888 78888888888' 66 | ';77888 
78888888888888 8888888888 7888888888] 67 | 778888 78888888888888 8888888888 7888888888] 68 | 78888 7888888888888] [8888888888 7888888888 69 | 7888 788888888888] 88888888888 788888888] 70 | 778 78888888888] ]888888888 778888888] 71 | oooooo ^88888^ ^88888^^^^^^^^8888] 72 | 87;78888ooooooo8o ,oooooo oo888oooooo 73 | [877;i77888888888] [;78887i8888878i7888; 74 | ^877;;ii7888ii788 ;i777;7788887787;778; 75 | ^87777;;;iiii777 ;77^^^^^^^^^^^^^^^^;; 76 | ^^^^^^^^^ii7] ^ o88888888877iiioo 77 | 77777o [88777777iiiiii;;778 78 | 77777iii 8877iiiii;;;77888888] 79 | 77iiii;8 [77ii;778 788888888888 80 | 7iii;;88 iii;78888 778888888888 81 | 77i;78888] ;;;;i88888 78888888888 82 | ,7;78888888 [;;i788888 7888888888] 83 | i;788888888 ;i7888888 7888888888 84 | ;788888888] i77888888 788888888] 85 | ';88888888' [77888888 788888888] 86 | [[8ooo88] 78888888 788888888 87 | [88888] 78888888 788888888 88 | ^^^ [7888888 77888888] 89 | 88888888 7888887 90 | 77888888 7888887 91 | ;i88888 788888i 92 | ,;;78888 788877i7 93 | ,7;;i;777777i7i;;7 94 | 87778^^^ ^^^^87778 95 | ^^^^ o777777o ^^^ 96 | o77777iiiiii7777o 97 | 7777iiii88888iii777 98 | ;;;i7778888888877ii;; 99 | [i77888888^^^^8888877i] 100 | 77888^oooo8888oooo^8887] 101 | [788888888888888888888888] 102 | 88888888888888888888888888 103 | ]8888888^iiiiiiiii^888888] 104 | iiiiiiiiiiiiiiiiiiiiii 105 | ^^^^^^^^^^^^^ -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | WorkerPool 8 | ---------- 9 | .. autoclass:: mpire.WorkerPool 10 | :members: 11 | :special-members: 12 | 13 | 14 | AsyncResult 15 | ----------- 16 | 17 | .. autoclass:: mpire.async_result.AsyncResult 18 | :members: 19 | :special-members: 20 | 21 | 22 | Task chunking 23 | ------------- 24 | .. autofunction:: mpire.utils.chunk_tasks 25 | 26 | 27 | Converting iterable of arguments 28 | -------------------------------- 29 | .. autofunction:: mpire.utils.make_single_arguments 30 | 31 | 32 | Dashboard 33 | --------- 34 | .. autofunction:: mpire.dashboard.start_dashboard 35 | 36 | .. autofunction:: mpire.dashboard.connect_to_dashboard 37 | 38 | .. autofunction:: mpire.dashboard.shutdown_dashboard 39 | 40 | .. autofunction:: mpire.dashboard.get_stacklevel 41 | 42 | .. autofunction:: mpire.dashboard.set_stacklevel 43 | 44 | 45 | Other 46 | ----- 47 | 48 | .. autofunction:: mpire.cpu_count 49 | -------------------------------------------------------------------------------- /docs/troubleshooting.rst: -------------------------------------------------------------------------------- 1 | Troubleshooting 2 | =============== 3 | 4 | This section describes some known problems that can arise when using MPIRE. 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | 11 | .. _troubleshooting_progress_bar: 12 | 13 | Progress bar issues with Jupyter notebooks 14 | ------------------------------------------ 15 | 16 | When using the progress bar in a Jupyter notebook you might encounter some issues. A few of these are described below, 17 | together with possible solutions. 18 | 19 | IProgress not found 20 | ~~~~~~~~~~~~~~~~~~~ 21 | 22 | When you something like ``ImportError: IProgress not found. Please update jupyter and ipywidgets.``, this means 23 | ``ipywidgets`` is not installed. You can install it using ``pip``: 24 | 25 | .. 
code-block:: bash 26 | 27 | pip install ipywidgets 28 | 29 | or conda: 30 | 31 | .. code-block:: bash 32 | 33 | conda install -c conda-forge ipywidgets 34 | 35 | Have a look at the `ipywidgets documentation`_ for more information. 36 | 37 | .. _ipywidgets documentation: https://ipywidgets.readthedocs.io/en/stable/user_install.html 38 | 39 | Widget Javascript not detected 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | When you see something like ``Widget Javascript not detected. It may not be enabled properly.``, this means the 43 | Javascript extension is not enabled. You can enable it using the following command before starting your notebook: 44 | 45 | .. code-block:: bash 46 | 47 | jupyter nbextension enable --py --sys-prefix widgetsnbextension 48 | 49 | Note that you have to restart your notebook server after enabling the extension, simply restarting the kernel won't be 50 | enough. 51 | 52 | Unit tests 53 | ---------- 54 | 55 | When using the ``'spawn'`` or ``'forkserver'`` method you'll probably run into one or two issues when running 56 | unittests in your own package. One problem that might occur is that your unittests will restart whenever the piece of 57 | code containing such a start method is called, leading to very funky terminal output. To remedy this problem make sure 58 | your ``setup`` call in ``setup.py`` is surrounded by an ``if __name__ == '__main__':`` clause: 59 | 60 | .. code-block:: python 61 | 62 | from setuptools import setup 63 | 64 | if __name__ == '__main__': 65 | 66 | # Call setup and install any dependencies you have inside the if-clause 67 | setup(...) 68 | 69 | See the 'Safe importing of main module' section at caveats_. 70 | 71 | The second problem you might encounter is that the semaphore tracker of multiprocessing will complain when you run 72 | individual (or a selection of) unittests using ``python setup.py test -s tests.some_test``. At the end of the tests you 73 | will see errors like: 74 | 75 | .. code-block:: python 76 | 77 | Traceback (most recent call last): 78 | File ".../site-packages/multiprocess/semaphore_tracker.py", line 132, in main 79 | cache.remove(name) 80 | KeyError: b'/mp-d3i13qd5' 81 | .../site-packages/multiprocess/semaphore_tracker.py:146: UserWarning: semaphore_tracker: There appear to be 58 82 | leaked semaphores to clean up at shutdown 83 | len(cache)) 84 | .../site-packages/multiprocess/semaphore_tracker.py:158: UserWarning: semaphore_tracker: '/mp-f45dt4d6': [Errno 2] 85 | No such file or directory 86 | warnings.warn('semaphore_tracker: %r: %s' % (name, e)) 87 | ... 88 | 89 | Your unittests will still succeed and run OK. Unfortunately, I've not found a remedy to this problem using 90 | ``python setup.py test`` yet. What you can use instead is something like the following: 91 | 92 | .. code-block:: python 93 | 94 | python -m unittest tests.some_test 95 | 96 | This will work just fine. See the unittest_ documentation for more information. 97 | 98 | .. _caveats: https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods 99 | .. _unittest: https://docs.python.org/3.4/library/unittest.html#command-line-interface 100 | 101 | 102 | Shutting down takes a long time on error 103 | ---------------------------------------- 104 | 105 | When you issue a ``KeyboardInterrupt`` or when an error occured in the function that's run in parallel, there are 106 | situations where MPIRE needs a few seconds to gracefully shutdown. 
This has to do with the fact that in these situations 107 | the task or results queue can be quite full, still. MPIRE drains these queues until they're completely empty, as to 108 | properly shutdown and clean up every communication channel. 109 | 110 | To remedy this issue you can use the ``max_tasks_active`` parameter and set it to ``n_jobs * 2``, or similar. Aside 111 | from the added benefit that the workers can start more quickly, the queues won't get that full anymore and shutting down 112 | will be much quicker. See :ref:`max_active_tasks` for more information. 113 | 114 | When you're using a lazy map function also be sure to iterate through the results, otherwise that queue will be full and 115 | draining it will take a longer time. 116 | 117 | .. _unpickable_tasks: 118 | 119 | Unpicklable tasks/results 120 | ------------------------- 121 | 122 | Sometimes you can encounter deadlocks in your code when using MPIRE. When you encounter this, chances are some tasks or 123 | results from your script can't be pickled. MPIRE makes use of multiprocessing queues for inter-process communication and 124 | if your function returns unpicklable results the queue will unfortunately deadlock. 125 | 126 | The only way to remedy this problem in MPIRE would be to manually pickle objects before sending it to a queue and quit 127 | gracefully when encountering a pickle error. However, this would mean objects would always be pickled twice. This would 128 | add a heavy performance penalty and is therefore not an acceptable solution. 129 | 130 | Instead, the user should make sure their tasks and results are always picklable (which in most cases won't be a 131 | problem), or resort to setting ``use_dill=True``. The latter is capable of pickling a lot more exotic types. See 132 | :ref:`use_dill` for more information. 133 | 134 | 135 | AttributeError: Can't get attribute '' on 136 | --------------------------------------------------------------------------------------- 137 | 138 | This error can occur when inside an iPython or Jupyter notebook session and the function to parallelize is defined in 139 | that session. This is often the result of using ``spawn`` as start method (the default on Windows), which starts a new 140 | process without copying the function in question. 141 | 142 | This error is actually related to the :ref:`unpickable_tasks` problem and can be solved in a similar way. I.e., you can 143 | define your function in a file that can be imported by the child process, or you can resort to using ``dill`` by setting 144 | ``use_dill=True``. See :ref:`use_dill` for more information. 145 | 146 | 147 | .. _troubleshooting_windows: 148 | 149 | Windows 150 | ------- 151 | 152 | * When using ``dill`` and an exception occurs, or when the exception occurs in an exit function, it can print additional 153 | ``OSError`` messages in the terminal, but they can be safely ignored. 154 | * The ``mpire-dashboard`` script does not work on Windows. 155 | 156 | 157 | .. _troubleshooting_macos: 158 | 159 | macOS 160 | ----- 161 | 162 | * When encountering ``OSError: [Errno 24] Too many open files`` errors, use ``ulimit -n `` to increase the 163 | limit of the number of open files. This is required because MPIRE uses file-descriptor based synchronization 164 | primitives and macOS has a very low default limit. For example, MPIRE uses about 190 file descriptors when using 10 165 | workers. 166 | * Pinning of processes to CPU cores is not supported on macOS. 
This is because macOS does not support the 167 | ``sched_setaffinity`` system call. A warning will be printed when trying to use this feature. 168 | -------------------------------------------------------------------------------- /docs/usage/apply.rst: -------------------------------------------------------------------------------- 1 | .. _apply-family: 2 | 3 | Apply family 4 | ============ 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | :obj:`mpire.WorkerPool` implements two ``apply`` functions, which are very similar to the ones in the 11 | :mod:`multiprocessing` module: 12 | 13 | :meth:`mpire.WorkerPool.apply` 14 | Apply a function to a single task. This is a blocking call. 15 | :meth:`mpire.WorkerPool.apply_async` 16 | A variant of the above, but which is non-blocking. This returns an :obj:`mpire.async_result.AsyncResult` object. 17 | 18 | ``apply`` 19 | --------- 20 | 21 | The ``apply`` function is a blocking call, which means that it will not return until the task is completed. If you want 22 | to run multiple different tasks in parallel, you should use the ``apply_async`` function instead. If you require 23 | to run the same function for many tasks in parallel, use the ``map`` functions instead. 24 | 25 | The ``apply`` function takes a function, positional arguments, and keyword arguments, similar to how 26 | :mod:`multiprocessing` does it. 27 | 28 | .. code-block:: python 29 | 30 | def task(a, b, c, d): 31 | return a + b + c + d 32 | 33 | with WorkerPool(n_jobs=1) as pool: 34 | result = pool.apply(task, args=(1, 2), kwargs={'d': 4, 'c': 3}) 35 | print(result) 36 | 37 | 38 | ``apply_async`` 39 | --------------- 40 | 41 | The ``apply_async`` function is a non-blocking call, which means that it will return immediately. It returns an 42 | :obj:`mpire.async_result.AsyncResult` object, which can be used to get the result of the task at a later moment in time. 43 | 44 | The ``apply_async`` function takes the same parameters as the ``apply`` function. 45 | 46 | .. code-block:: python 47 | 48 | def task(a, b): 49 | return a + b 50 | 51 | with WorkerPool(n_jobs=4) as pool: 52 | async_results = [pool.apply_async(task, args=(i, i)) for i in range(10)] 53 | results = [async_result.get() for async_result in async_results] 54 | 55 | Obtaining the results should happen while the pool is still running! E.g., the following will deadlock: 56 | 57 | .. code-block:: 58 | 59 | with WorkerPool(n_jobs=4) as pool: 60 | async_results = [pool.apply_async(task, args=(i, i)) for i in range(10)] 61 | 62 | # Will wait forever 63 | results = [async_result.get() for async_result in async_results] 64 | 65 | You can, however, make use of the :meth:`mpire.WorkerPool.stop_and_join()` function to stop the workers and join the 66 | pool. This will make sure that all tasks are completed before the pool exits. 67 | 68 | .. code-block:: 69 | 70 | with WorkerPool(n_jobs=4) as pool: 71 | async_results = [pool.apply_async(task, args=(i, i)) for i in range(10)] 72 | pool.stop_and_join() 73 | 74 | # Will not deadlock 75 | results = [async_result.get() for async_result in async_results] 76 | 77 | AsyncResult 78 | ----------- 79 | 80 | The :obj:`mpire.async_result.AsyncResult` object has the following convenient methods: 81 | 82 | .. code-block:: python 83 | 84 | with WorkerPool(n_jobs=1) as pool: 85 | async_result = pool.apply_async(task, args=(1, 1)) 86 | 87 | # Check if the task is completed 88 | is_completed = async_result.ready() 89 | 90 | # Wait until the task is completed, or until the timeout is reached. 
91 | async_result.wait(timeout=10) 92 | 93 | # Get the result of the task. This will block until the task is completed, 94 | # or until the timeout is reached. 95 | result = async_result.get(timeout=None) 96 | 97 | # Check if the task was successful (i.e., did not raise an exception). 98 | # This will raise an exception if the task is not completed yet. 99 | is_successful = async_result.successful() 100 | 101 | Callbacks 102 | --------- 103 | 104 | Each ``apply`` function has a ``callback`` and ``error_callback`` argument. These are functions which are called when 105 | the task is finished. The ``callback`` function is called with the result of the task when the task was completed 106 | successfully, and the ``error_callback`` is called with the exception when the task failed. 107 | 108 | .. code-block:: python 109 | 110 | def task(a): 111 | return a + 1 112 | 113 | def callback(result): 114 | print("Task completed successfully with result:", result) 115 | 116 | def error_callback(exception): 117 | print("Task failed with exception:", exception) 118 | 119 | with WorkerPool(n_jobs=1) as pool: 120 | pool.apply(task, 42, callback=callback, error_callback=error_callback) 121 | 122 | 123 | Worker init and exit 124 | -------------------- 125 | 126 | As with the ``map`` family of functions, the ``apply`` family of functions also has ``worker_init`` and ``worker_exit`` 127 | arguments. These are functions which are called when a worker is started and stopped, respectively. See 128 | :ref:`worker_init_exit` for more information on these functions. 129 | 130 | .. code-block:: python 131 | 132 | def worker_init(): 133 | print("Worker started") 134 | 135 | def worker_exit(): 136 | print("Worker stopped") 137 | 138 | with WorkerPool(n_jobs=5) as pool: 139 | pool.apply(task, 42, worker_init=worker_init, worker_exit=worker_exit) 140 | 141 | There's a caveat though. When the first ``apply`` or ``apply_async`` function is executed, the entire pool of workers 142 | is started. This means that in the above example all five workers are started, while only one was needed. This also 143 | means that the ``worker_init`` function is set for all those workers at once. This means you cannot have a different 144 | ``worker_init`` function for each apply task. A second, different ``worker_init`` function will simply be ignored. 145 | 146 | Similarly, the ``worker_exit`` function can only be set once as well. Additionally, exit functions are only called when 147 | a worker exits, which in this case translates to when the pool exits. This means that if you call ``apply`` or 148 | ``apply_async`` multiple times, the ``worker_exit`` function is only called once at the end. Use 149 | :meth:`mpire.WorkerPool.stop_and_join()` to stop the workers, which will cause the ``worker_exit`` function to be 150 | triggered for each worker. 151 | 152 | 153 | Timeouts 154 | -------- 155 | 156 | The ``apply`` family of functions also has ``task_timeout``, ``worker_init_timeout`` and ``worker_exit_timeout`` 157 | arguments. These are timeouts for the task, the ``worker_init`` function and the ``worker_exit`` function, respectively. 158 | They work similarly as those for the ``map`` functions. 159 | 160 | When a single task times out, only that task is cancelled. The other tasks will continue to run. When a worker init or 161 | exit times out, the entire pool is stopped. 162 | 163 | See :ref:`timeouts` for more information. 
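As a minimal sketch of how these timeout arguments can be combined with ``apply_async`` (the ``slow_task`` function below is purely illustrative, and it is assumed here that a timed-out task surfaces as a ``TimeoutError`` when its result is fetched):

.. code-block:: python

    import time

    from mpire import WorkerPool

    def slow_task(x):
        time.sleep(10)  # Simulate a task that takes too long
        return x

    with WorkerPool(n_jobs=2) as pool:
        # Give the task at most 3 seconds; a task exceeding this limit is
        # cancelled, while other tasks in the pool keep running
        async_result = pool.apply_async(slow_task, args=(1,), task_timeout=3)
        try:
            print(async_result.get())
        except TimeoutError:
            print("Task was cancelled after exceeding its timeout")

Because only the timed-out task is cancelled, any other outstanding ``apply_async`` calls in the same pool still return their results as usual.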
164 | -------------------------------------------------------------------------------- /docs/usage/dashboard.rst: -------------------------------------------------------------------------------- 1 | .. _Dashboard: 2 | 3 | Dashboard 4 | ========= 5 | 6 | The dashboard allows you to see progress information from a browser. This is convenient when running scripts in a 7 | notebook or screen, if you want to share the progress information with others, or if you want to get real-time worker 8 | insight information. 9 | 10 | The dashboard dependencies are not installed by default. See :ref:`dashboarddep` for more information. 11 | 12 | .. contents:: Contents 13 | :depth: 2 14 | :local: 15 | 16 | 17 | Starting the dashboard 18 | ---------------------- 19 | 20 | You can start the dashboard programmatically: 21 | 22 | .. code-block:: python 23 | 24 | from mpire.dashboard import start_dashboard 25 | 26 | # Will return a dictionary with dashboard details 27 | dashboard_details = start_dashboard() 28 | print(dashboard_details) 29 | 30 | which will print: 31 | 32 | .. code-block:: python 33 | 34 | {'dashboard_port_nr': 8080, 35 | 'manager_host': 'localhost', 36 | 'manager_port_nr': 8081} 37 | 38 | This will start a dashboard on your local machine on port 8080. When the port is already in use MPIRE will try the next, 39 | until it finds an unused one. In the rare case that no ports are available up to port 8099 the function will raise an 40 | ``OSError``. By default, MPIRE tries ports 8080-8100. You can override this range by passing on a custom range object: 41 | 42 | 43 | .. code-block:: python 44 | 45 | dashboard_details = start_dashboard(range(9000, 9100)) 46 | 47 | The returned dictionary contains the port number that is ultimately chosen. It also contains information on how to 48 | connect to this dashboard remotely. 49 | 50 | Another way of starting a dashboard is by using the bash script (this doesn't work on Windows!): 51 | 52 | .. code-block:: bash 53 | 54 | $ mpire-dashboard 55 | 56 | This will start a dashboard with the connection details printed on screen. It will say something like: 57 | 58 | .. code-block:: bash 59 | 60 | Starting MPIRE dashboard... 61 | 62 | MPIRE dashboard started on http://localhost:8080 63 | Server is listening on localhost:8098 64 | -------------------------------------------------- 65 | 66 | The server part corresponds to the ``manager_host`` and ``manager_port_nr`` from the dictionary returned by 67 | :meth:`mpire.dashboard.start_dashboard`. Similarly to earlier, a custom port range can be provided: 68 | 69 | .. code-block:: bash 70 | 71 | $ mpire-dashboard --port-range 9000-9100 72 | 73 | The benefit of starting a dashboard this way is that your dashboard keeps running in case of errors in your script. You 74 | will be able to see what the error was, when it occurred and where it occurred in your code. 75 | 76 | 77 | Connecting to an existing dashboard 78 | ----------------------------------- 79 | 80 | If you have started a dashboard elsewhere, you can connect to it using: 81 | 82 | .. code-block:: python 83 | 84 | from mpire.dashboard import connect_to_dashboard 85 | 86 | connect_to_dashboard(manager_port_nr=8081, manager_host='localhost') 87 | 88 | Make sure you use the ``manager_port_nr``, not the ``dashboard_port_nr`` in the examples above. 89 | 90 | You can connect to an existing dashboard on the same, but also on a remote machine (if the ports are open). If 91 | ``manager_host`` is omitted it will fall back to using ``'localhost'``. 
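For example, connecting to a dashboard running on another machine could look as follows. The host and port below are purely illustrative; use the ``manager_host`` and ``manager_port_nr`` reported by the machine on which the dashboard was started:

.. code-block:: python

    from mpire.dashboard import connect_to_dashboard

    # Host and port as reported by `mpire-dashboard` or `start_dashboard()`
    # on the remote machine (example values)
    connect_to_dashboard(manager_port_nr=8098, manager_host='192.168.1.50')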
92 | 93 | 94 | Using the dashboard 95 | ------------------- 96 | 97 | Once connected to a dashboard you don't need to change anything in your code. When you have enabled a progress bar in 98 | your ``map`` call, the progress bar will automatically register itself with the dashboard server and show 99 | up, as shown here: 100 | 101 | .. code-block:: python 102 | 103 | from mpire import WorkerPool 104 | from mpire.dashboard import connect_to_dashboard 105 | 106 | connect_to_dashboard(8099) 107 | 108 | def square(x): 109 | import time 110 | time.sleep(0.01) # To be able to show progress 111 | return x * x 112 | 113 | with WorkerPool(4) as pool: 114 | pool.map(square, range(10000), progress_bar=True) 115 | 116 | This will show something like: 117 | 118 | .. thumbnail:: mpire_dashboard.png 119 | :title: MPIRE dashboard 120 | 121 | You can click on a progress bar row to view details about the function that is called (which has already been done in 122 | the screenshot above). 123 | 124 | It will let you know when a ``KeyboardInterrupt`` signal was sent to the running process: 125 | 126 | .. thumbnail:: mpire_dashboard_keyboard_interrupt.png 127 | :title: MPIRE dashboard - KeyboardInterrupt has been raised 128 | 129 | or show the traceback information in case of an exception: 130 | 131 | .. thumbnail:: mpire_dashboard_error.png 132 | :title: MPIRE dashboard - Error traceback 133 | 134 | In case you have enabled :ref:`worker insights`, these insights will be shown in real time in the dashboard: 135 | 136 | .. thumbnail:: mpire_dashboard_insights.png 137 | :title: MPIRE dashboard - Worker insights 138 | 139 | Click on ``Insights (click to expand/collapse)`` to expand or collapse the insight details. 140 | 141 | The dashboard will refresh automatically every 0.5 seconds. 142 | 143 | 144 | Stack level 145 | ----------- 146 | 147 | By default, the dashboard will show information about the function that is called and where it is called from. However, 148 | when you have wrapped the function in another function, you might be less interested in the wrapper 149 | function and more interested in the function that calls this wrapper. In such cases you can use 150 | :meth:`mpire.dashboard.set_stacklevel` to set the stack level. This is the number of levels in the stack to go back in 151 | order to find the frame that contains the function that is invoking MPIRE. For example: 152 | 153 | .. code-block:: python 154 | 155 | from mpire import WorkerPool 156 | from mpire.dashboard import set_stacklevel, start_dashboard 157 | 158 | class WorkerPoolWrapper: 159 | def __init__(self, n_jobs, progress_bar=True): 160 | self.n_jobs = n_jobs 161 | self.progress_bar = progress_bar 162 | 163 | def __call__(self, func, data): 164 | with WorkerPool(self.n_jobs) as pool: 165 | return pool.map(func, data, progress_bar=self.progress_bar) 166 | 167 | def square(x): 168 | return x * x 169 | 170 | if __name__ == '__main__': 171 | start_dashboard() 172 | executor = WorkerPoolWrapper(4, progress_bar=True) 173 | set_stacklevel(1) # default 174 | results = executor(square, range(10000)) 175 | set_stacklevel(2) 176 | results = executor(square, range(10000)) 177 | 178 | When you run this code you will see that the dashboard shows two progress bars. In both cases, the dashboard will 179 | show the ``square`` function as the function that is called. However, in the first case, it will show 180 | ``return pool.map(func, data, progress_bar=self.progress_bar)`` as the line where it is called from.
In the second case, 181 | it will show the ``results = executor(square, range(10000))`` line. 182 | -------------------------------------------------------------------------------- /docs/usage/index.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | workerpool/index 8 | map/index 9 | apply 10 | dashboard 11 | -------------------------------------------------------------------------------- /docs/usage/map/index.rst: -------------------------------------------------------------------------------- 1 | Map family 2 | ========== 3 | 4 | This section describes the different ways of interacting with a :obj:`mpire.WorkerPool` instance. 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | map 10 | progress_bar 11 | worker_init_exit 12 | task_chunking 13 | max_tasks_active 14 | worker_lifespan 15 | timeouts 16 | numpy 17 | -------------------------------------------------------------------------------- /docs/usage/map/map.rst: -------------------------------------------------------------------------------- 1 | map family of functions 2 | ======================= 3 | 4 | .. contents:: Contents 5 | :depth: 2 6 | :local: 7 | 8 | :obj:`mpire.WorkerPool` implements four types of parallel ``map`` functions, being: 9 | 10 | :meth:`mpire.WorkerPool.map` 11 | Blocks until results are ready, results are ordered in the same way as the provided arguments. 12 | :meth:`mpire.WorkerPool.map_unordered` 13 | The same as :meth:`mpire.WorkerPool.map`, but results are ordered by task completion time. Usually faster than 14 | :meth:`mpire.WorkerPool.map`. 15 | :meth:`mpire.WorkerPool.imap` 16 | Lazy version of :meth:`mpire.WorkerPool.map`, returns a generator. The generator will give results back whenever new 17 | results are ready. Results are ordered in the same way as the provided arguments. 18 | :meth:`mpire.WorkerPool.imap_unordered` 19 | The same as :meth:`mpire.WorkerPool.imap`, but results are ordered by task completion time. Usually faster than 20 | :meth:`mpire.WorkerPool.imap`. 21 | 22 | When using a single worker the unordered versions are equivalent to their ordered counterparts. 23 | 24 | Iterable of arguments 25 | --------------------- 26 | 27 | Each ``map`` function should receive a function and an iterable of arguments, where the elements of the iterable can 28 | be single values or iterables that are unpacked as arguments. If an element is a dictionary, the ``(key, value)`` pairs 29 | will be unpacked with the ``**``-operator. 30 | 31 | .. code-block:: python 32 | 33 | def square(x): 34 | return x * x 35 | 36 | with WorkerPool(n_jobs=4) as pool: 37 | # 1. Square the numbers, results should be: [0, 1, 4, 9, 16, 25, ...] 38 | results = pool.map(square, range(100)) 39 | 40 | The first example should work as expected, the numbers are simply squared. MPIRE knows how many tasks there are because 41 | a ``range`` object implements the ``__len__`` method (see :ref:`Task chunking`). 42 | 43 | .. code-block:: python 44 | 45 | with WorkerPool(n_jobs=4) as pool: 46 | # 2. Square the numbers, results should be: [0, 1, 4, 9, 16, 25, ...] 47 | # Note: don't execute this, it will take a long time ... 48 | results = pool.map(square, range(int(1e30)), iterable_len=int(1e30), chunk_size=1) 49 | 50 | In the second example the ``1e30`` number is too large for Python: try calling ``len(range(int(1e30)))``, this will 51 | throw an ``OverflowError`` (don't get me started ...). 
Therefore, we must use the ``iterable_len`` parameter to let 52 | MPIRE know how large the task list is. We also have to specify a chunk size here, as the chunk size should be lower than 53 | ``sys.maxsize``. 54 | 55 | .. code-block:: python 56 | 57 | def multiply(x, y): 58 | return x * y 59 | 60 | with WorkerPool(n_jobs=4) as pool: 61 | # 3. Multiply the numbers, results should be [0, 101, 204, 309, 416, ...] 62 | for result in pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100): 63 | ... 64 | 65 | The third example shows how to use multiple function arguments. Note that we use ``imap`` in this example, 66 | which allows us to process the results whenever they become available, without having to wait for all results to be ready. 67 | 68 | .. code-block:: python 69 | 70 | with WorkerPool(n_jobs=4) as pool: 71 | # 4. Multiply the numbers, results should be [0, 101, ...] 72 | for result in pool.imap(multiply, [{'x': 0, 'y': 100}, {'y': 101, 'x': 1}, ...]): 73 | ... 74 | 75 | The final example shows the use of an iterable of dictionaries. The (key, value) pairs are unpacked with the 76 | ``**``-operator, as you would expect, so it doesn't matter in what order the keys are stored. This works for 77 | ``collections.OrderedDict`` objects as well. 78 | 79 | Circumvent argument unpacking 80 | ----------------------------- 81 | 82 | If you want to avoid unpacking and instead pass the tuples in example 3 or the dictionaries in example 4 as a whole, you can. 83 | We'll continue with example 4, but the workaround for example 3 is similar. 84 | 85 | Suppose we have the following function which expects a dictionary: 86 | 87 | .. code-block:: python 88 | 89 | def multiply_dict(d): 90 | return d['x'] * d['y'] 91 | 92 | Then you would have to convert the list of dictionaries to a list of single-argument tuples, where each argument is a 93 | dictionary: 94 | 95 | .. code-block:: python 96 | 97 | with WorkerPool(n_jobs=4) as pool: 98 | # Multiply the numbers, results should be [0, 101, ...] 99 | for result in pool.imap(multiply_dict, [({'x': 0, 'y': 100},), 100 | ({'y': 101, 'x': 1},), 101 | ...]): 102 | ... 103 | 104 | There is a utility function available that does this transformation for you: 105 | 106 | .. code-block:: python 107 | 108 | from mpire.utils import make_single_arguments 109 | 110 | with WorkerPool(n_jobs=4) as pool: 111 | # Multiply the numbers, results should be [0, 101, ...] 112 | for result in pool.imap(multiply_dict, make_single_arguments([{'x': 0, 'y': 100}, 113 | {'y': 101, 'x': 1}, ...], 114 | generator=False)): 115 | ... 116 | 117 | :meth:`mpire.utils.make_single_arguments` expects an iterable of arguments and converts them to tuples accordingly. The 118 | second argument of this function specifies whether you want the function to return a generator or a materialized list. If we 119 | wanted to return a generator, we would also need to pass on the iterable length. 120 | 121 | .. _mixing-multiple-map-calls: 122 | 123 | Mixing ``map`` functions 124 | ------------------------ 125 | 126 | ``map`` functions cannot be used while another ``map`` function is still running. E.g., the following will raise an 127 | exception: 128 | 129 | ..
code-block:: python 130 | 131 | with WorkerPool(n_jobs=4) as pool: 132 | imap_results = pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100) 133 | next(imap_results) # We actually have to start the imap function 134 | 135 | # Will raise because the imap function is still running 136 | map_results = pool.map(square, range(100)) 137 | 138 | Make sure to first finish the ``imap`` function before starting a new ``map`` function. This holds for all ``map`` 139 | functions. 140 | 141 | Not exhausting a lazy ``imap`` function 142 | --------------------------------------- 143 | 144 | If you don't exhaust a lazy ``imap`` function, but do close the pool, the remaining tasks and results will be lost. 145 | E.g., the following will raise an exception: 146 | 147 | .. code-block:: python 148 | 149 | with WorkerPool(n_jobs=4) as pool: 150 | imap_results = pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100) 151 | first_result = next(imap_results) # We actually have to start the imap function 152 | pool.terminate() 153 | 154 | # This will raise 155 | results = list(imap_results) 156 | 157 | Similarly, exiting the ``with`` block terminates the pool as well: 158 | 159 | .. code-block:: python 160 | 161 | with WorkerPool(n_jobs=4) as pool: 162 | imap_results = pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100) 163 | first_result = next(imap_results) # We actually have to start the imap function 164 | 165 | # This will raise 166 | results = list(imap_results) 167 | -------------------------------------------------------------------------------- /docs/usage/map/max_tasks_active.rst: -------------------------------------------------------------------------------- 1 | .. _max_active_tasks: 2 | 3 | Maximum number of active tasks 4 | ============================== 5 | 6 | When you have tasks that take up a lot of memory you can do a few things: 7 | 8 | - Limit the number of jobs. 9 | - Limit the number of active tasks (i.e., the number of tasks currently available to the workers: tasks that are in the 10 | queue, ready to be processed). 11 | 12 | The first option is the most obvious one to save memory when the processes themselves use up much memory. The second is 13 | convenient when the argument list takes up too much memory. For example, suppose you want to kick off an enormous number 14 | of jobs (let's say a billion) of which the arguments take up 1 KB per task (e.g., large strings), then that task queue 15 | would take up ~1 TB of memory! 16 | 17 | In such cases, a good rule of thumb would be to have twice as many active chunks of tasks as there are jobs. 18 | This means that when all workers complete their task at the same time, each of them can directly continue with 19 | another one. When workers take on their new tasks, the generator of tasks is iterated to the point that there are again 20 | twice as many active chunks of tasks. 21 | 22 | In MPIRE, the maximum number of active tasks by default is set to ``n_jobs * chunk_size * 2``, so you don't have to 23 | tweak it for memory optimization. If, for whatever reason, you want to change this behavior, you can do so by setting 24 | the ``max_tasks_active`` parameter: 25 | 26 | .. code-block:: python 27 | 28 | with WorkerPool(n_jobs=4) as pool: 29 | results = pool.map(task, range(int(1e300)), iterable_len=int(1e300), 30 | chunk_size=int(1e5), max_tasks_active=4 * int(1e5)) 31 | 32 | ..
note:: 33 | 34 | Setting the ``max_tasks_active`` parameter to a value lower than ``n_jobs * chunk_size`` can result in some workers 35 | not being able to do anything. 36 | -------------------------------------------------------------------------------- /docs/usage/map/numpy.rst: -------------------------------------------------------------------------------- 1 | Numpy arrays 2 | ============ 3 | 4 | .. contents:: Contents 5 | :depth: 2 6 | :local: 7 | 8 | Chunking 9 | -------- 10 | 11 | Numpy arrays are treated a little bit differently when passed on to the ``map`` functions. Usually MPIRE uses 12 | ``itertools.islice`` for chunking, which depends on the ``__iter__`` special function of the container object. But 13 | applying that to numpy arrays: 14 | 15 | .. code-block:: python 16 | 17 | import numpy as np 18 | 19 | # Create random array 20 | arr = np.random.rand(10, 3) 21 | 22 | # Chunk the array using default chunking 23 | arr_iter = iter(arr) 24 | chunk_size = 3 25 | while True: 26 | chunk = list(itertools.islice(arr_iter, chunk_size)) 27 | if chunk: 28 | yield chunk 29 | else: 30 | break 31 | 32 | would yield: 33 | 34 | .. code-block:: python 35 | 36 | [array([0.68438994, 0.9701514 , 0.40083965]), array([0.88428556, 0.2083905 , 0.61490443]), 37 | array([0.89249174, 0.39902235, 0.70762541])] 38 | [array([0.18850964, 0.1022777 , 0.41539432]), array([0.07327858, 0.18608165, 0.75862301]), 39 | array([0.69215651, 0.4211941 , 0.31029439])] 40 | [array([0.82571272, 0.72257819, 0.86079131]), array([0.91285817, 0.49398461, 0.27863929]), 41 | array([0.146981 , 0.84671211, 0.30122806])] 42 | [array([0.11783283, 0.12585031, 0.39864368])] 43 | 44 | In other words, each row of the array is now in its own array and each one of them is given to the target function 45 | individually. Instead, MPIRE will chunk them in to something more reasonable using numpy slicing instead: 46 | 47 | .. code-block:: python 48 | 49 | from mpire.utils import chunk_tasks 50 | 51 | for chunk in chunk_tasks(arr, chunk_size=chunk_size): 52 | print(repr(chunk)) 53 | 54 | Output: 55 | 56 | .. code-block:: python 57 | 58 | array([[0.68438994, 0.9701514 , 0.40083965], 59 | [0.88428556, 0.2083905 , 0.61490443], 60 | [0.89249174, 0.39902235, 0.70762541]]) 61 | array([[0.18850964, 0.1022777 , 0.41539432], 62 | [0.07327858, 0.18608165, 0.75862301], 63 | [0.69215651, 0.4211941 , 0.31029439]]) 64 | array([[0.82571272, 0.72257819, 0.86079131], 65 | [0.91285817, 0.49398461, 0.27863929], 66 | [0.146981 , 0.84671211, 0.30122806]]) 67 | array([[0.11783283, 0.12585031, 0.39864368]]) 68 | 69 | Each chunk is now a single numpy array containing as many rows as the chunk size, except for the last chunk as there 70 | aren't enough rows left. 71 | 72 | Return value 73 | ------------ 74 | 75 | When the user provided function returns numpy arrays and you're applying the :meth:`mpire.WorkerPool.map` function MPIRE 76 | will concatenate the resulting numpy arrays to a single array by default. For example: 77 | 78 | .. code-block:: python 79 | 80 | def add_five(x): 81 | return x + 5 82 | 83 | with WorkerPool(n_jobs=4) as pool: 84 | results = pool.map(add_five, arr, chunk_size=chunk_size) 85 | 86 | will return: 87 | 88 | .. 
code-block:: python 89 | 90 | array([[5.68438994, 5.9701514 , 5.40083965], 91 | [5.88428556, 5.2083905 , 5.61490443], 92 | [5.89249174, 5.39902235, 5.70762541], 93 | [5.18850964, 5.1022777 , 5.41539432], 94 | [5.07327858, 5.18608165, 5.75862301], 95 | [5.69215651, 5.4211941 , 5.31029439], 96 | [5.82571272, 5.72257819, 5.86079131], 97 | [5.91285817, 5.49398461, 5.27863929], 98 | [5.146981 , 5.84671211, 5.30122806], 99 | [5.11783283, 5.12585031, 5.39864368]]) 100 | 101 | This behavior can be cancelled by using the ``concatenate_numpy_output`` flag: 102 | 103 | .. code-block:: python 104 | 105 | with WorkerPool(n_jobs=4) as pool: 106 | results = pool.map(add_five, arr, chunk_size=chunk_size, concatenate_numpy_output=False) 107 | 108 | This will return individual arrays: 109 | 110 | .. code-block:: python 111 | 112 | [array([[5.68438994, 5.9701514 , 5.40083965], 113 | [5.88428556, 5.2083905 , 5.61490443], 114 | [5.89249174, 5.39902235, 5.70762541]]), 115 | array([[5.18850964, 5.1022777 , 5.41539432], 116 | [5.07327858, 5.18608165, 5.75862301], 117 | [5.69215651, 5.4211941 , 5.31029439]]), 118 | array([[5.82571272, 5.72257819, 5.86079131], 119 | [5.91285817, 5.49398461, 5.27863929], 120 | [5.146981 , 5.84671211, 5.30122806]]), 121 | array([[5.11783283, 5.12585031, 5.39864368]])] 122 | -------------------------------------------------------------------------------- /docs/usage/map/progress_bar.rst: -------------------------------------------------------------------------------- 1 | Progress bar 2 | ============ 3 | 4 | .. contents:: Contents 5 | :depth: 2 6 | :local: 7 | 8 | Progress bar support is added through the tqdm_ package (installed by default when installing MPIRE). The most easy way 9 | to include a progress bar is by enabling the ``progress_bar`` flag in any of the ``map`` functions: 10 | 11 | .. code-block:: python 12 | 13 | with WorkerPool(n_jobs=4) as pool: 14 | pool.map(task, range(100), progress_bar=True) 15 | 16 | This will display a basic ``tqdm`` progress bar displaying the time elapsed and remaining, number of tasks completed 17 | (including a percentage value) and the speed (i.e., number of tasks completed per time unit). 18 | 19 | 20 | .. _progress_bar_style: 21 | 22 | Progress bar style 23 | ------------------ 24 | 25 | You can switch to a different progress bar style by changing the ``progress_bar_style`` parameter. For example, when 26 | you require a notebook widget use ``'notebook'`` as the style: 27 | 28 | .. code-block:: python 29 | 30 | with WorkerPool(n_jobs=4) as pool: 31 | pool.map(task, range(100), progress_bar=True, progress_bar_style='notebook') 32 | 33 | The available styles are: 34 | 35 | - ``None``: use the default style (= ``'std'`` , see below) 36 | - ``'std'``: use the standard ``tqdm`` progress bar 37 | - ``'rich'``: use the rich progress bar (requires the ``rich`` package to be installed, see :ref:`richdep`) 38 | - ``'notebook'``: use the Jupyter notebook widget 39 | - ``'dashboard'``: use only the progress bar on the dashboard 40 | 41 | When in a terminal and using the ``'notebook'`` style, the progress bar will behave weirdly. This is not recommended. 42 | 43 | .. note:: 44 | 45 | If you run into problems with getting the progress bar to work in a Jupyter notebook (with ``'notebook'`` style), 46 | have a look at :ref:`troubleshooting_progress_bar`. 47 | 48 | Changing the default style 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 | You can change the default style by setting the :obj:`mpire.tqdm_utils.PROGRESS_BAR_DEFAULT_STYLE` variable: 52 | 53 | .. 
code-block:: python 54 | 55 | import mpire.tqdm_utils 56 | 57 | mpire.tqdm_utils.PROGRESS_BAR_DEFAULT_STYLE = 'notebook' 58 | 59 | .. _tqdm: https://pypi.python.org/pypi/tqdm 60 | 61 | 62 | Progress bar options 63 | -------------------- 64 | 65 | The ``tqdm`` progress bar can be configured using the ``progress_bar_options`` parameter. This parameter accepts a 66 | dictionary with keyword arguments that will be passed to the ``tqdm`` constructor. 67 | 68 | Some options in ``tqdm`` will be overwritten by MPIRE. These include the ``iterable``, ``total`` and ``leave`` 69 | parameters. The ``iterable`` is set to the iterable passed on to the ``map`` function. The ``total`` parameter is set to 70 | the number of tasks to be completed. The ``leave`` parameter is always set to ``True``. Some other parameters have a 71 | default value assigned to them, but can be overwritten by the user. 72 | 73 | Here's an example where we change the description, the units, and the colour of the progress bar: 74 | 75 | .. code-block:: python 76 | 77 | with WorkerPool(n_jobs=4) as pool: 78 | pool.map(some_func, some_data, progress_bar=True, 79 | progress_bar_options={'desc': 'Processing', 'unit': 'items', 'colour': 'green'}) 80 | 81 | For a complete list of available options, check out the `tqdm docs`_. 82 | 83 | .. _`tqdm docs`: https://tqdm.github.io/docs/tqdm/#__init__ 84 | 85 | Progress bar position 86 | ~~~~~~~~~~~~~~~~~~~~~ 87 | 88 | You can easily print a progress bar at a different position on the terminal using the ``position`` parameter of 89 | ``tqdm``, which facilitates the use of multiple progress bars. Here's an example of using multiple progress bars with 90 | nested WorkerPools: 91 | 92 | .. code-block:: python 93 | 94 | def dispatcher(worker_id, X): 95 | with WorkerPool(n_jobs=4) as nested_pool: 96 | return nested_pool.map(task, X, progress_bar=True, 97 | progress_bar_options={'position': worker_id + 1}) 98 | 99 | def main(): 100 | with WorkerPool(n_jobs=4, daemon=False, pass_worker_id=True) as pool: 101 | pool.map(dispatcher, ((range(x, x + 100),) for x in range(100)), iterable_len=100, 102 | n_splits=4, progress_bar=True) 103 | 104 | main() 105 | 106 | We use ``worker_id + 1`` here because the worker IDs start at zero and we reserve position 0 for the progress bar of 107 | the main WorkerPool (which is the default). 108 | 109 | It goes without saying that you shouldn't specify the same progress bar position multiple times. 110 | 111 | .. note:: 112 | 113 | When using the ``rich`` progress bar style, the ``position`` parameter cannot be used. An exception will be raised 114 | when trying to do so. 115 | 116 | .. note:: 117 | 118 | Most progress bar options are completely ignored when in a Jupyter/IPython notebook session or in the MPIRE 119 | dashboard. 120 | 121 | -------------------------------------------------------------------------------- /docs/usage/map/task_chunking.rst: -------------------------------------------------------------------------------- 1 | .. _Task chunking: 2 | 3 | Task chunking 4 | ============= 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | By default, MPIRE chunks the given tasks into ``64 * n_jobs`` chunks. Each worker is given one chunk of tasks at a time 11 | before returning its results. This usually makes processing faster when you have rather small tasks (computation-wise), 12 | as tasks and results are pickled/unpickled whenever they are sent to a worker or back to the main process.
Chunking the tasks and results ensures 13 | that each process has to pickle/unpickle less often. 14 | 15 | However, to determine the number of tasks in the argument list the iterable should implement the ``__len__`` method, 16 | which is available in default containers like ``list`` or ``tuple``, but isn't available in most generator objects 17 | (the ``range`` object is one of the exceptions). To allow working with generators each ``map`` function has the option 18 | to pass the iterable length: 19 | 20 | .. code-block:: python 21 | 22 | with WorkerPool(n_jobs=4) as pool: 23 | # 1. This will issue a warning and sets the chunk size to 1 24 | results = pool.map(square, ((x,) for x in range(1000))) 25 | 26 | # 2. This will issue a warning as well and sets the chunk size to 1 27 | results = pool.map(square, ((x,) for x in range(1000)), n_splits=4) 28 | 29 | # 3. Square the numbers using a generator using a specific number of splits 30 | results = pool.map(square, ((x,) for x in range(1000)), iterable_len=1000, n_splits=4) 31 | 32 | # 4. Square the numbers using a generator using automatic chunking 33 | results = pool.map(square, ((x,) for x in range(1000)), iterable_len=1000) 34 | 35 | # 5. Square the numbers using a generator using a fixed chunk size 36 | results = pool.map(square, ((x,) for x in range(1000)), chunk_size=4) 37 | 38 | In the first two examples the function call will issue a warning because MPIRE doesn't know how large the chunks should 39 | be as the total number of tasks is unknown, therefore it will fall back to a chunk size of 1. The third example should 40 | work as expected where 4 chunks are used. The fourth example uses 256 chunks (the default 64 times the number of 41 | workers). The last example uses a fixed chunk size of four, so MPIRE doesn't need to know the iterable length. 42 | 43 | You can also call the chunk function manually: 44 | 45 | .. code-block:: python 46 | 47 | from mpire.utils import chunk_tasks 48 | 49 | # Convert to list because chunk_tasks returns a generator 50 | print(list(chunk_tasks(range(10), n_splits=3))) 51 | print(list(chunk_tasks(range(10), chunk_size=2.5))) 52 | print(list(chunk_tasks((x for x in range(10)), iterable_len=10, n_splits=6))) 53 | 54 | will output: 55 | 56 | .. code-block:: python 57 | 58 | [(0, 1, 2, 3), (4, 5, 6), (7, 8, 9)] 59 | [(0, 1, 2), (3, 4), (5, 6, 7), (8, 9)] 60 | [(0, 1), (2, 3), (4,), (5, 6), (7, 8), (9,)] 61 | -------------------------------------------------------------------------------- /docs/usage/map/timeouts.rst: -------------------------------------------------------------------------------- 1 | .. _timeouts: 2 | 3 | Timeouts 4 | ======== 5 | 6 | Timeouts can be set separately for the target, ``worker_init`` and ``worker_exit`` functions. When a timeout has been 7 | set and reached, it will throw a ``TimeoutError``: 8 | 9 | .. code-block:: python 10 | 11 | # Will raise TimeoutError, provided that the target function takes longer 12 | # than half a second to complete 13 | with WorkerPool(n_jobs=5) as pool: 14 | pool.map(time_consuming_function, range(10), task_timeout=0.5) 15 | 16 | # Will raise TimeoutError, provided that the worker_init function takes longer 17 | # than 3 seconds to complete or the worker_exit function takes longer than 18 | # 150.5 seconds to complete 19 | with WorkerPool(n_jobs=5) as pool: 20 | pool.map(time_consuming_function, range(10), worker_init=init, worker_exit=exit_, 21 | worker_init_timeout=3.0, worker_exit_timeout=150.5) 22 | 23 | Use ``None`` (=default) to disable timeouts. 
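For example, a sketch of handling such a timeout (assuming the standard built-in ``TimeoutError`` is the one that propagates to the caller) could look like this:

.. code-block:: python

    try:
        with WorkerPool(n_jobs=5) as pool:
            results = pool.map(time_consuming_function, range(10), task_timeout=0.5)
    except TimeoutError:
        # A task exceeded the 0.5 second limit; retry with a more lenient limit or log the failure
        ...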
24 | 25 | ``imap`` and ``imap_unordered`` 26 | ------------------------------- 27 | 28 | When you're using one of the lazy map functions (e.g., ``imap`` or ``imap_unordered``) then an exception will only be 29 | raised when the function is actually running. E.g. when executing: 30 | 31 | .. code-block:: python 32 | 33 | with WorkerPool(n_jobs=5) as pool: 34 | results = pool.imap(time_consuming_function, range(10), task_timeout=0.5) 35 | 36 | this will never raise. This is because ``imap`` and ``imap_unordered`` return a generator object, which stops executing 37 | until it gets the trigger to go beyond the ``yield`` statement. When iterating through the results, it will raise as 38 | expected: 39 | 40 | .. code-block:: python 41 | 42 | with WorkerPool(n_jobs=5) as pool: 43 | results = pool.imap(time_consuming_function, range(10), task_timeout=0.5) 44 | for result in results: 45 | ... 46 | 47 | Threading 48 | --------- 49 | 50 | When using ``threading`` as start method MPIRE won't be able to interrupt certain functions, like ``time.sleep``. -------------------------------------------------------------------------------- /docs/usage/map/worker_init_exit.rst: -------------------------------------------------------------------------------- 1 | .. _worker_init_exit: 2 | 3 | Worker init and exit 4 | ==================== 5 | 6 | When you want to initialize a worker you can make use of the ``worker_init`` parameter of any ``map`` function. This 7 | will call the initialization function only once per worker. Similarly, if you need to clean up the worker at the end of 8 | its lifecycle you can use the ``worker_exit`` parameter. Additionally, the exit function can return anything you like, 9 | which can be collected using :meth:`mpire.WorkerPool.get_exit_results` after the workers are done. 10 | 11 | Both init and exit functions receive the worker ID, shared objects, and worker state in the same way as the task 12 | function does, given they're enabled. 13 | 14 | For example: 15 | 16 | .. code-block:: python 17 | 18 | def init_func(worker_state): 19 | # Initialize a counter for each worker 20 | worker_state['count_even'] = 0 21 | 22 | def square_and_count_even(worker_state, x): 23 | # Count number of even numbers and return the square 24 | if x % 2 == 0: 25 | worker_state['count_even'] += 1 26 | return x * x 27 | 28 | def exit_func(worker_state): 29 | # Return the counter 30 | return worker_state['count_even'] 31 | 32 | with WorkerPool(n_jobs=4, use_worker_state=True) as pool: 33 | pool.map(square_and_count_even, range(100), worker_init=init_func, worker_exit=exit_func) 34 | print(pool.get_exit_results()) # Output, e.g.: [13, 13, 12, 12] 35 | print(sum(pool.get_exit_results())) # Output: 50 36 | 37 | .. important:: 38 | 39 | When the ``worker_lifespan`` option is used to restart workers during execution, the exit function will be called 40 | for the worker that's shutting down and the init function will be called again for the new worker. Therefore, the 41 | number of elements in the list that's returned from :meth:`mpire.WorkerPool.get_exit_results` does not always equal 42 | ``n_jobs``. 43 | 44 | .. important:: 45 | 46 | When ``keep_alive`` is enabled the workers won't be terminated after a ``map`` call. This means the exit function 47 | won't be called until it's time for cleaning up the entire pool. You will have to explicitly call 48 | :meth:`mpire.WorkerPool.stop_and_join` to receive the exit results. 
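A minimal sketch of that last scenario, reusing the ``init_func``/``exit_func`` example from above:

.. code-block:: python

    with WorkerPool(n_jobs=4, use_worker_state=True, keep_alive=True) as pool:
        pool.map(square_and_count_even, range(100), worker_init=init_func, worker_exit=exit_func)

        # Workers are kept alive, so explicitly stop them to trigger the exit functions
        pool.stop_and_join()
        print(pool.get_exit_results())  # Output, e.g.: [13, 13, 12, 12]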
49 | -------------------------------------------------------------------------------- /docs/usage/map/worker_lifespan.rst: -------------------------------------------------------------------------------- 1 | Worker lifespan 2 | =============== 3 | 4 | Occasionally, workers that process multiple, memory intensive tasks do not release their used up memory properly, which 5 | results in memory usage building up. This is not a bug in MPIRE, but a consequence of Python's poor garbage collection. 6 | To avoid this type of problem you can set the worker lifespan: the number of tasks after which a worker should restart. 7 | 8 | .. code-block:: python 9 | 10 | with WorkerPool(n_jobs=4) as pool: 11 | results = pool.map(task, range(100), worker_lifespan=1, chunk_size=1) 12 | 13 | In this example each worker is restarted after finishing a single task. 14 | 15 | .. note:: 16 | 17 | When the worker lifespan has been reached, a worker will finish the current chunk of tasks before restarting. I.e., 18 | based on the ``chunk_size`` a worker could end up completing more tasks than is allowed by the worker lifespan. 19 | -------------------------------------------------------------------------------- /docs/usage/mpire_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/docs/usage/mpire_dashboard.png -------------------------------------------------------------------------------- /docs/usage/mpire_dashboard_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/docs/usage/mpire_dashboard_error.png -------------------------------------------------------------------------------- /docs/usage/mpire_dashboard_insights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/docs/usage/mpire_dashboard_insights.png -------------------------------------------------------------------------------- /docs/usage/mpire_dashboard_keyboard_interrupt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/docs/usage/mpire_dashboard_keyboard_interrupt.png -------------------------------------------------------------------------------- /docs/usage/workerpool/cpu_pinning.rst: -------------------------------------------------------------------------------- 1 | CPU pinning 2 | =========== 3 | 4 | You can pin the child processes of :obj:`mpire.WorkerPool` to specific CPUs by using the ``cpu_ids`` parameter in the 5 | constructor: 6 | 7 | .. code-block:: python 8 | 9 | # Pin the two child processes to CPUs 2 and 3 10 | with WorkerPool(n_jobs=2, cpu_ids=[2, 3]) as pool: 11 | ... 12 | 13 | # Pin the child processes to CPUs 40-59 14 | with WorkerPool(n_jobs=20, cpu_ids=list(range(40, 60))) as pool: 15 | ... 16 | 17 | # All child processes have to share a single core: 18 | with WorkerPool(n_jobs=4, cpu_ids=[0]) as pool: 19 | ... 20 | 21 | # All child processes have to share multiple cores, namely 4-7: 22 | with WorkerPool(n_jobs=4, cpu_ids=[[4, 5, 6, 7]]) as pool: 23 | ... 24 | 25 | # Each child process can use two distinctive cores: 26 | with WorkerPool(n_jobs=4, cpu_ids=[[0, 1], [2, 3], [4, 5], [6, 7]]) as pool: 27 | ... 
28 | 29 | CPU IDs have to be non-negative integers, not exceeding the number of CPUs available (which can be retrieved by using 30 | :meth:`mpire.cpu_count`). Use ``None`` to disable CPU pinning (which is the default). 31 | 32 | .. note:: 33 | 34 | Pinning processes to CPU IDs doesn't work when using threading or when you're on macOS. -------------------------------------------------------------------------------- /docs/usage/workerpool/dill.rst: -------------------------------------------------------------------------------- 1 | .. _use_dill: 2 | 3 | Dill 4 | ==== 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | For some functions or tasks it can be useful to not rely on pickle, but on a more powerful serialization backend 11 | like dill_. ``dill`` isn't installed by default. See :ref:`dilldep` for more information on installing the dependencies. 12 | 13 | One specific example where ``dill`` shines is when using start method ``spawn`` (the default on Windows) in combination 14 | with IPython or Jupyter notebooks. ``dill`` enables parallelizing more exotic objects like lambdas and functions defined 15 | in IPython and Jupyter notebooks. For all benefits of ``dill``, please refer to the `dill documentation`_. 16 | 17 | Once the dependencies have been installed, you can enable it using the ``use_dill`` flag: 18 | 19 | .. code-block:: python 20 | 21 | with WorkerPool(n_jobs=4, use_dill=True) as pool: 22 | ... 23 | 24 | .. note:: 25 | 26 | Using ``dill`` can potentially slow down processing. This is the cost of having a more reliable and 27 | powerful serialization backend. 28 | 29 | .. _dill: https://pypi.org/project/dill/ 30 | .. _dill documentation: https://github.com/uqfoundation/dill 31 | -------------------------------------------------------------------------------- /docs/usage/workerpool/index.rst: -------------------------------------------------------------------------------- 1 | WorkerPool 2 | ========== 3 | 4 | This section describes how to set up a :obj:`mpire.WorkerPool` instance. 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | setup 10 | start_method 11 | cpu_pinning 12 | worker_id 13 | shared_objects 14 | worker_state 15 | keep_alive 16 | worker_insights 17 | dill 18 | order_tasks 19 | -------------------------------------------------------------------------------- /docs/usage/workerpool/keep_alive.rst: -------------------------------------------------------------------------------- 1 | .. _keep_alive: 2 | 3 | Keep alive 4 | ========== 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | By default, workers are restarted on each ``map`` call. This is done to clean up resources as quickly as possible when 11 | the work is done. 12 | 13 | Workers can be kept alive in between consecutive ``map`` calls using the ``keep_alive`` flag. This is useful when your 14 | workers have a long startup time and you need to call one of the map functions multiple times. 15 | 16 | .. code-block:: python 17 | 18 | def task(x): 19 | pass 20 | 21 | with WorkerPool(n_jobs=4, keep_alive=True) as pool: 22 | pool.map(task, range(100)) 23 | pool.map(task, range(100)) # Workers are reused here 24 | 25 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of 26 | :meth:`mpire.WorkerPool.set_keep_alive`: 27 | 28 | ..
code-block:: python 29 | 30 | with WorkerPool(n_jobs=4) as pool: 31 | pool.map(task, range(100)) 32 | pool.map(task, range(100)) # Workers are restarted 33 | pool.set_keep_alive() 34 | pool.map(task, range(100)) # Workers are reused here 35 | 36 | Caveats 37 | ------- 38 | 39 | Changing some WorkerPool init parameters does require a restart. These include ``pass_worker_id``, ``shared_objects``, and 40 | ``use_worker_state``. 41 | 42 | Keeping workers alive works even when the function to be called or any other parameter passed on to the ``map`` function 43 | changes. 44 | 45 | However, when you're changing either the ``worker_init`` and/or ``worker_exit`` function while ``keep_alive`` is 46 | enabled, you need to be aware this can have undesired side-effects. ``worker_init`` functions are only executed when a 47 | worker is started and ``worker_exit`` functions when a worker is terminated. When ``keep_alive`` is enabled, workers 48 | aren't restarted in between consecutive ``map`` calls, so those functions are not called. 49 | 50 | .. code-block:: python 51 | 52 | def init_func_1(): pass 53 | def exit_func_1(): pass 54 | 55 | def init_func_2(): pass 56 | def exit_func_2(): pass 57 | 58 | with WorkerPool(n_jobs=4, keep_alive=True) as pool: 59 | pool.map(task, range(100), worker_init=init_func_1, worker_exit=exit_func_1) 60 | pool.map(task, range(100), worker_init=init_func_2, worker_exit=exit_func_2) 61 | 62 | In the above example ``init_func_1`` is called for each worker when the workers are started. After the first ``map`` 63 | call ``exit_func_1`` is not called because workers are kept alive. During the second ``map`` call ``init_func_2`` isn't 64 | called either, because the workers are still alive. When exiting the context manager the workers are shut down and 65 | ``exit_func_2`` is called. 66 | 67 | It gets even trickier when you also enable ``worker_lifespan``. In this scenario, during the first ``map`` call a worker 68 | could have reached its maximum lifespan and be forced to restart, while others haven't. The exit function of the worker to 69 | be restarted is called (i.e., ``exit_func_1``). When calling ``map`` for the second time and the exit function is 70 | changed, the other workers will execute the new exit function when they need to be restarted (i.e., ``exit_func_2``). 71 | -------------------------------------------------------------------------------- /docs/usage/workerpool/order_tasks.rst: -------------------------------------------------------------------------------- 1 | Order tasks 2 | =========== 3 | 4 | .. contents:: Contents 5 | :depth: 2 6 | :local: 7 | 8 | In some settings it can be useful to supply the tasks to workers in a round-robin fashion. This means worker 0 will get 9 | task 0, worker 1 will get task 1, etc. After each worker has received a task, we start with worker 0 again instead of picking the 10 | worker that has most recently completed a task. 11 | 12 | When the chunk size is larger than 1, the tasks are distributed to the workers in order, but in chunks. I.e., when 13 | ``chunk_size=3`` tasks 0, 1, and 2 will be assigned to worker 0, tasks 3, 4, and 5 to worker 1, and so on. 14 | 15 | When ``keep_alive`` is set to ``True`` and a second ``map`` call is made, MPIRE resets the worker order and starts at 16 | worker 0 again. 17 | 18 | .. warning:: 19 | 20 | When tasks vary in execution time, the default task scheduler makes sure each worker is busy for approximately the 21 | same amount of time. This can mean that some workers execute more tasks than others. When using ``order_tasks`` this
When using ``order_tasks`` this 22 | is no longer the case and therefore the total execution time is likely to be higher. 23 | 24 | You can enable/disable task ordering by setting the ``order_tasks`` flag: 25 | 26 | .. code-block:: python 27 | 28 | def task(x): 29 | pass 30 | 31 | with WorkerPool(n_jobs=4, order_tasks=True) as pool: 32 | pool.map(task, range(10)) 33 | 34 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of 35 | :meth:`mpire.WorkerPool.set_order_tasks`: 36 | 37 | .. code-block:: python 38 | 39 | with WorkerPool(n_jobs=4) as pool: 40 | pool.set_order_tasks() 41 | pool.map(task, range(10)) 42 | -------------------------------------------------------------------------------- /docs/usage/workerpool/setup.rst: -------------------------------------------------------------------------------- 1 | Starting a WorkerPool 2 | ===================== 3 | 4 | .. contents:: Contents 5 | :depth: 2 6 | :local: 7 | 8 | The :obj:`mpire.WorkerPool` class controls a pool of worker processes similarly to a ``multiprocessing.Pool``. It 9 | contains all the ``map`` like functions (with the addition of :meth:`mpire.WorkerPool.map_unordered`), together with 10 | the ``apply`` and ``apply_async`` functions (see :ref:`apply-family`). 11 | 12 | An :obj:`mpire.WorkerPool` can be started in two different ways. The first and recommended way to do so is using a 13 | context manager: 14 | 15 | .. code-block:: python 16 | 17 | from mpire import WorkerPool 18 | 19 | # Start a pool of 4 workers 20 | with WorkerPool(n_jobs=4) as pool: 21 | # Do some processing here 22 | pass 23 | 24 | The ``with`` statement takes care of properly joining/terminating the spawned worker processes after the block has 25 | ended. 26 | 27 | The other way is to do it manually: 28 | 29 | .. code-block:: python 30 | 31 | # Start a pool of 4 workers 32 | pool = WorkerPool(n_jobs=4) 33 | 34 | # Do some processing here 35 | pass 36 | 37 | # Only needed when keep_alive=True: 38 | # Clean up pool (this will block until all processing has completed) 39 | pool.stop_and_join() # or use pool.join() which is an alias of stop_and_join() 40 | 41 | # In the case you want to kill the processes, even though they are still busy 42 | pool.terminate() 43 | 44 | When using ``n_jobs=None`` MPIRE will spawn as many processes as there are CPUs on your system. Specifying more jobs 45 | than you have CPUs is, of course, possible as well. 46 | 47 | .. warning:: 48 | 49 | In the manual approach, the results queue should be drained before joining the workers, otherwise you can get a 50 | deadlock. If you want to join either way, use :meth:`mpire.WorkerPool.terminate`. For more information, see the 51 | warnings in the Python docs here_. 52 | 53 | .. _here: https://docs.python.org/3/library/multiprocessing.html#pipes-and-queues 54 | 55 | 56 | Nested WorkerPools 57 | ------------------ 58 | 59 | By default, the :obj:`mpire.WorkerPool` class spawns daemon child processes who are not able to create child processes 60 | themselves, so nested pools are not allowed. There's an option to create non-daemon child processes to allow for nested 61 | structures: 62 | 63 | .. code-block:: python 64 | 65 | def job(...) 66 | with WorkerPool(n_jobs=4) as p: 67 | # Do some work 68 | results = p.map(...) 69 | 70 | with WorkerPool(n_jobs=4, daemon=True, start_method='spawn') as pool: 71 | # This will raise an AssertionError telling you daemon processes 72 | # can't start child processes 73 | pool.map(job, ...) 
74 | 75 | with WorkerPool(n_jobs=4, daemon=False, start_method='spawn') as pool: 76 | # This will work just fine 77 | pool.map(job, ...) 78 | 79 | .. note:: 80 | 81 | Nested pools aren't supported when using threading. 82 | 83 | .. warning:: 84 | 85 | Spawning processes is not thread-safe_! Both ``start`` and ``join`` methods of the ``Process`` class alter global 86 | variables. If you still want to have nested pools, the safest bet is to use ``spawn`` as start method. 87 | 88 | .. note:: 89 | 90 | Due to a strange bug in Python, using ``forkserver`` as start method in a nested pool is not allowed when the 91 | outer pool is using ``fork``, as the forkserver will not have been started there. For it to work your outer pool 92 | will have to use either ``spawn`` or ``forkserver`` as start method. 93 | 94 | .. warning:: 95 | 96 | Nested pools aren't production ready. Error handling and keyboard interrupts when using nested pools can, on some 97 | rare occasions (~1% of the time), still cause deadlocks. Use at your own risk. 98 | 99 | When a function is guaranteed to finish successfully, using nested pools is absolutely fine. 100 | 101 | .. _thread-safe: https://bugs.python.org/issue40860 102 | -------------------------------------------------------------------------------- /docs/usage/workerpool/shared_objects.rst: -------------------------------------------------------------------------------- 1 | .. _shared_objects: 2 | 3 | Shared objects 4 | ============== 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | MPIRE allows you to provide shared objects to the workers in a similar way as is possible with the 11 | ``multiprocessing.Process`` class. For the start method ``fork`` these shared objects are treated as ``copy-on-write``, 12 | which means they are only copied when changes are made to them. Otherwise they share the same memory address. This is 13 | convenient if you want to let workers access a large dataset that wouldn't fit in memory when copied multiple times. 14 | 15 | .. note:: 16 | 17 | The start method ``fork`` isn't available on Windows, which means copy-on-write isn't supported there. 18 | 19 | For ``threading`` these shared objects are readable and writable without copies being made. For the start methods 20 | ``spawn`` and ``forkserver`` the shared objects are copied once for each worker, in contrast to copying them for each 21 | task, which is done when using a regular ``multiprocessing.Pool``. 22 | 23 | .. code-block:: python 24 | 25 | def task(dataset, x): 26 | # Do something with this copy-on-write dataset 27 | ... 28 | 29 | def main(): 30 | dataset = ... # Load big dataset 31 | with WorkerPool(n_jobs=4, shared_objects=dataset, start_method='fork') as pool: 32 | ... = pool.map(task, range(100)) 33 | 34 | Multiple objects can be provided by placing them, for example, in a tuple container. 35 | 36 | Apart from sharing regular Python objects between workers, you can also share multiprocessing synchronization 37 | primitives such as ``multiprocessing.Lock`` using this method. Objects like these need to be shared through 38 | inheritance, which is exactly how shared objects in MPIRE are passed on. 39 | 40 | .. important:: 41 | 42 | Shared objects are passed on as the second argument, after the worker ID (when enabled), to the provided function. 43 | 44 | Instead of passing the shared objects to the :obj:`mpire.WorkerPool` constructor you can also use the 45 | :meth:`mpire.WorkerPool.set_shared_objects` function: 46 | 47 | ..
code-block:: python 48 | 49 | def main(): 50 | dataset = ... # Load big dataset 51 | with WorkerPool(n_jobs=4, start_method='fork') as pool: 52 | pool.set_shared_objects(dataset) 53 | ... = pool.map(task, range(100)) 54 | 55 | Shared objects have to be specified before the workers are started. Workers are started once the first ``map`` call is 56 | executed. When ``keep_alive=True`` and the workers are reused, changing the shared objects between two consecutive 57 | ``map`` calls won't work. 58 | 59 | 60 | Copy-on-write alternatives 61 | -------------------------- 62 | 63 | When copy-on-write is not available for you, you can also use shared objects to share a ``multiprocessing.Array``, 64 | ``multiprocessing.Value``, or another object with ``multiprocessing.Manager``. You can then store results in the same 65 | object from multiple processes. However, you should keep the amount of synchronization to a minimum when the resources 66 | are protected with a lock, or disable locking if your situation allows it as is shown here: 67 | 68 | .. code-block:: python 69 | 70 | from multiprocessing import Array 71 | 72 | def square_add_and_modulo_with_index(shared_objects, idx, x): 73 | # Unpack results containers 74 | square_results_container, add_results_container = shared_objects 75 | 76 | # Square, add and modulo 77 | square_results_container[idx] = x * x 78 | add_results_container[idx] = x + x 79 | return x % 2 80 | 81 | def main(): 82 | # Use a shared array of size 100 and type float to store the results 83 | square_results_container = Array('f', 100, lock=False) 84 | add_results_container = Array('f', 100, lock=False) 85 | shared_objects = square_results_container, add_results_container 86 | with WorkerPool(n_jobs=4, shared_objects=shared_objects) as pool: 87 | 88 | # Square, add and modulo the results and store them in the results containers 89 | modulo_results = pool.map(square_add_and_modulo_with_index, 90 | enumerate(range(100)), iterable_len=100) 91 | 92 | In the example above we create two results containers, one for squaring and for adding the given value, and disable 93 | locking for both. Additionally, we also return a value, even though we use shared objects for storing results. We can 94 | safely disable locking here as each task writes to a different index in the array, so no race conditions can occur. 95 | Disabling locking is, of course, a lot faster than having it enabled. 96 | -------------------------------------------------------------------------------- /docs/usage/workerpool/start_method.rst: -------------------------------------------------------------------------------- 1 | .. _start_methods: 2 | 3 | Process start method 4 | ==================== 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | The ``multiprocessing`` package allows you to start processes using a few different methods: ``'fork'``, ``'spawn'`` or 11 | ``'forkserver'``. Threading is also available by using ``'threading'``. For detailed information on the multiprocessing 12 | contexts, please refer to the multiprocessing documentation_ and caveats_ section. In short: 13 | 14 | fork 15 | Copies the parent process such that the child process is effectively identical. This includes copying everything 16 | currently in memory. This is sometimes useful, but other times useless or even a serious bottleneck. ``fork`` 17 | enables the use of copy-on-write shared objects (see :ref:`shared_objects`). 18 | spawn 19 | Starts a fresh python interpreter where only those resources necessary are inherited. 
20 | forkserver 21 | First starts a server process (using ``'spawn'``). Whenever a new process is needed the parent process requests the 22 | server to fork a new process. 23 | threading 24 | Starts child threads. Suffers from the Global Interpreter Lock (GIL), but works fine for I/O intensive tasks. 25 | 26 | For an overview of start method availability and defaults, please refer to the following table: 27 | 28 | .. list-table:: 29 | :header-rows: 1 30 | 31 | * - Start method 32 | - Available on Unix 33 | - Available on Windows 34 | * - ``fork`` 35 | - Yes (default) 36 | - No 37 | * - ``spawn`` 38 | - Yes 39 | - Yes (default) 40 | * - ``forkserver`` 41 | - Yes 42 | - No 43 | * - ``threading`` 44 | - Yes 45 | - Yes 46 | 47 | Spawn and forkserver 48 | -------------------- 49 | 50 | When using ``spawn`` or ``forkserver`` as start method, be aware that global variables (constants are fine) might have a 51 | different value than you might expect. You also have to import packages within the called function: 52 | 53 | .. code-block:: python 54 | 55 | import os 56 | 57 | def failing_job(folder, filename): 58 | return os.path.join(folder, filename) 59 | 60 | # This will fail because 'os' is not copied to the child processes 61 | with WorkerPool(n_jobs=2, start_method='spawn') as pool: 62 | pool.map(failing_job, [('folder', '0.p3'), ('folder', '1.p3')]) 63 | 64 | .. code-block:: python 65 | 66 | def working_job(folder, filename): 67 | import os 68 | return os.path.join(folder, filename) 69 | 70 | # This will work 71 | with WorkerPool(n_jobs=2, start_method='spawn') as pool: 72 | pool.map(working_job, [('folder', '0.p3'), ('folder', '1.p3')]) 73 | 74 | A lot of effort has been put into making the progress bar, dashboard, and nested pools (with multiple progress bars) 75 | work well with ``spawn`` and ``forkserver``. So, everything should work fine. 76 | 77 | .. _documentation: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods 78 | .. _caveats: https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods 79 | -------------------------------------------------------------------------------- /docs/usage/workerpool/worker_id.rst: -------------------------------------------------------------------------------- 1 | .. _workerID: 2 | 3 | Accessing the worker ID 4 | ======================= 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | Each worker in MPIRE is given an integer ID to distinguish them. Worker #1 will have ID ``0``, #2 will have ID ``1``, 11 | etc. Sometimes it can be useful to have access to this ID. 12 | 13 | By default, the worker ID is not passed on. You can enable/disable this by setting the ``pass_worker_id`` flag: 14 | 15 | .. code-block:: python 16 | 17 | def task(worker_id, x): 18 | pass 19 | 20 | with WorkerPool(n_jobs=4, pass_worker_id=True) as pool: 21 | pool.map(task, range(10)) 22 | 23 | .. important:: 24 | 25 | The worker ID will always be the first argument passed on to the provided function. 26 | 27 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of 28 | :meth:`mpire.WorkerPool.pass_on_worker_id`: 29 | 30 | .. 
code-block:: python 31 | 32 | with WorkerPool(n_jobs=4) as pool: 33 | pool.pass_on_worker_id() 34 | pool.map(task, range(10)) 35 | 36 | Elaborate example 37 | ----------------- 38 | 39 | Here's a more elaborate example of using the worker ID together with a shared array, where each worker can only access 40 | the element corresponding to its worker ID, making the use of locking unnecessary: 41 | 42 | .. code-block:: python 43 | 44 | def square_sum(worker_id, shared_objects, x): 45 | # Even though the shared objects argument is a single container, we 'unpack' it anyway 46 | results_container = shared_objects 47 | 48 | # Square and sum 49 | results_container[worker_id] += x * x 50 | 51 | # Use a shared multiprocessing.Array of size equal to the number of jobs to store the results 52 | results_container = Array('f', 4, lock=False) 53 | 54 | with WorkerPool(n_jobs=4, shared_objects=results_container, pass_worker_id=True) as pool: 55 | # Square the results and store them in the results container 56 | pool.map_unordered(square_sum, range(100)) 57 | -------------------------------------------------------------------------------- /docs/usage/workerpool/worker_insights.rst: -------------------------------------------------------------------------------- 1 | .. _worker insights: 2 | 3 | Worker insights 4 | =============== 5 | 6 | Worker insights gives you insight into your multiprocessing efficiency by tracking worker start up time, waiting time and 7 | time spent on executing tasks. Tracking is disabled by default, but can be enabled by setting ``enable_insights``: 8 | 9 | .. code-block:: python 10 | 11 | with WorkerPool(n_jobs=4, enable_insights=True) as pool: 12 | pool.map(task, range(100)) 13 | 14 | The overhead is minimal and you shouldn't really notice it, even on very small tasks. You can view the tracking 15 | results using :meth:`mpire.WorkerPool.get_insights` or use :meth:`mpire.WorkerPool.print_insights` to directly print 16 | the insights to the console: 17 | 18 | ..
code-block:: python 19 | 20 | import time 21 | 22 | def sleep_and_square(x): 23 | # For illustration purposes 24 | time.sleep(x / 1000) 25 | return x * x 26 | 27 | with WorkerPool(n_jobs=4, enable_insights=True) as pool: 28 | pool.map(sleep_and_square, range(100)) 29 | insights = pool.get_insights() 30 | print(insights) 31 | 32 | # Output: 33 | {'n_completed_tasks': [28, 24, 24, 24], 34 | 'total_start_up_time': '0:00:00.038', 35 | 'total_init_time': '0:00:00', 36 | 'total_waiting_time': '0:00:00.798', 37 | 'total_working_time': '0:00:04.980', 38 | 'total_exit_time': '0:00:00', 39 | 'total_time': '0:00:05.816', 40 | 'start_up_time': ['0:00:00.010', '0:00:00.008', '0:00:00.008', '0:00:00.011'], 41 | 'start_up_time_mean': '0:00:00.009', 42 | 'start_up_time_std': '0:00:00.001', 43 | 'start_up_ratio': 0.006610452621805033, 44 | 'init_time': ['0:00:00', '0:00:00', '0:00:00', '0:00:00'], 45 | 'init_time_mean': '0:00:00', 46 | 'init_time_std': '0:00:00', 47 | 'init_ratio': 0.0, 48 | 'waiting_time': ['0:00:00.309', '0:00:00.311', '0:00:00.165', '0:00:00.012'], 49 | 'waiting_time_mean': '0:00:00.199', 50 | 'waiting_time_std': '0:00:00.123', 51 | 'waiting_ratio': 0.13722942739284952, 52 | 'working_time': ['0:00:01.142', '0:00:01.135', '0:00:01.278', '0:00:01.423'], 53 | 'working_time_mean': '0:00:01.245', 54 | 'working_time_std': '0:00:00.117', 55 | 'working_ratio': 0.8561601182661567, 56 | 'exit_time': ['0:00:00', '0:00:00', '0:00:00', '0:00:00'], 57 | 'exit_time_mean': '0:00:00', 58 | 'exit_time_std': '0:00:00', 59 | 'exit_ratio': 0.0, 60 | 'top_5_max_task_durations': ['0:00:00.099', '0:00:00.098', '0:00:00.097', '0:00:00.096', 61 | '0:00:00.095'], 62 | 'top_5_max_task_args': ['Arg 0: 99', 'Arg 0: 98', 'Arg 0: 97', 'Arg 0: 96', 'Arg 0: 95']} 63 | 64 | We specified 4 workers, so there are 4 entries in the ``n_completed_tasks``, ``start_up_time``, ``init_time``, 65 | ``waiting_time``, ``working_time``, and ``exit_time`` containers. Per worker, they show the number of completed tasks, 66 | the total start up time, the total time spent on the ``worker_init`` function, the total time waiting for new tasks, 67 | the total time spent on the main function, and the total time spent on the ``worker_exit`` function, respectively. The insights 68 | also contain the mean, standard deviation, and ratio of each tracked time. The ratio is the time for that part divided by the 69 | total time. In general, the higher the working ratio the more efficient your multiprocessing setup is. Of course, your 70 | setup might still not be optimal because the task itself is inefficient, but timing that is beyond the scope of MPIRE. 71 | 72 | Additionally, the insights keep track of the top 5 tasks that took the longest to run. The data is split into two 73 | containers: one for the durations and one for the arguments that were passed on to the task function. Both are sorted 74 | by task duration (descending), so index ``0`` of the args list corresponds to index ``0`` of the duration list, etc. 75 | 76 | When using the MPIRE :ref:`Dashboard` you can track these insights in real-time. See :ref:`Dashboard` for more 77 | information. 78 | 79 | .. note:: 80 | 81 | When using `imap` or `imap_unordered` you can view the insights during execution. Simply call ``get_insights()`` 82 | or ``print_insights()`` inside your loop where you process the results.
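For instance, a minimal sketch of that pattern (not part of the original docs; it reuses the ``sleep_and_square`` function from the example above, and the print interval of 25 results is an arbitrary choice) could look like this:

.. code-block:: python

    with WorkerPool(n_jobs=4, enable_insights=True) as pool:
        for idx, result in enumerate(pool.imap_unordered(sleep_and_square, range(100))):
            # Print intermediate insights every 25 processed results
            if idx > 0 and idx % 25 == 0:
                pool.print_insights()

        # Obtain the final insights once all results have been consumed
        insights = pool.get_insights()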
83 | -------------------------------------------------------------------------------- /docs/usage/workerpool/worker_state.rst: -------------------------------------------------------------------------------- 1 | .. _worker_state: 2 | 3 | Worker state 4 | ============ 5 | 6 | .. contents:: Contents 7 | :depth: 2 8 | :local: 9 | 10 | If you want to let each worker have its own state you can use the ``use_worker_state`` flag: 11 | 12 | .. code-block:: python 13 | 14 | def task(worker_state, x): 15 | if "local_sum" not in worker_state: 16 | worker_state["local_sum"] = 0 17 | worker_state["local_sum"] += x 18 | 19 | with WorkerPool(n_jobs=4, use_worker_state=True) as pool: 20 | results = pool.map(task, range(100)) 21 | 22 | .. important:: 23 | 24 | The worker state is passed on as the third argument, after the worker ID and shared objects (when enabled), to the 25 | provided function. 26 | 27 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of 28 | :meth:`mpire.WorkerPool.set_use_worker_state`: 29 | 30 | .. code-block:: python 31 | 32 | with WorkerPool(n_jobs=4) as pool: 33 | pool.set_use_worker_state() 34 | pool.map(task, range(100)) 35 | 36 | Combining worker state with worker_init and worker_exit 37 | ------------------------------------------------------- 38 | 39 | The worker state can be combined with the ``worker_init`` and ``worker_exit`` parameters of each ``map`` function, 40 | leading to some really useful capabilities: 41 | 42 | .. code-block:: python 43 | 44 | import numpy as np 45 | import pickle 46 | 47 | def load_big_model(worker_state): 48 | # Load a model which takes up a lot of memory 49 | with open('./a_really_big_model.p3', 'rb') as f: 50 | worker_state['model'] = pickle.load(f) 51 | 52 | def model_predict(worker_state, x): 53 | # Predict 54 | return worker_state['model'].predict(x) 55 | 56 | with WorkerPool(n_jobs=4, use_worker_state=True) as pool: 57 | # Let the model predict 58 | data = np.array([[...]]) 59 | results = pool.map(model_predict, data, worker_init=load_big_model) 60 | 61 | More information about the ``worker_init`` and ``worker_exit`` parameters can be found at :ref:`worker_init_exit`. 62 | 63 | Combining worker state with keep_alive 64 | -------------------------------------- 65 | 66 | By default, workers are restarted each time a ``map`` function is executed. As described in :ref:`keep_alive` this can 67 | be circumvented by using ``keep_alive=True``. This also ensures worker state is kept across consecutive ``map`` calls: 68 | 69 | .. code-block:: python 70 | 71 | with WorkerPool(n_jobs=4, use_worker_state=True, keep_alive=True) as pool: 72 | # Let the model predict 73 | data = np.array([[...]]) 74 | results = pool.map(model_predict, data, worker_init=load_big_model) 75 | 76 | # Predict some more 77 | more_data = np.array([[...]]) 78 | more_results = pool.map(model_predict, more_data) 79 | 80 | In this example we don't need to supply the ``worker_init`` function to the second ``map`` call, as the workers will be 81 | reused. When ``worker_lifespan`` is set, though, this rule doesn't apply. 
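As a minimal sketch of that caveat (reusing ``load_big_model``, ``model_predict``, ``data``, and ``more_data`` from the examples above; the lifespan of 10 tasks is an arbitrary choice): when ``worker_lifespan`` is set, workers are periodically restarted and lose their state, so the ``worker_init`` function should be supplied to every ``map`` call again:

.. code-block:: python

    with WorkerPool(n_jobs=4, use_worker_state=True, keep_alive=True) as pool:
        # Each worker is restarted after 10 tasks, so worker_init must rebuild its state
        results = pool.map(model_predict, data, worker_init=load_big_model, worker_lifespan=10)
        more_results = pool.map(model_predict, more_data, worker_init=load_big_model, worker_lifespan=10)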
82 | -------------------------------------------------------------------------------- /images/benchmarks_averaged.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/images/benchmarks_averaged.png -------------------------------------------------------------------------------- /mpire/__init__.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import cpu_count 2 | 3 | from mpire.pool import WorkerPool 4 | -------------------------------------------------------------------------------- /mpire/async_result.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools 3 | import queue 4 | import threading 5 | from enum import Enum, auto 6 | from typing import Any, Callable, Dict, List, Optional, Union 7 | 8 | from mpire.comms import EXIT_FUNC, MAIN_PROCESS 9 | 10 | job_counter = itertools.count() 11 | 12 | 13 | class JobType(Enum): 14 | MAIN = auto() 15 | INIT = auto() 16 | MAP = auto() 17 | EXIT = auto() 18 | APPLY = auto() 19 | 20 | 21 | class AsyncResult: 22 | """Adapted from ``multiprocessing.pool.ApplyResult``.""" 23 | 24 | def __init__( 25 | self, 26 | cache: Dict, 27 | callback: Optional[Callable], 28 | error_callback: Optional[Callable], 29 | job_id: Optional[int] = None, 30 | delete_from_cache: bool = True, 31 | timeout: Optional[float] = None, 32 | ) -> None: 33 | """ 34 | :param cache: Cache for storing intermediate results 35 | :param callback: Callback function to call when the task is finished. The callback function receives the output 36 | of the function as its argument 37 | :param error_callback: Callback function to call when the task has failed. The callback function receives the 38 | exception as its argument 39 | :param job_id: Job ID of the task. If None, a new job ID is generated 40 | :param delete_from_cache: If True, the result is deleted from the cache when the task is finished 41 | :param timeout: Timeout in seconds for a single task. When the timeout is exceeded, MPIRE will raise a 42 | ``TimeoutError``. Use ``None`` to disable (default) 43 | """ 44 | self._cache = cache 45 | self._callback = callback 46 | self._error_callback = error_callback 47 | self._delete_from_cache = delete_from_cache 48 | self._timeout = timeout 49 | 50 | self.type = JobType.APPLY 51 | self.job_id = next(job_counter) if job_id is None else job_id 52 | self._ready_event = threading.Event() 53 | self._success = None 54 | self._value = None 55 | if self.job_id in self._cache: 56 | raise ValueError(f"Job ID {job_id} already exists in cache") 57 | self._cache[self.job_id] = self 58 | 59 | def ready(self) -> bool: 60 | """ 61 | :return: Returns True if the task is finished 62 | """ 63 | return self._ready_event.is_set() 64 | 65 | def successful(self) -> bool: 66 | """ 67 | :return: Returns True if the task has finished successfully 68 | :raises: ValueError if the task is not finished yet 69 | """ 70 | if not self.ready(): 71 | raise ValueError(f"{self.job_id} is not ready") 72 | return self._success 73 | 74 | def wait(self, timeout: Optional[float] = None) -> None: 75 | """ 76 | Wait until the task is finished 77 | 78 | :param timeout: Timeout in seconds. 
If None, wait indefinitely 79 | """ 80 | self._ready_event.wait(timeout) 81 | 82 | def get(self, timeout: Optional[float] = None) -> Any: 83 | """ 84 | Wait until the task is finished and return the output of the function 85 | 86 | :param timeout: Timeout in seconds. If None, wait indefinitely 87 | :return: Output of the function 88 | :raises: TimeoutError if the task is not finished within the timeout. When the task has failed, the exception 89 | raised by the function is re-raised 90 | """ 91 | self.wait(timeout) 92 | if not self.ready(): 93 | raise TimeoutError 94 | if self._success: 95 | return self._value 96 | else: 97 | raise self._value 98 | 99 | def _set(self, success: bool, result: Any) -> None: 100 | """ 101 | Set the result of the task and call any callbacks, when provided. This also removes the task from the cache, as 102 | it's no longer needed there. The user should store a reference to the result object 103 | 104 | :param success: True if the task has finished successfully 105 | :param result: Output of the function or the exception raised by the function 106 | """ 107 | self._success = success 108 | self._value = result 109 | 110 | if self._callback and self._success: 111 | self._callback(self._value) 112 | 113 | if self._error_callback and not self._success: 114 | self._error_callback(self._value) 115 | 116 | self._ready_event.set() 117 | if self._delete_from_cache: 118 | del self._cache[self.job_id] 119 | 120 | 121 | class UnorderedAsyncResultIterator: 122 | """Stores results of a task and provides an iterator to obtain the results in an unordered fashion""" 123 | 124 | def __init__( 125 | self, cache: Dict, n_tasks: Optional[int], job_id: Optional[int] = None, timeout: Optional[float] = None 126 | ) -> None: 127 | """ 128 | :param cache: Cache for storing intermediate results 129 | :param n_tasks: Number of tasks that will be executed. If None, we don't know the length yet 130 | :param job_id: Job ID of the task. If None, a new job ID is generated 131 | :param timeout: Timeout in seconds for a single task. When the timeout is exceeded, MPIRE will raise a 132 | ``TimeoutError``. Use ``None`` to disable (default) 133 | """ 134 | self._cache = cache 135 | self._n_tasks = None 136 | self._timeout = timeout 137 | 138 | self.type = JobType.MAP 139 | self.job_id = next(job_counter) if job_id is None else job_id 140 | self._items = collections.deque() 141 | self._condition = threading.Condition(lock=threading.Lock()) 142 | self._n_received = 0 143 | self._n_returned = 0 144 | self._exception = None 145 | self._got_exception = threading.Event() 146 | if self.job_id in self._cache: 147 | raise ValueError(f"Job ID {job_id} already exists in cache") 148 | self._cache[self.job_id] = self 149 | 150 | if n_tasks is not None: 151 | self.set_length(n_tasks) 152 | 153 | def __iter__(self) -> "UnorderedAsyncResultIterator": 154 | return self 155 | 156 | def next(self, block: bool = True, timeout: Optional[float] = None) -> Any: 157 | """ 158 | Obtain the next unordered result for the task 159 | 160 | :param block: If True, wait until the next result is available. If False, raise queue.Empty if no result is 161 | available 162 | :param timeout: Timeout in seconds.
If None, wait indefinitely 163 | :return: The next result 164 | """ 165 | if self._items: 166 | self._n_returned += 1 167 | return self._items.popleft() 168 | 169 | if self._n_tasks is not None and self._n_returned == self._n_tasks: 170 | raise StopIteration 171 | 172 | if not block: 173 | raise queue.Empty 174 | 175 | # We still expect results. Wait until the next result is available 176 | with self._condition: 177 | while not self._items: 178 | timed_out = not self._condition.wait(timeout=timeout) 179 | if timed_out: 180 | raise queue.Empty 181 | if self._n_tasks is not None and self._n_returned == self._n_tasks: 182 | raise StopIteration 183 | 184 | self._n_returned += 1 185 | return self._items.popleft() 186 | 187 | __next__ = next 188 | 189 | def wait(self) -> None: 190 | """ 191 | Wait until all results are available 192 | """ 193 | with self._condition: 194 | while self._n_tasks is None or self._n_received < self._n_tasks: 195 | self._condition.wait() 196 | 197 | def _set(self, success: bool, result: Any) -> None: 198 | """ 199 | Set the result of the task 200 | 201 | :param success: True if the task has finished successfully 202 | :param result: Output of the function or the exception raised by the function 203 | """ 204 | if success: 205 | # Add the result to the queue and notify the iterator 206 | self._n_received += 1 207 | self._items.append(result) 208 | with self._condition: 209 | self._condition.notify() 210 | else: 211 | self._exception = result 212 | self._got_exception.set() 213 | 214 | def set_length(self, length: int) -> None: 215 | """ 216 | Set the length of the iterator 217 | 218 | :param length: Length of the iterator 219 | """ 220 | if self._n_tasks is not None: 221 | if self._n_tasks != length: 222 | raise ValueError( 223 | f"Length of iterator has already been set to {self._n_tasks}, but is now set to {length}" 224 | ) 225 | # Length has already been set. 
No need to do anything 226 | return 227 | 228 | with self._condition: 229 | self._n_tasks = length 230 | self._condition.notify() 231 | 232 | def get_exception(self) -> Exception: 233 | """ 234 | :return: The exception raised by the function 235 | """ 236 | self._got_exception.wait() 237 | return self._exception 238 | 239 | def remove_from_cache(self) -> None: 240 | """ 241 | Remove the iterator from the cache 242 | """ 243 | del self._cache[self.job_id] 244 | 245 | 246 | class AsyncResultWithExceptionGetter(AsyncResult): 247 | 248 | def __init__(self, cache: Dict, job_id: int) -> None: 249 | super().__init__( 250 | cache, callback=None, error_callback=None, job_id=job_id, delete_from_cache=False, timeout=None 251 | ) 252 | self.type = JobType.MAIN if job_id == MAIN_PROCESS else JobType.INIT 253 | 254 | def get_exception(self) -> Exception: 255 | """ 256 | :return: The exception raised by the function 257 | """ 258 | self.wait() 259 | return self._value 260 | 261 | def reset(self) -> None: 262 | """ 263 | Reset the result object 264 | """ 265 | self._success = None 266 | self._value = None 267 | self._ready_event.clear() 268 | 269 | 270 | class UnorderedAsyncExitResultIterator(UnorderedAsyncResultIterator): 271 | 272 | def __init__(self, cache: Dict) -> None: 273 | super().__init__(cache, n_tasks=None, job_id=EXIT_FUNC, timeout=None) 274 | self.type = JobType.EXIT 275 | 276 | def get_results(self) -> List[Any]: 277 | """ 278 | :return: List of exit results 279 | """ 280 | return list(self._items) 281 | 282 | def reset(self) -> None: 283 | """ 284 | Reset the result object 285 | """ 286 | self._n_tasks = None 287 | self._items.clear() 288 | self._n_received = 0 289 | self._n_returned = 0 290 | self._exception = None 291 | self._got_exception.clear() 292 | 293 | 294 | AsyncResultType = Union[ 295 | AsyncResult, AsyncResultWithExceptionGetter, UnorderedAsyncResultIterator, UnorderedAsyncExitResultIterator 296 | ] 297 | -------------------------------------------------------------------------------- /mpire/context.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | try: 3 | import multiprocess as mp_dill 4 | import multiprocess.managers # Needed in utils.py 5 | except ImportError: 6 | mp_dill = None 7 | import platform 8 | import threading 9 | 10 | # Check if fork is available as start method. It's not available on Windows machines 11 | try: 12 | mp.get_context('fork') 13 | FORK_AVAILABLE = True 14 | except ValueError: 15 | FORK_AVAILABLE = False 16 | 17 | # Check if we're running on Windows or MacOS 18 | RUNNING_WINDOWS = platform.system() == "Windows" 19 | RUNNING_MACOS = platform.system() == "Darwin" 20 | 21 | 22 | # Threading context so we can use threading as backend as well 23 | class ThreadingContext: 24 | 25 | Barrier = threading.Barrier 26 | Condition = threading.Condition 27 | Event = threading.Event 28 | Lock = threading.Lock 29 | RLock = threading.RLock 30 | Thread = threading.Thread 31 | 32 | # threading doesn't have Array and JoinableQueue, so we take it from multiprocessing. Both are thread-safe. We need 33 | # the Process class for the MPIRE insights SyncManager instance. 
34 | Array = mp.Array 35 | JoinableQueue = mp.JoinableQueue 36 | Process = mp.Process 37 | Value = mp.Value 38 | 39 | 40 | MP_CONTEXTS = {'mp': {'fork': mp.get_context('fork') if FORK_AVAILABLE else None, 41 | 'forkserver': mp.get_context('forkserver') if FORK_AVAILABLE else None, 42 | 'spawn': mp.get_context('spawn')}, 43 | 'threading': ThreadingContext} 44 | if mp_dill is not None: 45 | MP_CONTEXTS['mp_dill'] = {'fork': mp_dill.get_context('fork') if FORK_AVAILABLE else None, 46 | 'forkserver': mp_dill.get_context('forkserver') if FORK_AVAILABLE else None, 47 | 'spawn': mp_dill.get_context('spawn')} 48 | 49 | DEFAULT_START_METHOD = 'fork' if FORK_AVAILABLE else 'spawn' 50 | -------------------------------------------------------------------------------- /mpire/dashboard/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from mpire.dashboard.dashboard import connect_to_dashboard, shutdown_dashboard, start_dashboard 3 | from mpire.dashboard.utils import get_stacklevel, set_stacklevel 4 | except (ImportError, ModuleNotFoundError): 5 | def _not_installed(*_, **__): 6 | raise NotImplementedError("Install the dashboard dependencies to enable the dashboard") 7 | 8 | connect_to_dashboard = shutdown_dashboard = start_dashboard = _not_installed 9 | get_stacklevel = set_stacklevel = _not_installed 10 | -------------------------------------------------------------------------------- /mpire/dashboard/connection_classes.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from multiprocessing import Event 3 | from multiprocessing.managers import BaseManager 4 | from multiprocessing.synchronize import Event as EventType 5 | from typing import Optional 6 | 7 | 8 | class DashboardStartedEvent: 9 | 10 | def __init__(self) -> None: 11 | self.event: Optional[EventType] = None 12 | 13 | def init(self) -> None: 14 | self.event = Event() 15 | 16 | def reset(self) -> None: 17 | self.event = None 18 | 19 | def set(self) -> None: 20 | if self.event is None: 21 | self.init() 22 | self.event.set() 23 | 24 | def is_set(self) -> bool: 25 | return self.event.is_set() if self.event is not None else False 26 | 27 | def wait(self, timeout: Optional[float] = None) -> bool: 28 | return self.event.wait(timeout) if self.event is not None else False 29 | 30 | 31 | class DashboardManager(BaseManager): 32 | pass 33 | 34 | 35 | @dataclass 36 | class DashboardManagerConnectionDetails: 37 | host: Optional[str] = None 38 | port: Optional[int] = None 39 | 40 | def clear(self) -> None: 41 | self.host = None 42 | self.port = None 43 | -------------------------------------------------------------------------------- /mpire/dashboard/connection_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | from mpire.dashboard.connection_classes import DashboardManagerConnectionDetails, DashboardStartedEvent 4 | 5 | # If a user has not installed the dashboard dependencies than the imports below will fail 6 | try: 7 | from mpire.dashboard import connect_to_dashboard 8 | from mpire.dashboard.dashboard import DASHBOARD_STARTED_EVENT 9 | from mpire.dashboard.manager import DASHBOARD_MANAGER_CONNECTION_DETAILS 10 | except (ImportError, ModuleNotFoundError): 11 | DASHBOARD_MANAGER_CONNECTION_DETAILS = DashboardManagerConnectionDetails() 12 | DASHBOARD_STARTED_EVENT = DashboardStartedEvent() 13 | 14 | def connect_to_dashboard(*_): 15 | pass 16 
| 17 | DashboardConnectionDetails = Tuple[Optional[str], Optional[int], bool] 18 | 19 | 20 | def get_dashboard_connection_details() -> DashboardConnectionDetails: 21 | """ 22 | Obtains the connection details of a dashboard. These details need to be passed on to child processes when the 23 | start method is either forkserver or spawn. 24 | 25 | :return: Dashboard manager host, port_nr and whether a dashboard is started/connected 26 | """ 27 | return (DASHBOARD_MANAGER_CONNECTION_DETAILS.host, DASHBOARD_MANAGER_CONNECTION_DETAILS.port, 28 | DASHBOARD_STARTED_EVENT.is_set()) 29 | 30 | 31 | def set_dashboard_connection(dashboard_connection_details: DashboardConnectionDetails, 32 | auto_connect: bool = True) -> None: 33 | """ 34 | Sets the dashboard connection details and connects to an existing dashboard if needed. 35 | 36 | :param dashboard_connection_details: Dashboard manager host, port_nr and whether a dashboard is started/connected 37 | :param auto_connect: Whether to automatically connect to a server when the dashboard_started event is set 38 | """ 39 | global DASHBOARD_MANAGER_CONNECTION_DETAILS 40 | 41 | dashboard_manager_host, dashboard_manager_port_nr, dashboard_started = dashboard_connection_details 42 | if (dashboard_manager_host is not None and dashboard_manager_port_nr is not None and 43 | not DASHBOARD_STARTED_EVENT.is_set()): 44 | if dashboard_started and auto_connect: 45 | connect_to_dashboard(dashboard_manager_port_nr, dashboard_manager_host) 46 | else: 47 | DASHBOARD_MANAGER_CONNECTION_DETAILS.host = dashboard_manager_host 48 | DASHBOARD_MANAGER_CONNECTION_DETAILS.port = dashboard_manager_port_nr 49 | if dashboard_started: 50 | DASHBOARD_STARTED_EVENT.set() 51 | -------------------------------------------------------------------------------- /mpire/dashboard/dashboard.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import getpass 3 | try: 4 | from importlib.resources import files as resource 5 | except ImportError: 6 | # Python < 3.9 compatibility 7 | from importlib_resources import files as resource 8 | import logging 9 | import os 10 | import signal 11 | import socket 12 | from datetime import datetime 13 | from multiprocessing import Event, Process 14 | from multiprocessing.managers import BaseProxy 15 | from typing import Dict, Optional, Sequence, Tuple, Union 16 | 17 | from flask import Flask, jsonify, render_template, request 18 | from markupsafe import escape 19 | from werkzeug.serving import make_server 20 | 21 | from mpire.dashboard.connection_classes import DashboardStartedEvent 22 | from mpire.dashboard.manager import (DASHBOARD_MANAGER_CONNECTION_DETAILS, 23 | get_manager_client_dicts, shutdown_manager_server, start_manager_server) 24 | from mpire.dashboard.utils import get_two_available_ports 25 | 26 | logger = logging.getLogger(__name__) 27 | logger_werkzeug = logging.getLogger('werkzeug') 28 | logger_werkzeug.setLevel(logging.ERROR) 29 | app = Flask(__name__) 30 | _server_process = None 31 | with open(resource('mpire.dashboard') / 'templates' / 'progress_bar.html', 'r') as fp: 32 | _progress_bar_html = fp.read() 33 | 34 | _DASHBOARD_MANAGER = None 35 | _DASHBOARD_TQDM_DICT = None 36 | _DASHBOARD_TQDM_DETAILS_DICT = None 37 | DASHBOARD_STARTED_EVENT = DashboardStartedEvent() 38 | 39 | 40 | @app.route('/') 41 | def index() -> str: 42 | """ 43 | Obtain the index HTML 44 | 45 | :return: HTML 46 | """ 47 | # Obtain user.
This can fail when the current uid refers to a non-existing user, which can happen when running in a 48 | # container as a non-root user. See https://github.com/sybrenjansen/mpire/issues/128. 49 | try: 50 | user = getpass.getuser() 51 | except KeyError: 52 | user = "n/a" 53 | return render_template('index.html', username=user, hostname=socket.gethostname(), 54 | manager_host=DASHBOARD_MANAGER_CONNECTION_DETAILS.host or 'localhost', 55 | manager_port_nr=DASHBOARD_MANAGER_CONNECTION_DETAILS.port) 56 | 57 | 58 | @app.route('/_progress_bar_update') 59 | def progress_bar_update() -> str: 60 | """ 61 | Obtain progress bar updates (should be called through AJAX) 62 | 63 | :return: JSON string containing progress bar updates 64 | """ 65 | # As we get updates only when the progress bar is updated we need to fix the 'duration' and 'time remaining' parts 66 | # (time never stops) 67 | now = datetime.now() 68 | result = [] 69 | for pb_id in sorted(_DASHBOARD_TQDM_DICT.keys()): 70 | progress = _DASHBOARD_TQDM_DICT.get(pb_id) 71 | if progress['total'] is None: 72 | progress['total'] = '?' 73 | if progress['success'] and progress['n'] != progress['total']: 74 | progress['duration'] = str(now - progress['started_raw']).rsplit('.', 1)[0] 75 | progress['remaining'] = (str(progress['finished_raw'] - now).rsplit('.', 1)[0] 76 | if progress['finished_raw'] is not None and progress['finished_raw'] > now 77 | else '-') 78 | result.append(progress) 79 | 80 | return jsonify(result=result) 81 | 82 | 83 | @app.route('/_progress_bar_new') 84 | def progress_bar_new() -> str: 85 | """ 86 | Obtain a piece of HTML for a new progress bar (should be called through AJAX) 87 | 88 | :return: JSON string containing new progress bar HTML 89 | """ 90 | pb_id = int(request.args['pb_id']) 91 | has_insights = request.args['has_insights'] == 'true' 92 | 93 | # Obtain progress bar details. Only show the user@host part if it doesn't equal the user@host of this process 94 | # (in case someone connected to this dashboard from another machine or user) 95 | progress_bar_details = _DASHBOARD_TQDM_DETAILS_DICT.get(pb_id) 96 | if progress_bar_details['user'] == f'{getpass.getuser()}@{socket.gethostname()}': 97 | progress_bar_details['user'] = '' 98 | else: 99 | progress_bar_details['user'] = '{}:'.format(progress_bar_details['user']) 100 | 101 | # Create table for worker insights 102 | insights_workers = [] 103 | if has_insights: 104 | for worker_id in range(progress_bar_details['n_jobs']): 105 | insights_workers.append(f"{worker_id}" 106 | f"" 107 | f"" 108 | f"" 109 | f"" 110 | f"" 111 | f"" 112 | f"") 113 | insights_workers = "\n".join(insights_workers) 114 | 115 | return jsonify(result=_progress_bar_html.format(id=pb_id, insights_workers=insights_workers, 116 | has_insights='block' if has_insights else 'none', 117 | **{k: escape(v) for k, v in progress_bar_details.items()})) 118 | 119 | 120 | def start_dashboard(port_range: Sequence = range(8080, 8100)) -> Dict[str, Union[int, str]]: 121 | """ 122 | Starts a new MPIRE dashboard 123 | 124 | :param port_range: Port range to try. 
125 | :return: A dictionary containing the dashboard port number and manager host and port number being used 126 | """ 127 | global _server_process, _DASHBOARD_MANAGER 128 | 129 | if not DASHBOARD_STARTED_EVENT.is_set(): 130 | 131 | DASHBOARD_STARTED_EVENT.init() 132 | 133 | dashboard_port_nr, manager_port_nr = get_two_available_ports(port_range) 134 | 135 | # Set up manager server 136 | _DASHBOARD_MANAGER = start_manager_server(manager_port_nr) 137 | 138 | # Start flask server 139 | logging.getLogger('werkzeug').setLevel(logging.WARN) 140 | _server_process = Process(target=_run, args=(DASHBOARD_STARTED_EVENT, dashboard_port_nr, 141 | get_manager_client_dicts()), 142 | daemon=True, name='dashboard-process') 143 | _server_process.start() 144 | DASHBOARD_STARTED_EVENT.wait() 145 | 146 | # Return connect information 147 | return {'dashboard_port_nr': dashboard_port_nr, 148 | 'manager_host': DASHBOARD_MANAGER_CONNECTION_DETAILS.host or socket.gethostname(), 149 | 'manager_port_nr': DASHBOARD_MANAGER_CONNECTION_DETAILS.port} 150 | 151 | else: 152 | raise RuntimeError("You already have a running dashboard") 153 | 154 | 155 | @atexit.register 156 | def shutdown_dashboard() -> None: 157 | """ Shuts down the dashboard """ 158 | if DASHBOARD_STARTED_EVENT.is_set(): 159 | global _server_process, _DASHBOARD_MANAGER, _DASHBOARD_TQDM_DICT, _DASHBOARD_TQDM_DETAILS_DICT 160 | if _server_process is not None: 161 | # Send SIGINT to the server process, which is the only way to stop it without causing semaphore leaks 162 | os.kill(_server_process.pid, signal.SIGINT) 163 | _server_process.join() 164 | shutdown_manager_server(_DASHBOARD_MANAGER) 165 | _DASHBOARD_MANAGER = None 166 | _DASHBOARD_TQDM_DICT = None 167 | _DASHBOARD_TQDM_DETAILS_DICT = None 168 | DASHBOARD_STARTED_EVENT.reset() 169 | 170 | 171 | def connect_to_dashboard(manager_port_nr: int, manager_host: Optional[Union[bytes, str]] = None) -> None: 172 | """ 173 | Connects to an existing MPIRE dashboard 174 | 175 | :param manager_port_nr: Port to use when connecting to a manager 176 | :param manager_host: Host to use when connecting to a manager. 
If ``None`` it will use localhost 177 | """ 178 | global _DASHBOARD_MANAGER, DASHBOARD_MANAGER_CONNECTION_DETAILS 179 | 180 | if DASHBOARD_STARTED_EVENT.is_set(): 181 | raise RuntimeError("You're already connected to a running dashboard") 182 | 183 | # Set connection variables so we can connect to the right manager 184 | manager_host = manager_host or "127.0.0.1" 185 | DASHBOARD_MANAGER_CONNECTION_DETAILS.host = manager_host 186 | DASHBOARD_MANAGER_CONNECTION_DETAILS.port = manager_port_nr 187 | 188 | # Try to connect 189 | try: 190 | get_manager_client_dicts() 191 | except ConnectionRefusedError: 192 | raise ConnectionRefusedError("Could not connect to dashboard manager at " 193 | f"{manager_host.decode()}:{manager_port_nr}") 194 | 195 | DASHBOARD_STARTED_EVENT.set() 196 | 197 | 198 | def _run(started: Event, dashboard_port_nr: int, manager_client_dicts: Tuple[BaseProxy, BaseProxy, BaseProxy]) -> None: 199 | """ 200 | Starts a dashboard server 201 | 202 | :param started: Event that signals the dashboard server has started 203 | :param manager_host: Dashboard manager host 204 | :param manager_port_nr: Dashboard manager port number 205 | :param dashboard_port_nr: Dashboard port number 206 | """ 207 | global _DASHBOARD_TQDM_DICT, _DASHBOARD_TQDM_DETAILS_DICT 208 | _DASHBOARD_TQDM_DICT, _DASHBOARD_TQDM_DETAILS_DICT, _ = manager_client_dicts 209 | 210 | # Start server 211 | server = make_server('0.0.0.0', dashboard_port_nr, app) 212 | started.set() 213 | logger.info(f"Server started on 0.0.0.0:{dashboard_port_nr}") 214 | server.serve_forever() 215 | -------------------------------------------------------------------------------- /mpire/dashboard/manager.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Lock 2 | from multiprocessing.synchronize import Lock as LockType 3 | from multiprocessing.managers import BaseProxy 4 | from typing import Dict, Optional, Tuple 5 | 6 | from mpire.dashboard.connection_classes import DashboardManager, DashboardManagerConnectionDetails 7 | from mpire.signal import ignore_keyboard_interrupt 8 | 9 | 10 | # Dict for tqdm progress bar updates 11 | DASHBOARD_TQDM_DICT = None 12 | 13 | # Dict for tqdm progress bar details (function called etc.) 
14 | DASHBOARD_TQDM_DETAILS_DICT = None 15 | 16 | # Lock for registering new progress bars 17 | DASHBOARD_TQDM_LOCK = None 18 | 19 | # Connection details for connecting to a manager 20 | DASHBOARD_MANAGER_CONNECTION_DETAILS = DashboardManagerConnectionDetails() 21 | 22 | 23 | def get_dashboard_tqdm_dict() -> Dict: 24 | """ 25 | :return: Dashboard tqdm dict which should be used in a DashboardManager context 26 | """ 27 | global DASHBOARD_TQDM_DICT 28 | if DASHBOARD_TQDM_DICT is None: 29 | DASHBOARD_TQDM_DICT = {} 30 | return DASHBOARD_TQDM_DICT 31 | 32 | 33 | def get_dashboard_tqdm_details_dict() -> Dict: 34 | """ 35 | :return: Dashboard tqdm details dict which should be used in a DashboardManager context 36 | """ 37 | global DASHBOARD_TQDM_DETAILS_DICT 38 | if DASHBOARD_TQDM_DETAILS_DICT is None: 39 | DASHBOARD_TQDM_DETAILS_DICT = {} 40 | return DASHBOARD_TQDM_DETAILS_DICT 41 | 42 | 43 | def get_dashboard_tqdm_lock() -> LockType: 44 | """ 45 | :return: Dashboard tqdm lock which should be used in a DashboardManager context 46 | """ 47 | global DASHBOARD_TQDM_LOCK 48 | if DASHBOARD_TQDM_LOCK is None: 49 | DASHBOARD_TQDM_LOCK = Lock() 50 | return DASHBOARD_TQDM_LOCK 51 | 52 | 53 | def start_manager_server(manager_port_nr: int) -> DashboardManager: 54 | """ 55 | Start a SyncManager 56 | 57 | :param manager_port_nr: Port number to use for the manager 58 | :return: SyncManager and hostname 59 | """ 60 | global DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK, \ 61 | DASHBOARD_MANAGER_HOST, DASHBOARD_MANAGER_PORT 62 | 63 | DashboardManager.register('get_dashboard_tqdm_dict', get_dashboard_tqdm_dict) 64 | DashboardManager.register('get_dashboard_tqdm_details_dict', get_dashboard_tqdm_details_dict) 65 | DashboardManager.register('get_dashboard_tqdm_lock', get_dashboard_tqdm_lock) 66 | 67 | # Create manager 68 | dm = DashboardManager(address=("127.0.0.1", manager_port_nr), authkey=b'mpire_dashboard') 69 | dm.start(ignore_keyboard_interrupt) 70 | DASHBOARD_TQDM_DICT = dm.get_dashboard_tqdm_dict() 71 | DASHBOARD_TQDM_DETAILS_DICT = dm.get_dashboard_tqdm_details_dict() 72 | DASHBOARD_TQDM_LOCK = dm.get_dashboard_tqdm_lock() 73 | 74 | # Set host and port number so other processes know where to connect to 75 | DASHBOARD_MANAGER_CONNECTION_DETAILS.host = "127.0.0.1" 76 | DASHBOARD_MANAGER_CONNECTION_DETAILS.port = manager_port_nr 77 | 78 | return dm 79 | 80 | 81 | def shutdown_manager_server(manager: Optional[DashboardManager]) -> None: 82 | """ 83 | Shutdown a DashboardManager 84 | 85 | :param manager: DashboardManager to shutdown 86 | """ 87 | global DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK 88 | if manager is not None: 89 | manager.shutdown() 90 | DASHBOARD_TQDM_DICT = None 91 | DASHBOARD_TQDM_DETAILS_DICT = None 92 | DASHBOARD_TQDM_LOCK = None 93 | DASHBOARD_MANAGER_CONNECTION_DETAILS.clear() 94 | 95 | 96 | def get_manager_client_dicts() -> Tuple[BaseProxy, BaseProxy, BaseProxy]: 97 | """ 98 | Connect to a DashboardManager and obtain the synchronized tqdm dashboard dicts 99 | 100 | :return: DashboardManager tqdm dict, tqdm details dict, tqdm lock 101 | """ 102 | global DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK 103 | 104 | # If we're already connected to a manager, return the dicts directly 105 | if DASHBOARD_TQDM_DICT is not None: 106 | return DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK 107 | 108 | # Connect to a server 109 | DashboardManager.register('get_dashboard_tqdm_dict', 
get_dashboard_tqdm_dict) 110 | DashboardManager.register('get_dashboard_tqdm_details_dict', get_dashboard_tqdm_details_dict) 111 | DashboardManager.register('get_dashboard_tqdm_lock', get_dashboard_tqdm_lock) 112 | dm = DashboardManager( 113 | address=(DASHBOARD_MANAGER_CONNECTION_DETAILS.host, DASHBOARD_MANAGER_CONNECTION_DETAILS.port), 114 | authkey=b'mpire_dashboard' 115 | ) 116 | dm.connect() 117 | 118 | DASHBOARD_TQDM_DICT = dm.get_dashboard_tqdm_dict() 119 | DASHBOARD_TQDM_DETAILS_DICT = dm.get_dashboard_tqdm_details_dict() 120 | DASHBOARD_TQDM_LOCK = dm.get_dashboard_tqdm_lock() 121 | return DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK 122 | -------------------------------------------------------------------------------- /mpire/dashboard/static/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/mpire/dashboard/static/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /mpire/dashboard/static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/mpire/dashboard/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /mpire/dashboard/static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/mpire/dashboard/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /mpire/dashboard/static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/mpire/dashboard/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /mpire/dashboard/static/refresh.js: -------------------------------------------------------------------------------- 1 | // Make ajax calls synchronous. Note that this is deprecated because of possible user experience problems, but in our 2 | // case this doesn't influence it. 
It actually makes it better 3 | $.ajaxSetup({ 4 | async: false 5 | }); 6 | 7 | 8 | // Enable tooltips (uses jQuery) 9 | $(function() { 10 | $(document).tooltip(); 11 | }); 12 | 13 | 14 | var progress_bar_animation_duration = 450; 15 | var refresh_interval = 500; 16 | var completed_pb_ids = {}; 17 | refresh(); 18 | setInterval(refresh, refresh_interval); 19 | 20 | 21 | // Update progress bar given an ID and a progress (between 0-1) 22 | function update_progress_bar(pb_id, progress) 23 | { 24 | $("#pb_" + pb_id).stop().css("width", $("#pb_" + pb_id).width()).animate( 25 | { 26 | width: (progress * 100) + '%', 27 | easing: 'linear' 28 | }, 29 | { 30 | duration: progress_bar_animation_duration, 31 | start: function(promise) 32 | { 33 | // Set text 34 | if (progress * $("#pb_" + pb_id + "_container").width() != 0) 35 | { 36 | $(this).text(Math.round(progress * 100) + '%'); 37 | } 38 | } 39 | }); 40 | } 41 | 42 | 43 | /** 44 | * http://stackoverflow.com/questions/2353211/hsl-to-rgb-color-conversion 45 | * 46 | * Converts an HSL color value to RGB. Conversion formula 47 | * adapted from http://en.wikipedia.org/wiki/HSL_color_space. 48 | * Assumes h, s, and l are contained in the set [0, 1] and 49 | * returns r, g, and b in the set [0, 255]. 50 | * 51 | * @param Number h The hue 52 | * @param Number s The saturation 53 | * @param Number l The lightness 54 | * @return Array The RGB representation 55 | */ 56 | function hslToRgb(h, s, l) 57 | { 58 | var r, g, b; 59 | 60 | if(s == 0){ 61 | r = g = b = l; // achromatic 62 | }else{ 63 | function hue2rgb(p, q, t){ 64 | if(t < 0) t += 1; 65 | if(t > 1) t -= 1; 66 | if(t < 1/6) return p + (q - p) * 6 * t; 67 | if(t < 1/2) return q; 68 | if(t < 2/3) return p + (q - p) * (2/3 - t) * 6; 69 | return p; 70 | } 71 | 72 | var q = l < 0.5 ? l * (1 + s) : l + s - l * s; 73 | var p = 2 * l - q; 74 | r = hue2rgb(p, q, h + 1/3); 75 | g = hue2rgb(p, q, h); 76 | b = hue2rgb(p, q, h - 1/3); 77 | } 78 | 79 | return [Math.floor(r * 255), Math.floor(g * 255), Math.floor(b * 255)]; 80 | } 81 | 82 | 83 | // convert a number to a color using hsl 84 | function numberToColorHsl(i) 85 | { 86 | // as the function expects a value between 0 and 1, and red = 0° and green = 120° 87 | // we convert the input to the appropriate hue value 88 | var hue = i * 1.2 / 3.6; 89 | // we convert hsl to rgb (saturation 100%, lightness 50%) 90 | var rgb = hslToRgb(hue, 1, .7); 91 | // we format to css value and return 92 | return 'rgb(' + rgb[0] + ',' + rgb[1] + ',' + rgb[2] + ')'; 93 | } 94 | 95 | 96 | // Hide part of a text if it's too long and add read more/read less functionality 97 | function AddReadMore(tag_id, char_limit, text) 98 | { 99 | // Only update when the text changes. We strip the ' ... Read more'/' ... Read less' parts (14 characters) 100 | var original_text = $("#" + tag_id).text(); 101 | if (original_text.substring(0, original_text.length - 14) == text) 102 | return; 103 | 104 | if (text.length > char_limit) 105 | { 106 | var first_part = text.substring(0, char_limit); 107 | var second_part = text.substring(char_limit, text.length); 108 | var new_html = first_part + " " + 109 | "... 
Read more"; 112 | } 113 | else 114 | { 115 | var new_html = text; 116 | } 117 | 118 | $("#" + tag_id).html(new_html); 119 | } 120 | 121 | 122 | // Refresh contents 123 | function refresh() 124 | { 125 | $.getJSON($SCRIPT_ROOT + '/_progress_bar_update', {}, function(data) 126 | { 127 | var i, worker_id, worker_prefix, task_idx, task_prefix; 128 | for (i = 0; i < data.result.length; i++) 129 | { 130 | var pb = data.result[i]; 131 | var is_new = false; 132 | 133 | // Check if progress-bar exists 134 | if ($('#pb_' + pb.id).length == 0) 135 | { 136 | // If not, request new HTML for progress bar and prepend it to table 137 | $.getJSON($SCRIPT_ROOT + '/_progress_bar_new', 138 | {pb_id: pb.id, has_insights: !$.isEmptyObject(pb.insights)}, function(new_data) 139 | { 140 | $('#progress-table > tbody').prepend(new_data.result); 141 | }); 142 | 143 | is_new = true; 144 | } 145 | 146 | // If it's already completed, do nothing, except when this is a new progress bar (e.g., when refreshed) or 147 | // when the success status has changed 148 | if (pb.id in completed_pb_ids && completed_pb_ids[pb.id] === pb.success && !is_new) 149 | { 150 | continue; 151 | } 152 | 153 | // Set new progress 154 | update_progress_bar(pb.id, pb.percentage); 155 | $('#pb_' + pb.id + '_n').text(pb.n); 156 | $('#pb_' + pb.id + '_total').text(pb.total); 157 | $('#pb_' + pb.id + '_started').text(pb.started); 158 | $('#pb_' + pb.id + '_duration').text(pb.duration); 159 | $('#pb_' + pb.id + '_remaining').text(pb.remaining); 160 | $('#pb_' + pb.id + '_finished').text(pb.finished); 161 | 162 | // Set insights, if available 163 | if (!$.isEmptyObject(pb.insights)) 164 | { 165 | $('#pb_' + pb.id + '_insights_total_start_up_time').text(pb.insights['total_start_up_time']); 166 | $('#pb_' + pb.id + '_insights_start_up_time_mean').text(pb.insights['start_up_time_mean']); 167 | $('#pb_' + pb.id + '_insights_start_up_time_std').text(pb.insights['start_up_time_std']); 168 | $('#pb_' + pb.id + '_insights_start_up_ratio').text((pb.insights['start_up_ratio'] * 100.).toFixed(2)) 169 | .css('color', numberToColorHsl(1.0 - pb.insights['start_up_ratio'])); 170 | $('#pb_' + pb.id + '_insights_total_init_time').text(pb.insights['total_init_time']); 171 | $('#pb_' + pb.id + '_insights_init_time_mean').text(pb.insights['init_time_mean']); 172 | $('#pb_' + pb.id + '_insights_init_time_std').text(pb.insights['init_time_std']); 173 | $('#pb_' + pb.id + '_insights_init_ratio').text((pb.insights['init_ratio'] * 100.).toFixed(2)) 174 | .css('color', numberToColorHsl(1.0 - pb.insights['waiting_ratio'])); 175 | $('#pb_' + pb.id + '_insights_total_waiting_time').text(pb.insights['total_waiting_time']); 176 | $('#pb_' + pb.id + '_insights_waiting_time_mean').text(pb.insights['waiting_time_mean']); 177 | $('#pb_' + pb.id + '_insights_waiting_time_std').text(pb.insights['waiting_time_std']); 178 | $('#pb_' + pb.id + '_insights_waiting_ratio').text((pb.insights['waiting_ratio'] * 100.).toFixed(2)) 179 | .css('color', numberToColorHsl(1.0 - pb.insights['waiting_ratio'])); 180 | $('#pb_' + pb.id + '_insights_total_working_time').text(pb.insights['total_working_time']); 181 | $('#pb_' + pb.id + '_insights_working_time_mean').text(pb.insights['working_time_mean']); 182 | $('#pb_' + pb.id + '_insights_working_time_std').text(pb.insights['working_time_std']); 183 | $('#pb_' + pb.id + '_insights_working_ratio').text((pb.insights['working_ratio'] * 100.).toFixed(2)) 184 | .css('color', numberToColorHsl(pb.insights['working_ratio'])); 185 | $('#pb_' + pb.id + 
'_insights_total_exit_time').text(pb.insights['total_exit_time']); 186 | $('#pb_' + pb.id + '_insights_exit_time_mean').text(pb.insights['exit_time_mean']); 187 | $('#pb_' + pb.id + '_insights_exit_time_std').text(pb.insights['exit_time_std']); 188 | $('#pb_' + pb.id + '_insights_exit_ratio').text((pb.insights['exit_ratio'] * 100.).toFixed(2)) 189 | .css('color', numberToColorHsl(1.0 - pb.insights['waiting_ratio'])); 190 | for (worker_id = 0; worker_id < pb.insights['n_completed_tasks'].length; worker_id++) 191 | { 192 | worker_prefix = '#pb_' + pb.id + '_insights_worker_' + worker_id; 193 | $(worker_prefix + '_tasks_completed').text(pb.insights['n_completed_tasks'][worker_id]); 194 | $(worker_prefix + '_start_up_time').text(pb.insights['start_up_time'][worker_id]); 195 | $(worker_prefix + '_init_time').text(pb.insights['init_time'][worker_id]); 196 | $(worker_prefix + '_waiting_time').text(pb.insights['waiting_time'][worker_id]); 197 | $(worker_prefix + '_working_time').text(pb.insights['working_time'][worker_id]); 198 | $(worker_prefix + '_exit_time').text(pb.insights['exit_time'][worker_id]); 199 | } 200 | for (task_idx = 0; task_idx < pb.insights['top_5_max_task_durations'].length; task_idx++) 201 | { 202 | task_prefix = '#pb_' + pb.id + '_insights_task_' + task_idx; 203 | $(task_prefix).show(); 204 | $(task_prefix + '_duration').text(pb.insights['top_5_max_task_durations'][task_idx]); 205 | AddReadMore("pb_" + pb.id + "_insights_task_" + task_idx + "_args", 70, 206 | pb.insights['top_5_max_task_args'][task_idx]); 207 | } 208 | } 209 | 210 | if (pb.success) 211 | { 212 | // Success if we're at 100% 213 | if (pb.n == pb.total) 214 | { 215 | $('#pb_' + pb.id).addClass('bg-success'); 216 | 217 | // Make lightsaber light up 218 | if (!(pb.id in completed_pb_ids)) 219 | { 220 | $('.lightsaber').animate({color: '#00FF00'}, 300).animate({color: '#dc3545'}, 300); 221 | } 222 | completed_pb_ids[pb.id] = true; 223 | } 224 | } 225 | else 226 | { 227 | // Danger if we've encountered a failure 228 | $('#pb_' + pb.id).addClass('bg-danger'); 229 | 230 | // Add traceback info 231 | $('#pb_' + pb.id + '_traceback').show().text(pb.traceback); 232 | 233 | // Add a flashing flash 234 | $('#pb_' + pb.id + '_flash').fadeIn(200).fadeOut(200).fadeIn(200).fadeOut(200).fadeIn(200); 235 | 236 | // Make lightsaber light up 237 | if (!(pb.id in completed_pb_ids)) 238 | { 239 | $('.lightsaber').animate({color: '#000000'}, 300).animate({color: '#dc3545'}, 300); 240 | } 241 | completed_pb_ids[pb.id] = false; 242 | } 243 | } 244 | }); 245 | return false; 246 | } 247 | -------------------------------------------------------------------------------- /mpire/dashboard/static/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 40px; 3 | } 4 | 5 | h1 { 6 | margin-bottom: 40px; 7 | } 8 | 9 | h1 .username { 10 | font-size: 0.4em; 11 | vertical-align: middle; 12 | cursor: help; 13 | } 14 | 15 | h1 .username_brackets { 16 | margin-left: 0.3em; 17 | margin-right: 0.3em; 18 | color: rgb(0, 255, 255); 19 | } 20 | 21 | h1 .username_at { 22 | margin-left: 0.1em; 23 | margin-right: 0.1em; 24 | } 25 | 26 | #menu-top-right { 27 | float: right; 28 | } 29 | 30 | #menu-top-right > div, 31 | #menu-top-right > a { 32 | display: inline-block; 33 | margin-left: 10px; 34 | } 35 | 36 | .mpire { 37 | position: fixed; 38 | bottom: 0; 39 | right: 40px; 40 | z-index: -99; 41 | font-size: 60%; 42 | color: #6c757d; 43 | } 44 | 45 | .lightsaber { 46 | color: #dc3545; 47 | } 48 | 49 | 
.pb_container { 50 | width: 100%; 51 | height: 18px; 52 | border-radius: .25rem; 53 | overflow: hidden; 54 | background-color: #FFF; 55 | } 56 | 57 | .pb { 58 | height: 100%; 59 | background-color: #007bff; 60 | text-align: center; 61 | } 62 | 63 | .pb_details_left_filler { 64 | float: left; 65 | width: 3%; 66 | height: 1em; 67 | } 68 | 69 | .pb_details_right { 70 | overflow: hidden; 71 | margin-top: 6px; 72 | padding-right: 2em; 73 | } 74 | 75 | .clickable { 76 | cursor: pointer; 77 | } 78 | 79 | td.pb_details { 80 | padding: 0; 81 | background-color: rgba(255, 255, 255, .025); 82 | } 83 | 84 | td.pb_details > div { 85 | display: none; 86 | padding: 12px 12px 24px 12px; 87 | } 88 | 89 | .separator { 90 | display: flex; 91 | align-items: center; 92 | text-align: center; 93 | font-size: 1.05em; 94 | margin-top: 10px; 95 | margin-bottom: 20px; 96 | } 97 | 98 | .separator::before, 99 | .separator::after { 100 | content: ''; 101 | border-bottom: 1px solid rgba(255, 255, 255, .2); 102 | } 103 | 104 | .separator::before { 105 | flex: 0.025; 106 | } 107 | 108 | .separator::after { 109 | flex: 0.975; 110 | } 111 | 112 | .separator:not(:empty)::before { 113 | margin-right: 1em; 114 | } 115 | 116 | .separator:not(:empty)::after { 117 | margin-left: 1em; 118 | } 119 | 120 | .separator.clickable:hover { 121 | color: rgb(255, 235, 156); 122 | } 123 | 124 | .separator.clickable:hover::before, 125 | .separator.clickable:hover::after { 126 | border-bottom: 1px solid rgba(255, 255, 255, 0.6); 127 | } 128 | 129 | .insights { 130 | display: none; 131 | } 132 | 133 | .insights p.info { 134 | color: #ccc; 135 | margin: 1em 0; 136 | } 137 | 138 | .insights span.info { 139 | color: rgb(0, 255, 255); 140 | } 141 | 142 | .insights span.clickable { 143 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; 144 | word-break: normal; 145 | color: rgb(255, 255, 255); 146 | } 147 | 148 | .insights span.clickable:hover { 149 | color: rgb(255, 235, 156); 150 | } 151 | 152 | .insights-left { 153 | float: left; 154 | width: 48%; 155 | } 156 | 157 | .insights-middle { 158 | float: left; 159 | width: 4%; 160 | height: 1em; 161 | } 162 | 163 | .insights-right { 164 | float: left; 165 | width: 48%; 166 | } 167 | 168 | .insights table { 169 | margin-bottom: 20px; 170 | } 171 | 172 | .insights th, 173 | .insights td { 174 | padding: 0.5em 1.0em; 175 | } 176 | 177 | .insights td { 178 | text-align: right; 179 | } 180 | 181 | .insights_table { 182 | width: 100%; 183 | table-layout: fixed; 184 | } 185 | 186 | .insights_table tr th:first-child, 187 | .insights_table tr td:first-child { 188 | width: 20px; 189 | } 190 | 191 | .insights_table tr th:nth-child(2), 192 | .insights_table tr td:nth-child(2) { 193 | width: 100px; 194 | text-align: right; 195 | } 196 | 197 | .insights_table tr th:nth-child(3), 198 | .insights_table tr td:nth-child(3) { 199 | text-align: left; 200 | word-break: break-all; 201 | } 202 | 203 | .insights_table .code { 204 | font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; 205 | word-break: break-all; 206 | color: rgb(0, 255, 255); 207 | } 208 | 209 | .glyphicon { 210 | display: none; 211 | } 212 | 213 | code { 214 | margin-left: 0.4em; 215 | margin-right: 0.4em; 216 | color: rgb(0, 255, 255); 217 | } 218 | 219 | p { 220 | margin-bottom: 0.4em; 221 | } 222 | 223 | .traceback { 224 | display: none; 225 | color: rgb(255, 100, 
100); 226 | border-top: 1px dashed; 227 | margin-top: 20px; 228 | padding-top: 20px; 229 | word-break: break-all; 230 | white-space: pre-wrap; 231 | } 232 | 233 | .hidden { 234 | display: none; 235 | } -------------------------------------------------------------------------------- /mpire/dashboard/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | MPIRE 10 | 11 | 12 | 13 | 14 | 15 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | {% include 'menu_top_right.html' %} 27 | 28 |

MPIRE 29 | [{{ username }}@{{ hostname }}] 30 |

31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
#TasksProgressDurationRemainingStartedFinished / ETA
47 | 48 | {% include 'mpire.html' %} 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /mpire/dashboard/templates/menu_top_right.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mpire/dashboard/templates/mpire.html: -------------------------------------------------------------------------------- 1 |
 2 |                        .-.
 3 |                       |_:_|
 4 |                      /(_Y_)\
 5 |                     ( \/M\/ )
 6 |  '.               _.'-/'-'\-'._
 7 |    ':           _/.--'[[[[]'--.\_
 8 |      ':        /_'  : |::"| :  '.\
 9 |        ':     //   ./ |oUU| \.'  :\
10 |          ':  _:'..' \_|___|_/ :   :|
11 |            ':.  .'  |_[___]_|  :.':\
12 |             [::\ |  :  | |  :   ; : \
13 |              '-'   \/'.| |.' \  .;.' |
14 |              |\_    \  '-'   :       |
15 |              |  \    \ .:    :   |   |
16 |              |   \    | '.   :    \  |
17 |              /       \   :. .;       |
18 |             /     |   |  :__/     :  \\
19 |            |  |   |    \:   | \   |   ||
20 |           /    \  : :  |:   /  |__|   /|
21 |           |     : : :_/_|  /'._\  '--|_\
22 |           /___.-/_|-'   \  \
23 |                          '-'
24 | 
-------------------------------------------------------------------------------- /mpire/dashboard/templates/progress_bar.html: -------------------------------------------------------------------------------- 1 | 2 | {id} 3 | - / - 4 | 5 |
6 |
7 |
8 | 9 | - 10 | 11 | - 12 | - 13 | - 14 | 15 | 16 | 17 |
18 |
19 |
20 |

Task details

21 |

Function: {function_name}, on line {function_line_no} 22 | of {user}{function_filename}

23 |

Invoked on line {invoked_line_no} of {invoked_filename}, 24 | through {invoked_code_context}

25 |

 26 |                     
27 | 28 |
29 |
30 |

Insights (click to expand)

35 |
36 |

Start up time denotes the time to spin up 37 | a worker. Init time is the time a worker spends on the initialization function, when 38 | provided. Waiting time is the time a worker needs to wait for new tasks to come in. 39 | Working time is the time a worker spends on the task at hand. Exit time is the time a 40 | worker spends on the exit function, when provided.

41 |
42 |

Global stats

43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 |
TotalMeanStdRatio (%)
Start up time
Init time
Waiting time
Working time
Exit time
86 | 87 |

Task stats

88 |

This section shows the top 5 tasks 89 | based on duration and is updated every 2 seconds.

90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 |
TimeArguments
122 |
123 | 124 |
125 | 126 |
127 |

Worker stats

128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | {insights_workers} 140 |
WorkerTasks completedT. start up timeT. init timeT. waiting timeT. working timeT. exit time
141 |
142 | 143 |
144 |
145 | 146 |
147 | 148 | -------------------------------------------------------------------------------- /mpire/dashboard/utils.py: -------------------------------------------------------------------------------- 1 | import getpass 2 | import inspect 3 | import socket 4 | from functools import partial 5 | from typing import Callable, Dict, List, Sequence, Tuple, Union 6 | import types 7 | 8 | DASHBOARD_FUNCTION_STACKLEVEL = 1 9 | 10 | 11 | def get_two_available_ports(port_range: Sequence) -> Tuple[int, int]: 12 | """ 13 | Get two available ports, one from the start and one from the end of the range 14 | 15 | :param port_range: Port range to try. Reverses the list and will then pick the first one available 16 | :raises OSError: If there are not enough ports available 17 | :return: Two available ports 18 | """ 19 | def _port_available(port_nr: int) -> bool: 20 | """ 21 | Checks if a port is available 22 | 23 | :param port_nr: Port number to check 24 | :return: True if available, False otherwise 25 | """ 26 | try: 27 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 28 | s.bind(('', port_nr)) 29 | s.close() 30 | return True 31 | except OSError: 32 | return False 33 | 34 | available_ports = set() 35 | for port_nr in port_range: 36 | if _port_available(port_nr): 37 | available_ports.add(port_nr) 38 | break 39 | 40 | for port_nr in reversed(port_range): 41 | if _port_available(port_nr): 42 | available_ports.add(port_nr) 43 | break 44 | 45 | if len(available_ports) != 2: 46 | raise OSError(f"Dashboard Manager Server: there are not enough ports available: {port_range}") 47 | 48 | return tuple(sorted(available_ports)) 49 | 50 | 51 | def get_stacklevel() -> int: 52 | """ 53 | Gets the stack level to use when obtaining function details (used for the dashboard) 54 | 55 | :return: Stack level 56 | """ 57 | return DASHBOARD_FUNCTION_STACKLEVEL 58 | 59 | 60 | def set_stacklevel(stacklevel: int) -> None: 61 | """ 62 | Sets the stack level to use when obtaining function details (used for the dashboard) 63 | 64 | :param stacklevel: Stack level 65 | """ 66 | global DASHBOARD_FUNCTION_STACKLEVEL 67 | DASHBOARD_FUNCTION_STACKLEVEL = stacklevel 68 | 69 | 70 | def get_function_details(func: Callable) -> Dict[str, Union[str, int]]: 71 | """ 72 | Obtain function details, including: 73 | 74 | - function filename 75 | - function line number 76 | - function name 77 | - invoked from filename 78 | - invoked from line number 79 | - invoked code context 80 | 81 | :param func: Function to call each time new task arguments become available. When passing on the worker ID the 82 | function should receive the worker ID as its first argument. If shared objects are provided the function should 83 | receive those as the next argument. If the worker state has been enabled it should receive a state variable as 84 | the next argument 85 | :return: Function details dictionary 86 | """ 87 | # Get the frame in which the pool.map(...) was called. We obtain the current stack and skip all frames which 88 | # involve the current mpire module. If the desired stack level is higher than 1, we continue until we've reached 89 | # the desired stack level. We then obtain the code context of that frame. 90 | invoked_frame = None 91 | stacklevel = 0 92 | for frame_info in inspect.stack(): 93 | if frame_info.frame.f_globals['__name__'].split('.')[0] != 'mpire' or stacklevel > 0: 94 | invoked_frame = frame_info 95 | stacklevel += 1 96 | if stacklevel == DASHBOARD_FUNCTION_STACKLEVEL: 97 | break 98 | 99 | # Obtain proper code context. 
Usually the last line of the invoked code is returned, but we want the complete 100 | # code snippet that called this function. That's why we increase the context size and need to find the start and 101 | # ending of the snippet. A context size of 10 should suffice. The end of the snippet is where we encounter the 102 | # line found when context=1 (i.e., what is returned in invoked_frame.code_context). The start is where we see 103 | # something along the lines of `.[i]map[_unordered](`. 104 | code_context = inspect.getframeinfo(invoked_frame.frame, context=10).code_context 105 | if code_context is not None: 106 | code_context = code_context[:code_context.index(invoked_frame.code_context[0]) + 1] 107 | code_context = find_calling_lines(code_context) 108 | invoked_line_no = invoked_frame.lineno - (len(code_context) - 1) 109 | code_context = ' '.join(line.strip() for line in code_context) 110 | else: 111 | invoked_line_no = 'N/A' 112 | 113 | if isinstance(func, partial): 114 | # If we're dealing with a partial, obtain the function within 115 | func = func.func 116 | elif hasattr(func, '__call__') and not isinstance(func, (type, types.FunctionType, types.MethodType)): 117 | # If we're dealing with a callable class instance, use its __call__ method 118 | func = func.__call__ 119 | 120 | # We use a try/except block as some constructs don't allow this. E.g., in the case the function is a MagicMock 121 | # (i.e., in unit tests) these inspections will fail 122 | try: 123 | function_filename = inspect.getabsfile(func) 124 | function_line_no = func.__code__.co_firstlineno 125 | function_name = func.__name__ 126 | except: 127 | function_filename = 'n/a' 128 | function_line_no = 'n/a' 129 | function_name = 'n/a' 130 | 131 | # Obtain user. This can fail when the current uid refers to a non-existing user, which can happen when running in a 132 | # container as a non-root user. See https://github.com/sybrenjansen/mpire/issues/128. 133 | try: 134 | user = getpass.getuser() 135 | except KeyError: 136 | user = "n/a" 137 | 138 | # Populate details 139 | func_details = {'user': f'{user}@{socket.gethostname()}', 140 | 'function_filename': function_filename, 141 | 'function_line_no': function_line_no, 142 | 'function_name': function_name, 143 | 'invoked_filename': invoked_frame.filename, 144 | 'invoked_line_no': invoked_line_no, 145 | 'invoked_code_context': code_context} 146 | 147 | return func_details 148 | 149 | 150 | def find_calling_lines(code_context: List[str]) -> List[str]: 151 | """ 152 | Tries to find the lines corresponding to the calling function 153 | 154 | :param code_context: List of code lines 155 | :return: List of code lines 156 | """ 157 | # Traverse the lines in reverse order. We need a closing bracket to indicate the end of the calling function. From 158 | # that point on we work our way backward until we find the corresponding opening bracket. There can be more bracket 159 | # groups in between, so we have to keep counting brackets until we've found the right one. 
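# Illustrative example (hypothetical snippet): for a calling context such as
#     results = pool.map(
#         my_func,
#         data)
# the reverse traversal first hits the closing ')' after `data`, which opens a parentheses group, and later the
# '(' of `pool.map(`, which closes it again; at that point all three lines are returned as the invoked code context.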
160 | n_parentheses_groups = 0 161 | found_parentheses_group = False 162 | inside_string = False 163 | inside_string_ch = None 164 | line_nr = 1 165 | for line_nr, line in enumerate(reversed(code_context), start=1): 166 | for ch in reversed(line): 167 | 168 | # If we're inside a string keep ignoring characters until we find the closing string character 169 | if inside_string: 170 | if ch == inside_string_ch: 171 | inside_string = False 172 | 173 | # Check if a string has started 174 | elif ch in {'"', "'"}: 175 | inside_string = True 176 | inside_string_ch = ch 177 | 178 | # Closing parenthesis group 179 | elif ch == ')': 180 | n_parentheses_groups += 1 181 | found_parentheses_group = True 182 | 183 | # Starting parenthesis group 184 | elif ch == '(': 185 | n_parentheses_groups -= 1 186 | 187 | # Check if we've found the corresponding opening bracket 188 | if found_parentheses_group and n_parentheses_groups == 0: 189 | break 190 | 191 | return code_context[-line_nr:] 192 | -------------------------------------------------------------------------------- /mpire/exception.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Dict, Tuple 3 | 4 | from pygments import highlight 5 | from pygments.lexers import Python3TracebackLexer 6 | from pygments.formatters import TerminalFormatter 7 | 8 | ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 9 | 10 | 11 | class StopWorker(Exception): 12 | """ Exception used to kill a worker """ 13 | pass 14 | 15 | 16 | class InterruptWorker(Exception): 17 | """ Exception used to interrupt a worker """ 18 | pass 19 | 20 | 21 | class CannotPickleExceptionError(Exception): 22 | """ Exception used when Pickle has trouble pickling the actual Exception """ 23 | pass 24 | 25 | 26 | def highlight_traceback(traceback_str: str) -> str: 27 | """ 28 | Highlight a traceback string in a terminal-friendly way 29 | 30 | :param traceback_str: The traceback string to highlight 31 | :return: The highlighted traceback string 32 | """ 33 | return highlight(traceback_str, Python3TracebackLexer(), TerminalFormatter()) 34 | 35 | 36 | def remove_highlighting(traceback_str: str) -> str: 37 | """ 38 | Remove the highlighting from a traceback string 39 | 40 | Taken from https://stackoverflow.com/a/14693789/4486236. 
41 | 42 | :param traceback_str: The traceback string to remove the highlighting from 43 | :return: The traceback string without highlighting 44 | """ 45 | return ANSI_ESCAPE.sub('', traceback_str) 46 | 47 | 48 | def populate_exception(err_type: type, err_args: Any, err_state: Dict, 49 | traceback_str: str) -> Tuple[Exception, Exception]: 50 | """ 51 | Populate an exception with the given arguments 52 | 53 | :param err_type: The type of the exception 54 | :param err_args: The arguments of the exception 55 | :param err_state: The state of the exception 56 | :param traceback_str: The traceback string of the exception 57 | :return: A tuple of the exception and the original exception 58 | """ 59 | err = err_type.__new__(err_type) 60 | err.args = err_args 61 | err.__dict__.update(err_state) 62 | traceback_err = Exception(highlight_traceback(traceback_str)) 63 | 64 | return err, traceback_err 65 | -------------------------------------------------------------------------------- /mpire/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/mpire/py.typed -------------------------------------------------------------------------------- /mpire/signal.py: -------------------------------------------------------------------------------- 1 | from inspect import Traceback 2 | from signal import getsignal, SIG_IGN, SIGINT, signal as signal_, Signals 3 | from threading import current_thread, main_thread 4 | from types import FrameType 5 | from typing import Type 6 | 7 | 8 | class DelayedKeyboardInterrupt: 9 | 10 | def __init__(self) -> None: 11 | self.signal_received = None 12 | 13 | def __enter__(self) -> None: 14 | # When we're in a thread we can't use signal handling 15 | if current_thread() == main_thread(): 16 | self.signal_received = False 17 | self.old_handler = signal_(SIGINT, self.handler) 18 | 19 | def handler(self, sig: Signals, frame: FrameType) -> None: 20 | self.signal_received = (sig, frame) 21 | 22 | def __exit__(self, exc_type: Type, exc_val: Exception, exc_tb: Traceback) -> None: 23 | if current_thread() == main_thread(): 24 | signal_(SIGINT, self.old_handler) 25 | if self.signal_received: 26 | self.old_handler(*self.signal_received) 27 | 28 | 29 | class DisableKeyboardInterruptSignal: 30 | 31 | def __enter__(self) -> None: 32 | if current_thread() == main_thread(): 33 | # Prevent signal from propagating to child process 34 | self._handler = getsignal(SIGINT) 35 | ignore_keyboard_interrupt() 36 | 37 | def __exit__(self, exc_type: Type, exc_val: Exception, exc_tb: Traceback) -> None: 38 | if current_thread() == main_thread(): 39 | # Restore signal 40 | signal_(SIGINT, self._handler) 41 | 42 | 43 | def ignore_keyboard_interrupt(): 44 | signal_(SIGINT, SIG_IGN) 45 | -------------------------------------------------------------------------------- /mpire/tqdm_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import warnings 3 | from contextlib import redirect_stderr, redirect_stdout 4 | from io import StringIO 5 | from multiprocessing import Lock as mp_Lock 6 | from multiprocessing.synchronize import Lock as LockType 7 | from typing import Optional, Tuple, Type 8 | 9 | from tqdm import TqdmExperimentalWarning, tqdm as tqdm_std 10 | from tqdm.notebook import tqdm as tqdm_notebook 11 | try: 12 | from tqdm.rich import tqdm as tqdm_rich 13 | RICH_AVAILABLE = True 14 | except ImportError: 15 | tqdm_rich = None 
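# rich is an optional dependency; when it cannot be imported, the TqdmMpireRich fallback defined further below
# raises an ImportError as soon as a rich progress bar style is requested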
16 | RICH_AVAILABLE = False 17 | 18 | from mpire.context import mp_dill 19 | from mpire.signal import DisableKeyboardInterruptSignal 20 | from mpire.utils import create_sync_manager 21 | 22 | PROGRESS_BAR_DEFAULT_STYLE = 'std' 23 | TqdmConnectionDetails = Tuple[LockType, "TqdmPositionRegister"] 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class TqdmMpire: 29 | """ Abstract class for tqdm classes that are used in mpire""" 30 | 31 | main_progress_bar = False 32 | 33 | @classmethod 34 | def set_main_progress_bar(cls, main: bool) -> None: 35 | """ 36 | Marks this progress bar as the main progress bar 37 | 38 | :param main: Whether this progress bar is the main progress bar 39 | """ 40 | cls.main_progress_bar = main 41 | 42 | def update(self, n: int = 1) -> None: 43 | """ 44 | Update the progress bar. Forces a final refresh when the progress bar is finished. 45 | 46 | :param n: Number of steps to update the progress bar with 47 | """ 48 | super().update(n) 49 | if self.n == self.total: 50 | self.final_refresh() 51 | 52 | def update_total(self, total: int) -> None: 53 | """ 54 | Update the total number of steps of the progress bar. Forces a refresh to show the new total. 55 | 56 | :param total: Total number of steps 57 | """ 58 | self.total = total 59 | self.refresh() 60 | 61 | def final_refresh(self, highest_progress_bar_position: Optional[int] = None) -> None: 62 | """ 63 | Final refresh of the progress bar. This function is called when the progress bar is finished. It should 64 | perform a final refresh of the progress bar and close it. 65 | 66 | :param highest_progress_bar_position: Highest progress bar position in case of multiple progress bars 67 | """ 68 | self.refresh() 69 | self.close() 70 | 71 | @classmethod 72 | def check_options(cls, options: dict) -> None: 73 | """ 74 | Check whether the options passed to the tqdm class are valid. This function should raise an exception when the 75 | options are invalid. 76 | 77 | :param options: Options passed to the tqdm class 78 | """ 79 | with redirect_stderr(StringIO()), redirect_stdout(StringIO()): 80 | cls(**options) 81 | 82 | 83 | class TqdmMpireStd(TqdmMpire, tqdm_std): 84 | """ A tqdm class that shows a standard progress bar. """ 85 | 86 | def final_refresh(self, highest_progress_bar_position: Optional[int] = None) -> None: 87 | """ 88 | Final refresh of the progress bar. This function is called when the progress bar is finished. It should 89 | perform a final refresh. 90 | 91 | When we're using a standard progress bar and this is the main progress bar, we add as many newlines as the 92 | highest progress bar position, such that new output is added after the progress bars. 93 | 94 | :param highest_progress_bar_position: Highest progress bar position in case of multiple progress bars 95 | """ 96 | self.refresh() 97 | self.disable = True 98 | if self.main_progress_bar and highest_progress_bar_position is not None: 99 | self.fp.write('\n' * (highest_progress_bar_position + 1)) 100 | 101 | 102 | if RICH_AVAILABLE: 103 | class TqdmMpireRich(TqdmMpire, tqdm_rich): 104 | """ A tqdm class that shows a rich progress bar. """ 105 | 106 | @classmethod 107 | def check_options(cls, options: dict) -> None: 108 | """ 109 | Check whether the options passed to the tqdm class are valid. This function should raise an exception when the 110 | options are invalid. 111 | 112 | For rich progress bars we disable the progress bar, because we don't want to show the progress bar in the 113 | terminal. 
For some reason, redirecting stdout/stderr makes the rich progress bar not work properly afterwards. 114 | 115 | :param options: Options passed to the tqdm class 116 | """ 117 | options = options.copy() 118 | if "options" not in options: 119 | options["options"] = {"disable": True} 120 | else: 121 | options["options"]["disable"] = True 122 | with warnings.catch_warnings(): 123 | warnings.simplefilter("ignore", TqdmExperimentalWarning) 124 | cls(**options) 125 | 126 | def display(self, *args, **kwargs) -> None: 127 | """ 128 | Display the progress bar and force a refresh of the widget. The refresh is needed to show the final update. 129 | """ 130 | super().display(*args, **kwargs) 131 | self._prog.refresh() 132 | 133 | else: 134 | class TqdmMpireRich(TqdmMpire): 135 | 136 | def __init__(self, *args, **kwargs) -> None: 137 | raise ImportError("rich is not installed. Please install rich to use rich progress bars.") 138 | 139 | 140 | class TqdmMpireNotebook(TqdmMpire, tqdm_notebook): 141 | """ A tqdm class that shows a GUI widget in notebooks. """ 142 | 143 | def __init__(self, *args, **kwargs) -> None: 144 | """ 145 | In case we're running tqdm in a notebook we need to apply a dirty hack to get progress bars working. 146 | Solution adapted from https://github.com/tqdm/tqdm/issues/485#issuecomment-473338308 147 | """ 148 | if not self.main_progress_bar: 149 | print(' ', end='', flush=True) 150 | super().__init__(*args, **kwargs) 151 | 152 | def update_total(self, total: int) -> None: 153 | """ 154 | Update the total number of steps of the progress bar. Forces a refresh to show the new total. 155 | 156 | In a notebook we also need to update the max value of the progress bar widget. 157 | 158 | :param total: Total number of steps 159 | """ 160 | self.container.children[1].max = total 161 | return super().update_total(total) 162 | 163 | @classmethod 164 | def check_options(cls, options: dict) -> None: 165 | """ 166 | Check whether the options passed to the tqdm class are valid. This function should raise an exception when the 167 | options are invalid. 168 | 169 | For notebook progress bars we set display to false, because redirecting stdout/stderr doesn't work for notebook 170 | widgets. 171 | 172 | :param options: Options passed to the tqdm class 173 | """ 174 | options = options.copy() 175 | options["display"] = False 176 | cls(**options) 177 | 178 | 179 | class TqdmMpireDashboardOnly(TqdmMpire, tqdm_std): 180 | """ 181 | A tqdm class that gives no output, but will still update the internal progress-bar attributes that the 182 | dashboard relies on. 183 | """ 184 | 185 | def __init__(self, *args, **kwargs) -> None: 186 | """ Set the file to a StringIO object so that no output is given """ 187 | kwargs["file"] = StringIO() 188 | super().__init__(*args, **kwargs) 189 | 190 | def display(self, *args, **kwargs) -> None: 191 | """ Don't display anything """ 192 | pass 193 | 194 | 195 | def get_tqdm(progress_bar_style: Optional[str]) -> Type[TqdmMpire]: 196 | """ 197 | Get the tqdm class to use based on the progress bar style 198 | 199 | :param progress_bar_style: The progress bar style to use. 
Can be one of ``None``, ``std``, or ``notebook`` 200 | :return: A tuple containing the tqdm class to use and a boolean indicating whether the progress bar is a notebook 201 | widget 202 | """ 203 | if progress_bar_style is None: 204 | progress_bar_style = PROGRESS_BAR_DEFAULT_STYLE 205 | if progress_bar_style == 'std': 206 | return TqdmMpireStd 207 | elif progress_bar_style == 'rich': 208 | return TqdmMpireRich 209 | elif progress_bar_style == 'notebook': 210 | return TqdmMpireNotebook 211 | elif progress_bar_style == 'dashboard': 212 | return TqdmMpireDashboardOnly 213 | else: 214 | raise ValueError(f'Invalid progress bar style: {progress_bar_style}. ' 215 | f'Use either None (=default), "std", or "notebook"') 216 | 217 | 218 | class TqdmPositionRegister: 219 | 220 | """ 221 | Class that keeps track of all the registered progress bar positions. Needed to properly display multiple tqdm 222 | progress bars 223 | """ 224 | 225 | def __init__(self, use_dill: bool) -> None: 226 | """ 227 | :param use_dill: Whether dill is used as serialization library 228 | """ 229 | self.lock = mp_dill.Lock() if use_dill else mp_Lock() 230 | self.highest_position = None 231 | 232 | def register_progress_bar_position(self, position: int) -> bool: 233 | """ 234 | Register new progress bar position. Returns True when it's the first one to register 235 | 236 | :param position: Progress bar position 237 | :return: Whether this progress bar is the first one to register 238 | """ 239 | with self.lock: 240 | first_one = self.highest_position is None 241 | if self.highest_position is None or position > self.highest_position: 242 | self.highest_position = position 243 | 244 | return first_one 245 | 246 | def get_highest_progress_bar_position(self) -> Optional[int]: 247 | """ 248 | Obtain the highest registered progress bar position 249 | 250 | :return: Highest progress bar position 251 | """ 252 | with self.lock: 253 | return self.highest_position 254 | 255 | def reset_progress_bar_positions(self) -> None: 256 | """ 257 | Reset the registered progress bar positions 258 | """ 259 | with self.lock: 260 | self.highest_position = None 261 | 262 | 263 | class TqdmManager: 264 | 265 | """Tqdm manager wrapper for syncing multiple progress bars, independent of process start method used.""" 266 | 267 | MANAGER = None 268 | LOCK = None 269 | POSITION_REGISTER = None 270 | 271 | @classmethod 272 | def start_manager(cls, use_dill: bool) -> bool: 273 | """ 274 | Sets up and starts the tqdm manager 275 | 276 | :param use_dill: Whether dill is used as serialization library 277 | :return: Whether the manager was started 278 | """ 279 | # Don't do anything when there's already a tqdm manager that has started 280 | if cls.LOCK is not None: 281 | return False 282 | 283 | logger.debug("Starting TQDM manager") 284 | 285 | # Create manager 286 | with DisableKeyboardInterruptSignal(): 287 | cls.MANAGER = create_sync_manager(use_dill) 288 | cls.MANAGER.register('TqdmPositionRegister', TqdmPositionRegister) 289 | cls.MANAGER.start() 290 | cls.LOCK = cls.MANAGER.Lock() 291 | cls.POSITION_REGISTER = cls.MANAGER.TqdmPositionRegister(use_dill) 292 | 293 | return True 294 | 295 | @classmethod 296 | def stop_manager(cls) -> None: 297 | """ 298 | Stops the tqdm manager 299 | """ 300 | cls.MANAGER.shutdown() 301 | cls.MANAGER = None 302 | cls.LOCK = None 303 | cls.POSITION_REGISTER = None 304 | 305 | @classmethod 306 | def get_connection_details(cls) -> TqdmConnectionDetails: 307 | """ 308 | Obtains the connection details of the tqdm manager. 
These details are needed to be passed on to child process 309 | when the start method is either forkserver or spawn. 310 | 311 | :return: TQDM lock and position register 312 | """ 313 | return cls.LOCK, cls.POSITION_REGISTER 314 | 315 | @classmethod 316 | def set_connection_details(cls, tqdm_connection_details: TqdmConnectionDetails) -> None: 317 | """ 318 | Sets the tqdm connection details. 319 | 320 | :param tqdm_connection_details: TQDM lock and position register 321 | """ 322 | cls.LOCK, cls.POSITION_REGISTER = tqdm_connection_details 323 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Make requirements.txt and setup.py work together properly 2 | # https://caremad.io/2013/07/setup-vs-requirement/ 3 | -e . -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | build_docs = build_sphinx -a -b html -E --source-dir docs/ --build-dir docs/_build -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | 4 | def read_description(): 5 | with open('README.rst') as file: 6 | return file.read() 7 | 8 | 9 | if __name__ == '__main__': 10 | setup( 11 | name='mpire', 12 | version='2.10.2', 13 | author='Sybren Jansen', 14 | description='A Python package for easy multiprocessing, but faster than multiprocessing', 15 | long_description=read_description(), 16 | url='https://github.com/sybrenjansen/mpire', 17 | license='MIT', 18 | packages=find_packages(exclude=['*tests*']), 19 | scripts=['bin/mpire-dashboard'], 20 | install_requires=['importlib_resources; python_version<"3.9"', 21 | 'pywin32>=301; platform_system=="Windows"', 22 | 'pygments>=2.0', 23 | 'tqdm>=4.27'], 24 | include_package_data=True, 25 | extras_require={ 26 | 'dashboard': ['flask'], 27 | 'dill': ['multiprocess; python_version<"3.11"', 28 | 'multiprocess>=0.70.15; python_version>="3.11"'], 29 | 'docs': ['docutils==0.17.1', 30 | 'sphinx==3.2.1', 31 | 'sphinx-rtd-theme==0.5.0', 32 | 'sphinx-autodoc-typehints==1.11.0', 33 | 'sphinxcontrib-images==0.9.2', 34 | 'sphinx-versions==1.0.1'], 35 | 'testing': ['ipywidgets', 36 | 'multiprocess; python_version<"3.11"', 37 | 'multiprocess>=0.70.15; python_version>="3.11"', 38 | 'numpy', 39 | 'pywin32>=301; platform_system=="Windows"', 40 | 'rich'], 41 | }, 42 | test_suite='tests', 43 | tests_require=['multiprocess', 'numpy'], 44 | classifiers=[ 45 | # Development status 46 | 'Development Status :: 5 - Production/Stable', 47 | 48 | # Supported Python versions 49 | 'Programming Language :: Python :: 3.8', 50 | 'Programming Language :: Python :: 3.9', 51 | 'Programming Language :: Python :: 3.10', 52 | 'Programming Language :: Python :: 3.11', 53 | 'Programming Language :: Python :: 3.12', 54 | 55 | # License 56 | 'License :: OSI Approved :: MIT License', 57 | 58 | # Topic 59 | 'Topic :: Software Development', 60 | 'Topic :: Software Development :: Libraries', 61 | 'Topic :: Software Development :: Libraries :: Python Modules' 62 | ] 63 | ) 64 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sybrenjansen/mpire/fd37e7cba3d6aac03b69e24afc0f22e7f8116b6b/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_signal.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import os 3 | import signal 4 | import unittest 5 | 6 | from mpire.context import RUNNING_WINDOWS 7 | from mpire.signal import DelayedKeyboardInterrupt, DisableKeyboardInterruptSignal 8 | from tests.utils import ConditionalDecorator 9 | 10 | 11 | @ConditionalDecorator(unittest.skip("Signals aren't fully supported on Windows"), RUNNING_WINDOWS) 12 | class DelayedKeyboardInterruptTest(unittest.TestCase): 13 | 14 | def test_delayed_keyboard_interrupt(self): 15 | """ 16 | The process should delay the keyboard interrupt in case ``in_thread=False``, so the expected value should be 1. 17 | However, we can't send signals to threads and so the DelayedKeyboardInterrupt doesn't do anything in that case. 18 | So there's no point in testing this with threading 19 | """ 20 | # Create events so we know when the process has started and we can send an interrupt 21 | started_event = mp.Event() 22 | quit_event = mp.Event() 23 | value = mp.Value('i', 0) 24 | 25 | # Start process and wait until it starts 26 | p = mp.Process(target=self.delayed_process_job, args=(started_event, quit_event, value)) 27 | p.start() 28 | started_event.wait() 29 | 30 | # Send kill signal and wait for it to join 31 | os.kill(p.pid, signal.SIGINT) 32 | quit_event.set() 33 | p.join() 34 | 35 | # Verify expected value. 36 | self.assertEqual(value.value, 1) 37 | 38 | @staticmethod 39 | def delayed_process_job(started_event: mp.Event, quit_event: mp.Event, value: mp.Value): 40 | """ 41 | Should be affected by interrupt 42 | """ 43 | try: 44 | with DelayedKeyboardInterrupt(): 45 | started_event.set() 46 | quit_event.wait() 47 | value.value = 1 48 | except KeyboardInterrupt: 49 | pass 50 | else: 51 | value.value = 2 52 | 53 | 54 | @ConditionalDecorator(unittest.skip("Signals aren't fully supported on Windows"), RUNNING_WINDOWS) 55 | class DisabledKeyboardInterruptTest(unittest.TestCase): 56 | 57 | def test_disabled_keyboard_interrupt(self): 58 | """ 59 | The process should ignore a keyboard interrupt entirely, which means the expected value should be True 60 | """ 61 | # Create events so we know when the process has started and we can send an interrupt 62 | started_event = mp.Event() 63 | quit_event = mp.Event() 64 | value = mp.Value('b', False) 65 | p = mp.Process(target=self.disabled_process_job, args=(started_event, quit_event, value)) 66 | p.start() 67 | started_event.wait() 68 | os.kill(p.pid, signal.SIGINT) 69 | quit_event.set() 70 | p.join() 71 | 72 | # If everything worked the value should be set to True 73 | self.assertEqual(value.value, True) 74 | 75 | @staticmethod 76 | def disabled_process_job(started_event: mp.Event, quit_event: mp.Event, value: mp.Value): 77 | """ 78 | Should not be affected by interrupt 79 | """ 80 | with DisableKeyboardInterruptSignal(): 81 | started_event.set() 82 | quit_event.wait() 83 | value.value = True 84 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | 4 | class ConditionalDecorator: 5 | 6 | def __init__(self, decorator: Callable, condition: bool) -> None: 7 | """ 8 | Decorator which takes a decorator and 
a condition as input. The decorator is only applied when the condition is met 9 | 10 | :param decorator: Decorator 11 | :param condition: Condition (boolean) 12 | """ 13 | self.decorator = decorator 14 | self.condition = condition 15 | 16 | def __call__(self, func) -> Callable: 17 | """ 18 | Enables the conditional decorator 19 | 20 | :param func: Function to decorate 21 | :return: Decorated function if condition is met, otherwise just the function 22 | """ 23 | if self.condition: 24 | return self.decorator(func) 25 | else: 26 | return func 27 | --------------------------------------------------------------------------------
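A minimal usage sketch of the ConditionalDecorator helper above, mirroring how the test modules in this repository apply it to skip signal tests on Windows (the test class and method names below are made up for illustration):

import unittest

from mpire.context import RUNNING_WINDOWS
from tests.utils import ConditionalDecorator


@ConditionalDecorator(unittest.skip("Signals aren't fully supported on Windows"), RUNNING_WINDOWS)
class SomeSignalTest(unittest.TestCase):

    def test_runs_everywhere_except_windows(self):
        self.assertTrue(True)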