--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Sybren Jansen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.github/workflows/github-pages.yml:
--------------------------------------------------------------------------------
1 | name: Docs
2 |
3 | on: push
4 |
5 | jobs:
6 | build-n-publish:
7 | name: Build and publish documentation to Github
8 | runs-on: ubuntu-20.04
9 |
10 | steps:
11 | - uses: actions/checkout@v3
12 | - name: Set up Python
13 | uses: actions/setup-python@v4
14 | with:
15 | python-version: "3.6"
16 | - name: Install dependencies
17 | run: |
18 | python -m pip install --upgrade pip
19 | pip install setuptools wheel twine rich
20 | pip install .[dashboard]
21 | pip install .[dill]
22 | pip install .[docs]
23 | - name: Build documentation
24 | run: |
25 | sphinx-versioning build -r master ./docs/ ./docs/_build/html/
26 | - name: Publish documentation to Github
27 | if: startsWith(github.ref, 'refs/tags')
28 | uses: peaceiris/actions-gh-pages@v3.8.0
29 | with:
30 | deploy_key: ${{ secrets.DEPLOY_GITHUB_PAGES_KEY }}
31 | external_repository: sybrenjansen/sybrenjansen.github.io
32 | publish_branch: main
33 | publish_dir: ./docs/_build/html/
34 | destination_dir: mpire
35 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/dill.rst:
--------------------------------------------------------------------------------
1 | .. _use_dill:
2 |
3 | Dill
4 | ====
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | For some functions or tasks it can be useful to not rely on pickle, but on some more powerful serialization backends
11 | like dill_. ``dill`` isn't installed by default. See :ref:`dilldep` for more information on installing the dependencies.
12 |
13 | One specific example where ``dill`` shines is when using start method ``spawn`` (the default on Windows) in combination
14 | with iPython or Jupyter notebooks. ``dill`` enables parallelizing more exotic objects like lambdas and functions defined
15 | in iPython and Jupyter notebooks. For all benefits of ``dill``, please refer to the `dill documentation`_.
16 |
17 | Once the dependencies have been installed, you can enable it using the ``use_dill`` flag:
18 |
19 | .. code-block:: python
20 |
21 | with WorkerPool(n_jobs=4, use_dill=True) as pool:
22 | ...
23 |
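For example, ``dill`` makes it possible to pass a lambda directly to a ``map`` function, something the default pickle
serializer can't handle. A minimal sketch, assuming the dependencies from :ref:`dilldep` have been installed:

.. code-block:: python

    from mpire import WorkerPool

    with WorkerPool(n_jobs=2, use_dill=True) as pool:
        # Lambdas can't be serialized by pickle, but dill handles them fine
        results = pool.map(lambda x: x * x, range(10))
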
24 | .. note::
25 |
26 |     Using ``dill`` can potentially slow down processing. This is the cost of having a more reliable and
27 | powerful serialization backend.
28 |
29 | .. _dill: https://pypi.org/project/dill/
30 | .. _dill documentation: https://github.com/uqfoundation/dill
31 |
--------------------------------------------------------------------------------
/mpire/dashboard/connection_classes.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from multiprocessing import Event
3 | from multiprocessing.managers import BaseManager
4 | from multiprocessing.synchronize import Event as EventType
5 | from typing import Optional
6 |
7 |
8 | class DashboardStartedEvent:
9 |
10 | def __init__(self) -> None:
11 | self.event: Optional[EventType] = None
12 |
13 | def init(self) -> None:
14 | self.event = Event()
15 |
16 | def reset(self) -> None:
17 | self.event = None
18 |
19 | def set(self) -> None:
20 | if self.event is None:
21 | self.init()
22 | self.event.set()
23 |
24 | def is_set(self) -> bool:
25 | return self.event.is_set() if self.event is not None else False
26 |
27 | def wait(self, timeout: Optional[float] = None) -> bool:
28 | return self.event.wait(timeout) if self.event is not None else False
29 |
30 |
31 | class DashboardManager(BaseManager):
32 | pass
33 |
34 |
35 | @dataclass
36 | class DashboardManagerConnectionDetails:
37 | host: Optional[str] = None
38 | port: Optional[int] = None
39 |
40 | def clear(self) -> None:
41 | self.host = None
42 | self.port = None
43 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/cpu_pinning.rst:
--------------------------------------------------------------------------------
1 | CPU pinning
2 | ===========
3 |
4 | You can pin the child processes of :obj:`mpire.WorkerPool` to specific CPUs by using the ``cpu_ids`` parameter in the
5 | constructor:
6 |
7 | .. code-block:: python
8 |
9 | # Pin the two child processes to CPUs 2 and 3
10 | with WorkerPool(n_jobs=2, cpu_ids=[2, 3]) as pool:
11 | ...
12 |
13 | # Pin the child processes to CPUs 40-59
14 | with WorkerPool(n_jobs=20, cpu_ids=list(range(40, 60))) as pool:
15 | ...
16 |
17 | # All child processes have to share a single core:
18 | with WorkerPool(n_jobs=4, cpu_ids=[0]) as pool:
19 | ...
20 |
21 | # All child processes have to share multiple cores, namely 4-7:
22 | with WorkerPool(n_jobs=4, cpu_ids=[[4, 5, 6, 7]]) as pool:
23 | ...
24 |
25 | # Each child process can use two distinctive cores:
26 | with WorkerPool(n_jobs=4, cpu_ids=[[0, 1], [2, 3], [4, 5], [6, 7]]) as pool:
27 | ...
28 |
29 | CPU IDs have to be non-negative integers that do not exceed the number of CPUs available (which can be retrieved by
30 | using :meth:`mpire.cpu_count`). Use ``None`` to disable CPU pinning (which is the default).
31 |
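A quick sketch of pinning each worker to its own CPU, using :meth:`mpire.cpu_count` to determine how many CPUs there
are (this assumes ``cpu_count`` can be imported directly from the ``mpire`` package):

.. code-block:: python

    from mpire import cpu_count, WorkerPool

    # One worker per available CPU, each pinned to its own CPU
    with WorkerPool(n_jobs=cpu_count(), cpu_ids=list(range(cpu_count()))) as pool:
        ...
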
32 | .. note::
33 |
34 | Pinning processes to CPU IDs doesn't work when using threading or when you're on macOS.
--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
1 | Contribution guidelines
2 | =======================
3 |
4 | If you want to contribute to MPIRE, great! Please follow the steps below to ensure a smooth process:
5 |
6 | 1. Clone the project.
7 | 2. Create a new branch for your feature or bug fix. Give your branch a meaningful name.
8 | 3. Make your feature addition or bug fix.
9 | 4. Add tests for it and test it yourself. Make sure it works on both Unix and Windows based systems, or make sure to
10 | document why it doesn't work for one of the platforms.
11 | 5. Add documentation for it. Don't forget about the changelog:
12 |
13 | - Reference the issue number from GitHub in the changelog, if applicable (see current changelog for examples).
14 | - Don't mention a date or a version number here, but use ``Unreleased`` instead.
15 |
16 | 6. Commit with a meaningful commit message (e.g. the changelog).
17 | 7. Open a pull request.
18 | 8. Resolve any issues or comments by the reviewer.
19 | 9. Merge PR by squashing all your individual commits.
20 |
21 | Making a release
22 | ----------------
23 |
24 | A release is only made by the project maintainer. The following steps are required:
25 |
26 | 1. Update the changelog with the release date and version number. Version numbers follow the `Semantic Versioning`_
27 |    guidelines.
28 | 2. Update the version number in ``setup.py`` and ``docs/conf.py``.
29 | 3. Commit and push the changes.
30 | 4. Make sure the tests pass on GitHub Actions.
31 | 5. Create a tag for the release by using ``git tag -a vX.Y.Z -m "vX.Y.Z"``.
32 | 6. Push the tag to GitHub by using ``git push origin vX.Y.Z``.
33 |
34 | .. _Semantic Versioning: https://semver.org/
35 |
--------------------------------------------------------------------------------
/mpire/signal.py:
--------------------------------------------------------------------------------
1 | from inspect import Traceback
2 | from signal import getsignal, SIG_IGN, SIGINT, signal as signal_, Signals
3 | from threading import current_thread, main_thread
4 | from types import FrameType
5 | from typing import Type
6 |
7 |
8 | class DelayedKeyboardInterrupt:
9 |
10 | def __init__(self) -> None:
11 | self.signal_received = None
12 |
13 | def __enter__(self) -> None:
14 | # When we're in a thread we can't use signal handling
15 | if current_thread() == main_thread():
16 | self.signal_received = False
17 | self.old_handler = signal_(SIGINT, self.handler)
18 |
19 | def handler(self, sig: Signals, frame: FrameType) -> None:
20 | self.signal_received = (sig, frame)
21 |
22 | def __exit__(self, exc_type: Type, exc_val: Exception, exc_tb: Traceback) -> None:
23 | if current_thread() == main_thread():
24 | signal_(SIGINT, self.old_handler)
25 | if self.signal_received:
26 | self.old_handler(*self.signal_received)
27 |
28 |
29 | class DisableKeyboardInterruptSignal:
30 |
31 | def __enter__(self) -> None:
32 | if current_thread() == main_thread():
33 | # Prevent signal from propagating to child process
34 | self._handler = getsignal(SIGINT)
35 | ignore_keyboard_interrupt()
36 |
37 | def __exit__(self, exc_type: Type, exc_val: Exception, exc_tb: Traceback) -> None:
38 | if current_thread() == main_thread():
39 | # Restore signal
40 | signal_(SIGINT, self._handler)
41 |
42 |
43 | def ignore_keyboard_interrupt():
44 | signal_(SIGINT, SIG_IGN)
45 |
--------------------------------------------------------------------------------
/bin/mpire-dashboard:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import signal
4 | from typing import Sequence
5 |
6 | from mpire.dashboard import start_dashboard
7 |
8 |
9 | def get_port_range() -> Sequence:
10 | """
11 | :return: port range
12 | """
13 | def _port_range(range_str) -> Sequence:
14 | n1, n2 = map(int, range_str.split('-'))
15 | if len(range(n1, n2)) < 2:
16 | raise ValueError
17 | return range(n1, n2)
18 |
19 | parser = argparse.ArgumentParser(description='MPIRE Dashboard')
20 | parser.add_argument('--port-range', dest='port_range', required=False, default=range(8080, 8100), type=_port_range,
21 | help='Port range for starting a dashboard. The range should accommodate at least two ports: '
22 | 'one for the webserver and one for the Python Manager server. Example: 6060-6080 will be '
23 | 'converted to `range(6060, 6080)`. Default: `range(8080, 8100)`.')
24 | return parser.parse_args().port_range
25 |
26 |
27 | if __name__ == '__main__':
28 | # Obtain port range
29 | port_range = get_port_range()
30 |
31 | # Start a dashboard
32 | print("Starting MPIRE dashboard...")
33 | dashboard_details = start_dashboard(port_range)
34 |
35 | # Print some details on how to connect
36 | print()
37 | print("MPIRE dashboard started on http://localhost:{}".format(dashboard_details['dashboard_port_nr']))
38 | print("Server is listening on {}:{}".format(dashboard_details['manager_host'],
39 | dashboard_details['manager_port_nr']))
40 | print("-" * 50)
41 | signal.pause()
42 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/order_tasks.rst:
--------------------------------------------------------------------------------
1 | Order tasks
2 | ===========
3 |
4 | .. contents:: Contents
5 | :depth: 2
6 | :local:
7 |
8 | In some settings it can be useful to supply the tasks to workers in a round-robin fashion. This means worker 0 will get
9 | task 0, worker 1 will get task 1, etc. After each worker has received a task, we start with worker 0 again, instead of
10 | picking the worker that most recently completed a task.
11 |
12 | When the chunk size is larger than 1, the tasks are distributed to the workers in order, but in chunks. I.e., when
13 | ``chunk_size=3`` tasks 0, 1, and 2 will be assigned to worker 0, tasks 3, 4, and 5 to worker 1, and so on.
14 |
15 | When ``keep_alive`` is set to ``True`` and a subsequent ``map`` call is made, MPIRE resets the worker order and starts at
16 | worker 0 again.
17 |
18 | .. warning::
19 |
20 | When tasks vary in execution time, the default task scheduler makes sure each worker is busy for approximately the
21 | same amount of time. This can mean that some workers execute more tasks than others. When using ``order_tasks`` this
22 | is no longer the case and therefore the total execution time is likely to be higher.
23 |
24 | You can enable/disable task ordering by setting the ``order_tasks`` flag:
25 |
26 | .. code-block:: python
27 |
28 | def task(x):
29 | pass
30 |
31 | with WorkerPool(n_jobs=4, order_tasks=True) as pool:
32 | pool.map(task, range(10))
33 |
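A small sketch that makes the round-robin assignment visible, using ``pass_worker_id`` (see :ref:`workerID`) to return
which worker handled which task:

.. code-block:: python

    def task(worker_id, x):
        return worker_id, x

    with WorkerPool(n_jobs=2, pass_worker_id=True, order_tasks=True) as pool:
        # With chunk_size=3, worker 0 handles tasks 0-2, worker 1 handles tasks 3-5,
        # worker 0 handles tasks 6-8, and so on
        print(pool.map(task, range(12), chunk_size=3))
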
34 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of
35 | :meth:`mpire.WorkerPool.set_order_tasks`:
36 |
37 | .. code-block:: python
38 |
39 | with WorkerPool(n_jobs=4) as pool:
40 | pool.set_order_tasks()
41 | pool.map(task, range(10))
42 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Build
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ${{ matrix.os }}
16 | strategy:
17 | matrix:
18 | os: [ubuntu-20.04, windows-latest, macos-latest]
19 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
20 |
21 | steps:
22 | - uses: actions/checkout@v3
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v4
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install flake8 pytest
31 | pip install .[dashboard]
32 | pip install .[dill]
33 | pip install .[testing]
34 | - name: Set ulimit for macOS
35 | if: matrix.os == 'macos-latest'
36 | run: |
37 | ulimit -a
38 | ulimit -n 1024
39 | - name: Lint with flake8
40 | run: |
41 | # stop the build if there are Python syntax errors or undefined names
42 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
43 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
44 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
45 | - name: Test with pytest
46 | timeout-minutes: 30
47 | run: |
48 | pytest -v -o log_cli=true -s
49 |
--------------------------------------------------------------------------------
/mpire/context.py:
--------------------------------------------------------------------------------
1 | import multiprocessing as mp
2 | try:
3 | import multiprocess as mp_dill
4 | import multiprocess.managers # Needed in utils.py
5 | except ImportError:
6 | mp_dill = None
7 | import platform
8 | import threading
9 |
10 | # Check if fork is available as start method. It's not available on Windows machines
11 | try:
12 | mp.get_context('fork')
13 | FORK_AVAILABLE = True
14 | except ValueError:
15 | FORK_AVAILABLE = False
16 |
17 | # Check if we're running on Windows or MacOS
18 | RUNNING_WINDOWS = platform.system() == "Windows"
19 | RUNNING_MACOS = platform.system() == "Darwin"
20 |
21 |
22 | # Threading context so we can use threading as backend as well
23 | class ThreadingContext:
24 |
25 | Barrier = threading.Barrier
26 | Condition = threading.Condition
27 | Event = threading.Event
28 | Lock = threading.Lock
29 | RLock = threading.RLock
30 | Thread = threading.Thread
31 |
32 | # threading doesn't have Array and JoinableQueue, so we take it from multiprocessing. Both are thread-safe. We need
33 | # the Process class for the MPIRE insights SyncManager instance.
34 | Array = mp.Array
35 | JoinableQueue = mp.JoinableQueue
36 | Process = mp.Process
37 | Value = mp.Value
38 |
39 |
40 | MP_CONTEXTS = {'mp': {'fork': mp.get_context('fork') if FORK_AVAILABLE else None,
41 | 'forkserver': mp.get_context('forkserver') if FORK_AVAILABLE else None,
42 | 'spawn': mp.get_context('spawn')},
43 | 'threading': ThreadingContext}
44 | if mp_dill is not None:
45 | MP_CONTEXTS['mp_dill'] = {'fork': mp_dill.get_context('fork') if FORK_AVAILABLE else None,
46 | 'forkserver': mp_dill.get_context('forkserver') if FORK_AVAILABLE else None,
47 | 'spawn': mp_dill.get_context('spawn')}
48 |
49 | DEFAULT_START_METHOD = 'fork' if FORK_AVAILABLE else 'spawn'
50 |
--------------------------------------------------------------------------------
/docs/usage/map/max_tasks_active.rst:
--------------------------------------------------------------------------------
1 | .. _max_active_tasks:
2 |
3 | Maximum number of active tasks
4 | ==============================
5 |
6 | When you have tasks that take up a lot of memory you can do a few things:
7 |
8 | - Limit the number of jobs (i.e., the number of worker processes).
9 | - Limit the number of active tasks (i.e., the number of tasks currently available to the workers: tasks that are in
10 |   the queue, ready to be processed).
11 |
12 | The first option is the most obvious one to save memory when the processes themselves use up much memory. The second is
13 | convenient when the argument list takes up too much memory. For example, suppose you want to kick off an enormous number
14 | of tasks (let's say a billion) whose arguments take up 1 KB each (e.g., large strings), then that task queue
15 | would take up ~1 TB of memory!
16 |
17 | In such cases, a good rule of thumb is to have twice as many active chunks of tasks as there are jobs. This means that
18 | when all workers complete their task at the same time, each can directly continue with another task. When workers take
19 | on their new tasks, the task generator is iterated further, to the point where there are again twice as many active
20 | chunks of tasks.
21 |
22 | In MPIRE, the maximum number of active tasks by default is set to ``n_jobs * chunk_size * 2``, so you don't have to
23 | tweak it for memory optimization. If, for whatever reason, you want to change this behavior, you can do so by setting
24 | the ``max_active_tasks`` parameter:
25 |
26 | .. code-block:: python
27 |
28 | with WorkerPool(n_jobs=4) as pool:
29 | results = pool.map(task, range(int(1e300)), iterable_len=int(1e300),
30 | chunk_size=int(1e5), max_tasks_active=4 * int(1e5))
31 |
32 | .. note::
33 |
34 | Setting the ``max_tasks_active`` parameter to a value lower than ``n_jobs * chunk_size`` can result in some workers
35 | not being able to do anything.
36 |
--------------------------------------------------------------------------------
/mpire/dashboard/templates/index.html:
--------------------------------------------------------------------------------
[Markup lost in extraction. Recoverable content: the dashboard index page, titled "MPIRE", includes
'menu_top_right.html' and 'mpire.html', shows the heading "MPIRE [{{ username }}@{{ hostname }}]", and renders a
progress table with the columns #, Tasks, Progress, Duration, Remaining, Started, and Finished / ETA.]
--------------------------------------------------------------------------------
/docs/usage/workerpool/worker_id.rst:
--------------------------------------------------------------------------------
1 | .. _workerID:
2 |
3 | Accessing the worker ID
4 | =======================
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | Each worker in MPIRE is given an integer ID to distinguish them. Worker #1 will have ID ``0``, #2 will have ID ``1``,
11 | etc. Sometimes it can be useful to have access to this ID.
12 |
13 | By default, the worker ID is not passed on. You can enable/disable this by setting the ``pass_worker_id`` flag:
14 |
15 | .. code-block:: python
16 |
17 | def task(worker_id, x):
18 | pass
19 |
20 | with WorkerPool(n_jobs=4, pass_worker_id=True) as pool:
21 | pool.map(task, range(10))
22 |
23 | .. important::
24 |
25 | The worker ID will always be the first argument passed on to the provided function.
26 |
27 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of
28 | :meth:`mpire.WorkerPool.pass_on_worker_id`:
29 |
30 | .. code-block:: python
31 |
32 | with WorkerPool(n_jobs=4) as pool:
33 | pool.pass_on_worker_id()
34 | pool.map(task, range(10))
35 |
36 | Elaborate example
37 | -----------------
38 |
39 | Here's a more elaborate example of using the worker ID together with a shared array, where each worker can only access
40 | the element corresponding to its worker ID, making the use of locking unnecessary:
41 |
42 | .. code-block:: python
43 |
44 | def square_sum(worker_id, shared_objects, x):
45 | # Even though the shared objects is a single container, we 'unpack' it anyway
46 | results_container = shared_objects
47 |
48 | # Square and sum
49 | results_container[worker_id] += x * x
50 |
51 | # Use a shared array of size equal to the number of jobs to store the results
52 | results_container = Array('f', 4, lock=False)
53 |
54 | with WorkerPool(n_jobs=4, shared_objects=results_container, pass_worker_id=True) as pool:
55 | # Square the results and store them in the results container
56 | pool.map_unordered(square_sum, range(100))
57 |
--------------------------------------------------------------------------------
/docs/usage/map/timeouts.rst:
--------------------------------------------------------------------------------
1 | .. _timeouts:
2 |
3 | Timeouts
4 | ========
5 |
6 | Timeouts can be set separately for the target, ``worker_init`` and ``worker_exit`` functions. When a timeout has been
7 | set and reached, it will throw a ``TimeoutError``:
8 |
9 | .. code-block:: python
10 |
11 | # Will raise TimeoutError, provided that the target function takes longer
12 | # than half a second to complete
13 | with WorkerPool(n_jobs=5) as pool:
14 | pool.map(time_consuming_function, range(10), task_timeout=0.5)
15 |
16 | # Will raise TimeoutError, provided that the worker_init function takes longer
17 | # than 3 seconds to complete or the worker_exit function takes longer than
18 | # 150.5 seconds to complete
19 | with WorkerPool(n_jobs=5) as pool:
20 | pool.map(time_consuming_function, range(10), worker_init=init, worker_exit=exit_,
21 | worker_init_timeout=3.0, worker_exit_timeout=150.5)
22 |
23 | Use ``None`` (=default) to disable timeouts.
24 |
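When a timeout is reached, the resulting ``TimeoutError`` can be caught like any other exception. A minimal sketch,
reusing the (hypothetical) ``time_consuming_function`` from the examples above:

.. code-block:: python

    with WorkerPool(n_jobs=5) as pool:
        try:
            results = pool.map(time_consuming_function, range(10), task_timeout=0.5)
        except TimeoutError:
            # A task took longer than the 0.5 second timeout
            results = None
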
25 | ``imap`` and ``imap_unordered``
26 | -------------------------------
27 |
28 | When you're using one of the lazy map functions (e.g., ``imap`` or ``imap_unordered``) then an exception will only be
29 | raised when the function is actually running. E.g. when executing:
30 |
31 | .. code-block:: python
32 |
33 | with WorkerPool(n_jobs=5) as pool:
34 | results = pool.imap(time_consuming_function, range(10), task_timeout=0.5)
35 |
36 | this will never raise. This is because ``imap`` and ``imap_unordered`` return a generator object, which doesn't execute
37 | anything until it is asked to yield its next result. When iterating through the results, it will raise as
38 | expected:
39 |
40 | .. code-block:: python
41 |
42 | with WorkerPool(n_jobs=5) as pool:
43 | results = pool.imap(time_consuming_function, range(10), task_timeout=0.5)
44 | for result in results:
45 | ...
46 |
47 | Threading
48 | ---------
49 |
50 | When using ``threading`` as start method MPIRE won't be able to interrupt certain functions, like ``time.sleep``.
--------------------------------------------------------------------------------
/mpire/exception.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Any, Dict, Tuple
3 |
4 | from pygments import highlight
5 | from pygments.lexers import Python3TracebackLexer
6 | from pygments.formatters import TerminalFormatter
7 |
8 | ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
9 |
10 |
11 | class StopWorker(Exception):
12 | """ Exception used to kill a worker """
13 | pass
14 |
15 |
16 | class InterruptWorker(Exception):
17 | """ Exception used to interrupt a worker """
18 | pass
19 |
20 |
21 | class CannotPickleExceptionError(Exception):
22 | """ Exception used when Pickle has trouble pickling the actual Exception """
23 | pass
24 |
25 |
26 | def highlight_traceback(traceback_str: str) -> str:
27 | """
28 | Highlight a traceback string in a terminal-friendly way
29 |
30 | :param traceback_str: The traceback string to highlight
31 | :return: The highlighted traceback string
32 | """
33 | return highlight(traceback_str, Python3TracebackLexer(), TerminalFormatter())
34 |
35 |
36 | def remove_highlighting(traceback_str: str) -> str:
37 | """
38 | Remove the highlighting from a traceback string
39 |
40 | Taken from https://stackoverflow.com/a/14693789/4486236.
41 |
42 | :param traceback_str: The traceback string to remove the highlighting from
43 | :return: The traceback string without highlighting
44 | """
45 | return ANSI_ESCAPE.sub('', traceback_str)
46 |
47 |
48 | def populate_exception(err_type: type, err_args: Any, err_state: Dict,
49 | traceback_str: str) -> Tuple[Exception, Exception]:
50 | """
51 | Populate an exception with the given arguments
52 |
53 | :param err_type: The type of the exception
54 | :param err_args: The arguments of the exception
55 | :param err_state: The state of the exception
56 | :param traceback_str: The traceback string of the exception
57 | :return: A tuple of the exception and the original exception
58 | """
59 | err = err_type.__new__(err_type)
60 | err.args = err_args
61 | err.__dict__.update(err_state)
62 | traceback_err = Exception(highlight_traceback(traceback_str))
63 |
64 | return err, traceback_err
65 |
--------------------------------------------------------------------------------
/docs/usage/map/worker_init_exit.rst:
--------------------------------------------------------------------------------
1 | .. _worker_init_exit:
2 |
3 | Worker init and exit
4 | ====================
5 |
6 | When you want to initialize a worker you can make use of the ``worker_init`` parameter of any ``map`` function. This
7 | will call the initialization function only once per worker. Similarly, if you need to clean up the worker at the end of
8 | its lifecycle you can use the ``worker_exit`` parameter. Additionally, the exit function can return anything you like,
9 | which can be collected using :meth:`mpire.WorkerPool.get_exit_results` after the workers are done.
10 |
11 | Both init and exit functions receive the worker ID, shared objects, and worker state in the same way as the task
12 | function does, given they're enabled.
13 |
14 | For example:
15 |
16 | .. code-block:: python
17 |
18 | def init_func(worker_state):
19 | # Initialize a counter for each worker
20 | worker_state['count_even'] = 0
21 |
22 | def square_and_count_even(worker_state, x):
23 | # Count number of even numbers and return the square
24 | if x % 2 == 0:
25 | worker_state['count_even'] += 1
26 | return x * x
27 |
28 | def exit_func(worker_state):
29 | # Return the counter
30 | return worker_state['count_even']
31 |
32 | with WorkerPool(n_jobs=4, use_worker_state=True) as pool:
33 | pool.map(square_and_count_even, range(100), worker_init=init_func, worker_exit=exit_func)
34 | print(pool.get_exit_results()) # Output, e.g.: [13, 13, 12, 12]
35 | print(sum(pool.get_exit_results())) # Output: 50
36 |
37 | .. important::
38 |
39 | When the ``worker_lifespan`` option is used to restart workers during execution, the exit function will be called
40 | for the worker that's shutting down and the init function will be called again for the new worker. Therefore, the
41 | number of elements in the list that's returned from :meth:`mpire.WorkerPool.get_exit_results` does not always equal
42 | ``n_jobs``.
43 |
44 | .. important::
45 |
46 | When ``keep_alive`` is enabled the workers won't be terminated after a ``map`` call. This means the exit function
47 | won't be called until it's time for cleaning up the entire pool. You will have to explicitly call
48 | :meth:`mpire.WorkerPool.stop_and_join` to receive the exit results.
49 |
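A sketch of that last case, reusing the functions from the example above:

.. code-block:: python

    with WorkerPool(n_jobs=4, use_worker_state=True, keep_alive=True) as pool:
        pool.map(square_and_count_even, range(100), worker_init=init_func, worker_exit=exit_func)

        # Workers are shut down here, which triggers the exit functions,
        # so the exit results are available afterwards
        pool.stop_and_join()
        print(pool.get_exit_results())
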
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | .strike {
2 | text-decoration: line-through;
3 | }
4 |
5 | /* From theme.css, but .section has been replaced by section to work around the new change of
6 |    <div class="section"> to <section>, which for some reason happened
7 | */
8 | .rst-content section ul {
9 | list-style:disc;
10 | line-height:24px;
11 | margin-bottom:24px
12 | }
13 | .rst-content section ul li {
14 | list-style:disc;
15 | margin-left:24px
16 | }
17 | .rst-content section ul li p:last-child,
18 | .rst-content section ul li ul {
19 | margin-top:0;
20 | margin-bottom:0
21 | }
22 | .rst-content section ul li li {
23 | list-style:circle
24 | }
25 | .rst-content section ul li li li {
26 | list-style:square
27 | }
28 | .rst-content section ul li ol li {
29 | list-style:decimal
30 | }
31 | .rst-content section ol {
32 | list-style:decimal;
33 | line-height:24px;
34 | margin-bottom:24px
35 | }
36 | .rst-content section ol li {
37 | list-style:decimal;
38 | margin-left:24px
39 | }
40 | .rst-content section ol li p:last-child,
41 | .rst-content section ol li ul {
42 | margin-bottom:0
43 | }
44 | .rst-content section ol li ul li {
45 | list-style:disc
46 | }
47 | .rst-content section ol.loweralpha,
48 | .rst-content section ol.loweralpha>li {
49 | list-style:lower-alpha
50 | }
51 | .rst-content section ol.upperalpha,
52 | .rst-content section ol.upperalpha>li {
53 | list-style:upper-alpha
54 | }
55 | .rst-content section ol li>*,
56 | .rst-content section ul li>* {
57 | margin-top:12px;
58 | margin-bottom:12px
59 | }
60 | .rst-content section ol li>:first-child,
61 | .rst-content section ul li>:first-child {
62 | margin-top:0
63 | }
64 | .rst-content section ol li>p,
65 | .rst-content section ol li>p:last-child,
66 | .rst-content section ul li>p,
67 | .rst-content section ul li>p:last-child {
68 | margin-bottom:12px
69 | }
70 | .rst-content section ol li>p:only-child,
71 | .rst-content section ol li>p:only-child:last-child,
72 | .rst-content section ul li>p:only-child,
73 | .rst-content section ul li>p:only-child:last-child {
74 | margin-bottom:0
75 | }
76 | .rst-content section ol li>ol,
77 | .rst-content section ol li>ul,
78 | .rst-content section ul li>ol,
79 | .rst-content section ul li>ul {
80 | margin-bottom:12px
81 | }
82 | .rst-content section ol.simple li>*,
83 | .rst-content section ol.simple li ol,
84 | .rst-content section ol.simple li ul,
85 | .rst-content section ul.simple li>*,
86 | .rst-content section ul.simple li ol,
87 | .rst-content section ul.simple li ul {
88 | margin-top:0;
89 | margin-bottom:0
90 | }
--------------------------------------------------------------------------------
/mpire/dashboard/connection_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Tuple
2 |
3 | from mpire.dashboard.connection_classes import DashboardManagerConnectionDetails, DashboardStartedEvent
4 |
5 | # If a user has not installed the dashboard dependencies then the imports below will fail
6 | try:
7 | from mpire.dashboard import connect_to_dashboard
8 | from mpire.dashboard.dashboard import DASHBOARD_STARTED_EVENT
9 | from mpire.dashboard.manager import DASHBOARD_MANAGER_CONNECTION_DETAILS
10 | except (ImportError, ModuleNotFoundError):
11 | DASHBOARD_MANAGER_CONNECTION_DETAILS = DashboardManagerConnectionDetails()
12 | DASHBOARD_STARTED_EVENT = DashboardStartedEvent()
13 |
14 | def connect_to_dashboard(*_):
15 | pass
16 |
17 | DashboardConnectionDetails = Tuple[Optional[str], Optional[int], bool]
18 |
19 |
20 | def get_dashboard_connection_details() -> DashboardConnectionDetails:
21 | """
22 |     Obtains the connection details of a dashboard. These details need to be passed on to child processes when the
23 | start method is either forkserver or spawn.
24 |
25 | :return: Dashboard manager host, port_nr and whether a dashboard is started/connected
26 | """
27 | return (DASHBOARD_MANAGER_CONNECTION_DETAILS.host, DASHBOARD_MANAGER_CONNECTION_DETAILS.port,
28 | DASHBOARD_STARTED_EVENT.is_set())
29 |
30 |
31 | def set_dashboard_connection(dashboard_connection_details: DashboardConnectionDetails,
32 | auto_connect: bool = True) -> None:
33 | """
34 | Sets the dashboard connection details and connects to an existing dashboard if needed.
35 |
36 | :param dashboard_connection_details: Dashboard manager host, port_nr and whether a dashboard is started/connected
37 | :param auto_connect: Whether to automatically connect to a server when the dashboard_started event is set
38 | """
39 | global DASHBOARD_MANAGER_CONNECTION_DETAILS
40 |
41 | dashboard_manager_host, dashboard_manager_port_nr, dashboard_started = dashboard_connection_details
42 | if (dashboard_manager_host is not None and dashboard_manager_port_nr is not None and
43 | not DASHBOARD_STARTED_EVENT.is_set()):
44 | if dashboard_started and auto_connect:
45 | connect_to_dashboard(dashboard_manager_port_nr, dashboard_manager_host)
46 | else:
47 | DASHBOARD_MANAGER_CONNECTION_DETAILS.host = dashboard_manager_host
48 | DASHBOARD_MANAGER_CONNECTION_DETAILS.port = dashboard_manager_port_nr
49 | if dashboard_started:
50 | DASHBOARD_STARTED_EVENT.set()
51 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 |
4 | def read_description():
5 | with open('README.rst') as file:
6 | return file.read()
7 |
8 |
9 | if __name__ == '__main__':
10 | setup(
11 | name='mpire',
12 | version='2.10.2',
13 | author='Sybren Jansen',
14 | description='A Python package for easy multiprocessing, but faster than multiprocessing',
15 | long_description=read_description(),
16 | url='https://github.com/sybrenjansen/mpire',
17 | license='MIT',
18 | packages=find_packages(exclude=['*tests*']),
19 | scripts=['bin/mpire-dashboard'],
20 | install_requires=['importlib_resources; python_version<"3.9"',
21 | 'pywin32>=301; platform_system=="Windows"',
22 | 'pygments>=2.0',
23 | 'tqdm>=4.27'],
24 | include_package_data=True,
25 | extras_require={
26 | 'dashboard': ['flask'],
27 | 'dill': ['multiprocess; python_version<"3.11"',
28 | 'multiprocess>=0.70.15; python_version>="3.11"'],
29 | 'docs': ['docutils==0.17.1',
30 | 'sphinx==3.2.1',
31 | 'sphinx-rtd-theme==0.5.0',
32 | 'sphinx-autodoc-typehints==1.11.0',
33 | 'sphinxcontrib-images==0.9.2',
34 | 'sphinx-versions==1.0.1'],
35 | 'testing': ['ipywidgets',
36 | 'multiprocess; python_version<"3.11"',
37 | 'multiprocess>=0.70.15; python_version>="3.11"',
38 | 'numpy',
39 | 'pywin32>=301; platform_system=="Windows"',
40 | 'rich'],
41 | },
42 | test_suite='tests',
43 | tests_require=['multiprocess', 'numpy'],
44 | classifiers=[
45 | # Development status
46 | 'Development Status :: 5 - Production/Stable',
47 |
48 | # Supported Python versions
49 | 'Programming Language :: Python :: 3.8',
50 | 'Programming Language :: Python :: 3.9',
51 | 'Programming Language :: Python :: 3.10',
52 | 'Programming Language :: Python :: 3.11',
53 | 'Programming Language :: Python :: 3.12',
54 |
55 | # License
56 | 'License :: OSI Approved :: MIT License',
57 |
58 | # Topic
59 | 'Topic :: Software Development',
60 | 'Topic :: Software Development :: Libraries',
61 | 'Topic :: Software Development :: Libraries :: Python Modules'
62 | ]
63 | )
64 |
--------------------------------------------------------------------------------
/docs/usage/map/task_chunking.rst:
--------------------------------------------------------------------------------
1 | .. _Task chunking:
2 |
3 | Task chunking
4 | =============
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | By default, MPIRE chunks the given tasks into ``64 * n_jobs`` chunks. Each worker is given one chunk of tasks at a time
11 | before returning its results. This usually makes processing faster when you have rather small tasks (computation wise)
12 | and results are pickled/unpickled when they are sent to a worker or back to the main process. Chunking the tasks and results ensures
13 | that each process has to pickle/unpickle less often.
14 |
15 | However, to determine the number of tasks in the argument list the iterable should implement the ``__len__`` method,
16 | which is available in default containers like ``list`` or ``tuple``, but isn't available in most generator objects
17 | (the ``range`` object is one of the exceptions). To allow working with generators each ``map`` function has the option
18 | to pass the iterable length:
19 |
20 | .. code-block:: python
21 |
22 | with WorkerPool(n_jobs=4) as pool:
23 | # 1. This will issue a warning and sets the chunk size to 1
24 | results = pool.map(square, ((x,) for x in range(1000)))
25 |
26 | # 2. This will issue a warning as well and sets the chunk size to 1
27 | results = pool.map(square, ((x,) for x in range(1000)), n_splits=4)
28 |
29 | # 3. Square the numbers using a generator using a specific number of splits
30 | results = pool.map(square, ((x,) for x in range(1000)), iterable_len=1000, n_splits=4)
31 |
32 | # 4. Square the numbers using a generator using automatic chunking
33 | results = pool.map(square, ((x,) for x in range(1000)), iterable_len=1000)
34 |
35 | # 5. Square the numbers using a generator using a fixed chunk size
36 | results = pool.map(square, ((x,) for x in range(1000)), chunk_size=4)
37 |
38 | In the first two examples the function call will issue a warning because MPIRE doesn't know how large the chunks should
39 | be, as the total number of tasks is unknown; it therefore falls back to a chunk size of 1. The third example should
40 | work as expected where 4 chunks are used. The fourth example uses 256 chunks (the default 64 times the number of
41 | workers). The last example uses a fixed chunk size of four, so MPIRE doesn't need to know the iterable length.
42 |
43 | You can also call the chunk function manually:
44 |
45 | .. code-block:: python
46 |
47 | from mpire.utils import chunk_tasks
48 |
49 | # Convert to list because chunk_tasks returns a generator
50 | print(list(chunk_tasks(range(10), n_splits=3)))
51 | print(list(chunk_tasks(range(10), chunk_size=2.5)))
52 | print(list(chunk_tasks((x for x in range(10)), iterable_len=10, n_splits=6)))
53 |
54 | will output:
55 |
56 | .. code-block:: python
57 |
58 | [(0, 1, 2, 3), (4, 5, 6), (7, 8, 9)]
59 | [(0, 1, 2), (3, 4), (5, 6, 7), (8, 9)]
60 | [(0, 1), (2, 3), (4,), (5, 6), (7, 8), (9,)]
61 |
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | MPIRE builds are distributed through PyPi_.
5 |
6 | .. _PyPi: https://pypi.org/
7 |
8 | MPIRE can be installed through pip:
9 |
10 | .. code-block:: bash
11 |
12 | pip install mpire
13 |
14 | and is available through conda-forge:
15 |
16 | .. code-block:: bash
17 |
18 | conda install -c conda-forge mpire
19 |
20 |
21 | Dependencies
22 | ------------
23 |
24 | - Python >= 3.8
25 |
26 | Python packages (installed automatically when installing MPIRE):
27 |
28 | - tqdm
29 | - pygments
30 | - pywin32 (Windows only)
31 | - importlib_resources (Python < 3.9 only)
32 |
33 | .. note::
34 |
35 | When using MPIRE on Windows with conda, you might need to install ``pywin32`` using ``conda install pywin32`` when
36 | encountering a ``DLL failed to load`` error.
37 |
38 | .. _dilldep:
39 |
40 | Dill
41 | ~~~~
42 |
43 | For some functions or tasks it can be useful to not rely on pickle, but on some more powerful serialization backend,
44 | like dill_. ``dill`` isn't installed by default as it has a BSD license, while MPIRE has an MIT license. If you want
45 | to use it, the license of MPIRE will change to a BSD license as well, as required by the original BSD license. See the
46 | `BSD license of multiprocess`_ for more information.
47 |
48 | You can enable ``dill`` by executing:
49 |
50 | .. code-block:: bash
51 |
52 | pip install mpire[dill]
53 |
54 | This will install multiprocess_, which uses ``dill`` under the hood. You can enable the use of ``dill`` by setting
55 | ``use_dill=True`` in the :obj:`mpire.WorkerPool` constructor.
56 |
57 | .. _dill: https://pypi.org/project/dill/
58 | .. _multiprocess: https://github.com/uqfoundation/multiprocess
59 | .. _BSD license of multiprocess: https://github.com/uqfoundation/multiprocess/blob/master/LICENSE
60 |
61 |
62 | .. _richdep:
63 |
64 | Rich progress bars
65 | ~~~~~~~~~~~~~~~~~~
66 |
67 | If you want to use rich_ progress bars, you have to install the dependencies for it manually:
68 |
69 | .. code-block:: bash
70 |
71 | pip install rich
72 |
73 |
74 | .. _rich: https://github.com/Textualize/rich
75 |
76 |
77 | .. _dashboarddep:
78 |
79 | Dashboard
80 | ~~~~~~~~~
81 |
82 | Optionally, you can install the dependencies for the MPIRE dashboard, which depends on Flask_. Similarly as with
83 | ``dill``, ``Flask`` has a BSD-license. Installing these dependencies will change the license of MPIRE to BSD as well.
84 | See the `BSD license of Flask`_ for more information.
85 |
86 | The dashboard allows you to see progress information from a browser. This is convenient when running scripts in a
87 | notebook or screen, or when you want to share the progress information with others. Install the appropriate dependencies to
88 | enable this:
89 |
90 | .. code-block:: bash
91 |
92 | pip install mpire[dashboard]
93 |
94 | .. _Flask: https://flask.palletsprojects.com/en/1.1.x/
95 | .. _BSD license of Flask: https://github.com/pallets/flask/blob/main/LICENSE.rst
96 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to the MPIRE documentation!
2 | ===================================
3 |
4 | MPIRE, short for MultiProcessing Is Really Easy, is a Python package for multiprocessing. MPIRE is faster in
5 | most scenarios, packs more features, and is generally more user-friendly than the default multiprocessing package. It
6 | combines the convenient map like functions of ``multiprocessing.Pool`` with the benefits of using copy-on-write shared
7 | objects of ``multiprocessing.Process``, together with easy-to-use worker state, worker insights, worker init and exit
8 | functions, timeouts, and progress bar functionality.
9 |
10 | Features
11 | --------
12 |
13 | - Faster execution than other multiprocessing libraries. See benchmarks_.
14 | - Intuitive, Pythonic syntax
15 | - Multiprocessing with ``map``/``map_unordered``/``imap``/``imap_unordered``/``apply``/``apply_async`` functions
16 | - Easy use of copy-on-write shared objects with a pool of workers (copy-on-write is only available for start method
17 | ``fork``, so it's not supported on Windows)
18 | - Each worker can have its own state and with convenient worker init and exit functionality this state can be easily
19 | manipulated (e.g., to load a memory-intensive model only once for each worker without the need of sending it through a
20 | queue)
21 | - Progress bar support using tqdm_ (``rich`` and notebook widgets are supported)
22 | - Progress dashboard support
23 | - Worker insights to provide insight into your multiprocessing efficiency
24 | - Graceful and user-friendly exception handling
25 | - Timeouts, including for worker init and exit functions
26 | - Automatic task chunking for all available map functions to speed up processing of small task queues (including numpy
27 | arrays)
28 | - Adjustable maximum number of active tasks to avoid memory problems
29 | - Automatic restarting of workers after a specified number of tasks to reduce memory footprint
30 | - Nested pools of workers are allowed when setting the ``daemon`` option
31 | - Child processes can be pinned to specific CPUs or a range of CPUs
32 | - Optionally utilizes dill_ as serialization backend through multiprocess_, enabling parallelizing more exotic objects,
33 | lambdas, and functions in iPython and Jupyter notebooks.
34 |
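As a small sketch of what this looks like in practice (the usage sections go into much more detail):

.. code-block:: python

    from mpire import WorkerPool

    def task(x):
        return x * x

    with WorkerPool(n_jobs=4) as pool:
        results = pool.map(task, range(10))  # [0, 1, 4, 9, ..., 81]
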
35 | MPIRE has been tested on Linux, macOS, and Windows. There are a few minor known caveats for Windows and macOS users,
36 | which can be found at :ref:`troubleshooting_windows`.
37 |
38 | .. _benchmarks: https://towardsdatascience.com/mpire-for-python-multiprocessing-is-really-easy-d2ae7999a3e9
39 | .. _dill: https://pypi.org/project/dill/
40 | .. _multiprocess: https://github.com/uqfoundation/multiprocess
41 | .. _tqdm: https://tqdm.github.io/
42 |
43 | Contents
44 | --------
45 |
46 | .. toctree::
47 | :hidden:
48 |
49 | self
50 |
51 | .. toctree::
52 | :maxdepth: 3
53 | :titlesonly:
54 |
55 | install
56 | getting_started
57 | usage/index
58 | troubleshooting
59 | reference/index
60 | contributing
61 | changelog
62 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/worker_state.rst:
--------------------------------------------------------------------------------
1 | .. _worker_state:
2 |
3 | Worker state
4 | ============
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | If you want to let each worker have its own state you can use the ``use_worker_state`` flag:
11 |
12 | .. code-block:: python
13 |
14 | def task(worker_state, x):
15 | if "local_sum" not in worker_state:
16 | worker_state["local_sum"] = 0
17 | worker_state["local_sum"] += x
18 |
19 | with WorkerPool(n_jobs=4, use_worker_state=True) as pool:
20 | results = pool.map(task, range(100))
21 |
22 | .. important::
23 |
24 | The worker state is passed on as the third argument, after the worker ID and shared objects (when enabled), to the
25 | provided function.
26 |
27 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of
28 | :meth:`mpire.WorkerPool.set_use_worker_state`:
29 |
30 | .. code-block:: python
31 |
32 | with WorkerPool(n_jobs=4) as pool:
33 | pool.set_use_worker_state()
34 | pool.map(task, range(100))
35 |
36 | Combining worker state with worker_init and worker_exit
37 | -------------------------------------------------------
38 |
39 | The worker state can be combined with the ``worker_init`` and ``worker_exit`` parameters of each ``map`` function,
40 | leading to some really useful capabilities:
41 |
42 | .. code-block:: python
43 |
44 | import numpy as np
45 | import pickle
46 |
47 | def load_big_model(worker_state):
48 | # Load a model which takes up a lot of memory
49 | with open('./a_really_big_model.p3', 'rb') as f:
50 | worker_state['model'] = pickle.load(f)
51 |
52 | def model_predict(worker_state, x):
53 | # Predict
54 | return worker_state['model'].predict(x)
55 |
56 | with WorkerPool(n_jobs=4, use_worker_state=True) as pool:
57 | # Let the model predict
58 | data = np.array([[...]])
59 | results = pool.map(model_predict, data, worker_init=load_big_model)
60 |
61 | More information about the ``worker_init`` and ``worker_exit`` parameters can be found at :ref:`worker_init_exit`.
62 |
63 | Combining worker state with keep_alive
64 | --------------------------------------
65 |
66 | By default, workers are restarted each time a ``map`` function is executed. As described in :ref:`keep_alive` this can
67 | be circumvented by using ``keep_alive=True``. This also ensures worker state is kept across consecutive ``map`` calls:
68 |
69 | .. code-block:: python
70 |
71 | with WorkerPool(n_jobs=4, use_worker_state=True, keep_alive=True) as pool:
72 | # Let the model predict
73 | data = np.array([[...]])
74 | results = pool.map(model_predict, data, worker_init=load_big_model)
75 |
76 | # Predict some more
77 | more_data = np.array([[...]])
78 | more_results = pool.map(model_predict, more_data)
79 |
80 | In this example we don't need to supply the ``worker_init`` function to the second ``map`` call, as the workers will be
81 | reused. When ``worker_lifespan`` is set, though, this rule doesn't apply.
82 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/start_method.rst:
--------------------------------------------------------------------------------
1 | .. _start_methods:
2 |
3 | Process start method
4 | ====================
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | The ``multiprocessing`` package allows you to start processes using a few different methods: ``'fork'``, ``'spawn'`` or
11 | ``'forkserver'``. Threading is also available by using ``'threading'``. For detailed information on the multiprocessing
12 | contexts, please refer to the multiprocessing documentation_ and caveats_ section. In short:
13 |
14 | fork
15 | Copies the parent process such that the child process is effectively identical. This includes copying everything
16 | currently in memory. This is sometimes useful, but other times useless or even a serious bottleneck. ``fork``
17 | enables the use of copy-on-write shared objects (see :ref:`shared_objects`).
18 | spawn
19 | Starts a fresh python interpreter where only those resources necessary are inherited.
20 | forkserver
21 | First starts a server process (using ``'spawn'``). Whenever a new process is needed the parent process requests the
22 | server to fork a new process.
23 | threading
24 | Starts child threads. Suffers from the Global Interpreter Lock (GIL), but works fine for I/O intensive tasks.
25 |
26 | For an overview of start method availability and defaults, please refer to the following table:
27 |
28 | .. list-table::
29 | :header-rows: 1
30 |
31 | * - Start method
32 | - Available on Unix
33 | - Available on Windows
34 | * - ``fork``
35 | - Yes (default)
36 | - No
37 | * - ``spawn``
38 | - Yes
39 | - Yes (default)
40 | * - ``forkserver``
41 | - Yes
42 | - No
43 | * - ``threading``
44 | - Yes
45 | - Yes
46 |
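The start method is selected through the ``start_method`` parameter of the :obj:`mpire.WorkerPool` constructor. For
example, to use threading instead of multiprocessing:

.. code-block:: python

    # 'fork', 'forkserver', and 'spawn' work the same way, subject to the
    # availability listed in the table above
    with WorkerPool(n_jobs=4, start_method='threading') as pool:
        ...
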
47 | Spawn and forkserver
48 | --------------------
49 |
50 | When using ``spawn`` or ``forkserver`` as start method, be aware that global variables (constants are fine) might have a
51 | different value than you might expect. You also have to import packages within the called function:
52 |
53 | .. code-block:: python
54 |
55 | import os
56 |
57 | def failing_job(folder, filename):
58 | return os.path.join(folder, filename)
59 |
60 | # This will fail because 'os' is not copied to the child processes
61 | with WorkerPool(n_jobs=2, start_method='spawn') as pool:
62 | pool.map(failing_job, [('folder', '0.p3'), ('folder', '1.p3')])
63 |
64 | .. code-block:: python
65 |
66 | def working_job(folder, filename):
67 | import os
68 | return os.path.join(folder, filename)
69 |
70 | # This will work
71 | with WorkerPool(n_jobs=2, start_method='spawn') as pool:
72 | pool.map(working_job, [('folder', '0.p3'), ('folder', '1.p3')])
73 |
74 | A lot of effort has been put into making the progress bar, dashboard, and nested pools (with multiple progress bars)
75 | work well with ``spawn`` and ``forkserver``. So, everything should work fine.
76 |
77 | .. _documentation: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
78 | .. _caveats: https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods
79 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/keep_alive.rst:
--------------------------------------------------------------------------------
1 | .. _keep_alive:
2 |
3 | Keep alive
4 | ==========
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | By default, workers are restarted on each ``map`` call. This is done to clean up resources as quickly as possible when
11 | the work is done.
12 |
13 | Workers can be kept alive in between consecutive map calls using the ``keep_alive`` flag. This is useful when your
14 | workers have a long startup time and you need to call one of the map functions multiple times.
15 |
16 | .. code-block:: python
17 |
18 |     def task(x):
19 | pass
20 |
21 | with WorkerPool(n_jobs=4, keep_alive=True) as pool:
22 | pool.map(task, range(100))
23 | pool.map(task, range(100)) # Workers are reused here
24 |
25 | Instead of passing the flag to the :obj:`mpire.WorkerPool` constructor you can also make use of
26 | :meth:`mpire.WorkerPool.set_keep_alive`:
27 |
28 | .. code-block:: python
29 |
30 | with WorkerPool(n_jobs=4) as pool:
31 | pool.map(task, range(100))
32 | pool.map(task, range(100)) # Workers are restarted
33 | pool.set_keep_alive()
34 | pool.map(task, range(100)) # Workers are reused here
35 |
36 | Caveats
37 | -------
38 |
39 | Changing some WorkerPool init parameters does require a restart. These include ``pass_worker_id``, ``shared_objects``, and
40 | ``use_worker_state``.
41 |
42 | Keeping workers alive works even when the function to be called or any other parameter passed on to the ``map`` function
43 | changes.
44 |
45 | However, when you're changing either the ``worker_init`` and/or ``worker_exit`` function while ``keep_alive`` is
46 | enabled, you need to be aware this can have undesired side-effects. ``worker_init`` functions are only executed when a
47 | worker is started and ``worker_exit`` functions when a worker is terminated. When ``keep_alive`` is enabled, workers
48 | aren't restarted in between consecutive ``map`` calls, so those functions are not called.
49 |
50 | .. code-block:: python
51 |
52 | def init_func_1(): pass
53 | def exit_func_1(): pass
54 |
55 | def init_func_2(): pass
56 |     def exit_func_2(): pass
57 |
58 | with WorkerPool(n_jobs=4, keep_alive=True) as pool:
59 | pool.map(task, range(100), worker_init=init_func_1, worker_exit=exit_func_1)
60 | pool.map(task, range(100), worker_init=init_func_2, worker_exit=exit_func_2)
61 |
62 | In the above example ``init_func_1`` is called for each worker when the workers are started. After the first ``map``
63 | call ``exit_func_1`` is not called because workers are kept alive. During the second ``map`` call ``init_func_2`` isn't
64 | called either, because the workers are still alive. When exiting the context manager the workers are shut down and
65 | ``exit_func_2`` is called.
66 |
67 | It gets even trickier when you also enable ``worker_lifespan``. In this scenario during the first ``map`` call a worker
68 | could've reached its maximum lifespan and is forced to restart, while others haven't. The exit function of the worker to
69 | be restarted is called (i.e., ``exit_func_1``). When calling ``map`` for the second time and the exit function is
70 | changed, the other workers will execute the new exit function when they need to be restarted (i.e., ``exit_func_2``).
71 |
--------------------------------------------------------------------------------
/tests/test_signal.py:
--------------------------------------------------------------------------------
1 | import multiprocessing as mp
2 | import os
3 | import signal
4 | import unittest
5 |
6 | from mpire.context import RUNNING_WINDOWS
7 | from mpire.signal import DelayedKeyboardInterrupt, DisableKeyboardInterruptSignal
8 | from tests.utils import ConditionalDecorator
9 |
10 |
11 | @ConditionalDecorator(unittest.skip("Signals aren't fully supported on Windows"), RUNNING_WINDOWS)
12 | class DelayedKeyboardInterruptTest(unittest.TestCase):
13 |
14 | def test_delayed_keyboard_interrupt(self):
15 | """
16 | The process should delay the keyboard interrupt in case ``in_thread=False``, so the expected value should be 1.
17 | However, we can't send signals to threads and so the DelayedKeyboardInterrupt doesn't do anything in that case.
18 | So there's no point in testing this with threading
19 | """
20 | # Create events so we know when the process has started and we can send an interrupt
21 | started_event = mp.Event()
22 | quit_event = mp.Event()
23 | value = mp.Value('i', 0)
24 |
25 | # Start process and wait until it starts
26 | p = mp.Process(target=self.delayed_process_job, args=(started_event, quit_event, value))
27 | p.start()
28 | started_event.wait()
29 |
30 | # Send kill signal and wait for it to join
31 | os.kill(p.pid, signal.SIGINT)
32 | quit_event.set()
33 | p.join()
34 |
35 | # Verify expected value.
36 | self.assertEqual(value.value, 1)
37 |
38 | @staticmethod
39 | def delayed_process_job(started_event: mp.Event, quit_event: mp.Event, value: mp.Value):
40 | """
41 | Should be affected by interrupt
42 | """
43 | try:
44 | with DelayedKeyboardInterrupt():
45 | started_event.set()
46 | quit_event.wait()
47 | value.value = 1
48 | except KeyboardInterrupt:
49 | pass
50 | else:
51 | value.value = 2
52 |
53 |
54 | @ConditionalDecorator(unittest.skip("Signals aren't fully supported on Windows"), RUNNING_WINDOWS)
55 | class DisabledKeyboardInterruptTest(unittest.TestCase):
56 |
57 | def test_disabled_keyboard_interrupt(self):
58 | """
59 | The process should ignore a keyboard interrupt entirely, which means the expected value should be True
60 | """
61 | # Create events so we know when the process has started and we can send an interrupt
62 | started_event = mp.Event()
63 | quit_event = mp.Event()
64 | value = mp.Value('b', False)
65 | p = mp.Process(target=self.disabled_process_job, args=(started_event, quit_event, value))
66 | p.start()
67 | started_event.wait()
68 | os.kill(p.pid, signal.SIGINT)
69 | quit_event.set()
70 | p.join()
71 |
72 | # If everything worked the value should be set to True
73 | self.assertEqual(value.value, True)
74 |
75 | @staticmethod
76 | def disabled_process_job(started_event: mp.Event, quit_event: mp.Event, value: mp.Value):
77 | """
78 | Should not be affected by interrupt
79 | """
80 | with DisableKeyboardInterruptSignal():
81 | started_event.set()
82 | quit_event.wait()
83 | value.value = True
84 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/setup.rst:
--------------------------------------------------------------------------------
1 | Starting a WorkerPool
2 | =====================
3 |
4 | .. contents:: Contents
5 | :depth: 2
6 | :local:
7 |
8 | The :obj:`mpire.WorkerPool` class controls a pool of worker processes similarly to a ``multiprocessing.Pool``. It
9 | contains all the ``map`` like functions (with the addition of :meth:`mpire.WorkerPool.map_unordered`), together with
10 | the ``apply`` and ``apply_async`` functions (see :ref:`apply-family`).
11 |
12 | An :obj:`mpire.WorkerPool` can be started in two different ways. The first and recommended way to do so is using a
13 | context manager:
14 |
15 | .. code-block:: python
16 |
17 | from mpire import WorkerPool
18 |
19 | # Start a pool of 4 workers
20 | with WorkerPool(n_jobs=4) as pool:
21 | # Do some processing here
22 | pass
23 |
24 | The ``with`` statement takes care of properly joining/terminating the spawned worker processes after the block has
25 | ended.
26 |
27 | The other way is to do it manually:
28 |
29 | .. code-block:: python
30 |
31 | # Start a pool of 4 workers
32 | pool = WorkerPool(n_jobs=4)
33 |
34 | # Do some processing here
35 | pass
36 |
37 | # Only needed when keep_alive=True:
38 | # Clean up pool (this will block until all processing has completed)
39 | pool.stop_and_join() # or use pool.join() which is an alias of stop_and_join()
40 |
41 | # In the case you want to kill the processes, even though they are still busy
42 | pool.terminate()
43 |
44 | When using ``n_jobs=None`` MPIRE will spawn as many processes as there are CPUs on your system. Specifying more jobs
45 | than you have CPUs is, of course, possible as well.
46 |
47 | .. warning::
48 |
49 | In the manual approach, the results queue should be drained before joining the workers, otherwise you can get a
50 | deadlock. If you want to join either way, use :meth:`mpire.WorkerPool.terminate`. For more information, see the
51 | warnings in the Python docs here_.
52 |
53 | .. _here: https://docs.python.org/3/library/multiprocessing.html#pipes-and-queues
54 |
55 |
56 | Nested WorkerPools
57 | ------------------
58 |
59 | By default, the :obj:`mpire.WorkerPool` class spawns daemon child processes, which are not able to create child processes
60 | themselves, so nested pools are not allowed. There's an option to create non-daemon child processes to allow for nested
61 | structures:
62 |
63 | .. code-block:: python
64 |
65 |     def job(...):
66 | with WorkerPool(n_jobs=4) as p:
67 | # Do some work
68 | results = p.map(...)
69 |
70 | with WorkerPool(n_jobs=4, daemon=True, start_method='spawn') as pool:
71 | # This will raise an AssertionError telling you daemon processes
72 | # can't start child processes
73 | pool.map(job, ...)
74 |
75 | with WorkerPool(n_jobs=4, daemon=False, start_method='spawn') as pool:
76 | # This will work just fine
77 | pool.map(job, ...)
78 |
79 | .. note::
80 |
81 | Nested pools aren't supported when using threading.
82 |
83 | .. warning::
84 |
85 |     Spawning processes is not thread-safe_! Both the ``start`` and ``join`` methods of the ``Process`` class alter global
86 | variables. If you still want to have nested pools, the safest bet is to use ``spawn`` as start method.
87 |
88 | .. note::
89 |
90 | Due to a strange bug in Python, using ``forkserver`` as start method in a nested pool is not allowed when the
91 | outer pool is using ``fork``, as the forkserver will not have been started there. For it to work your outer pool
92 | will have to have either ``spawn`` or ``forkserver`` as start method.
93 |
94 | .. warning::
95 |
96 | Nested pools aren't production ready. Error handling and keyboard interrupts when using nested pools can, on some
97 |     rare occasions (~1% of the time), still cause deadlocks. Use at your own risk.
98 |
99 | When a function is guaranteed to finish successfully, using nested pools is absolutely fine.
100 |
101 | .. _thread-safe: https://bugs.python.org/issue40860
102 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/worker_insights.rst:
--------------------------------------------------------------------------------
1 | .. _worker insights:
2 |
3 | Worker insights
4 | ===============
5 |
6 | Worker insights gives you insight into your multiprocessing efficiency by tracking worker start up time, waiting time, and
7 | time spent on executing tasks. Tracking is disabled by default, but can be enabled by setting ``enable_insights``:
8 |
9 | .. code-block:: python
10 |
11 | with WorkerPool(n_jobs=4, enable_insights=True) as pool:
12 | pool.map(task, range(100))
13 |
14 | The overhead is very minimal and you shouldn't really notice it, even on very small tasks. You can view the tracking
15 | results using :meth:`mpire.WorkerPool.get_insights` or use :meth:`mpire.WorkerPool.print_insights` to directly print
16 | the insights to console:
17 |
18 | .. code-block:: python
19 |
20 | import time
21 |
22 | def sleep_and_square(x):
23 | # For illustration purposes
24 | time.sleep(x / 1000)
25 | return x * x
26 |
27 | with WorkerPool(n_jobs=4, enable_insights=True) as pool:
28 | pool.map(sleep_and_square, range(100))
29 | insights = pool.get_insights()
30 | print(insights)
31 |
32 | # Output:
33 | {'n_completed_tasks': [28, 24, 24, 24],
34 | 'total_start_up_time': '0:00:00.038',
35 | 'total_init_time': '0:00:00',
36 | 'total_waiting_time': '0:00:00.798',
37 | 'total_working_time': '0:00:04.980',
38 | 'total_exit_time': '0:00:00',
39 | 'total_time': '0:00:05.816',
40 | 'start_up_time': ['0:00:00.010', '0:00:00.008', '0:00:00.008', '0:00:00.011'],
41 | 'start_up_time_mean': '0:00:00.009',
42 | 'start_up_time_std': '0:00:00.001',
43 | 'start_up_ratio': 0.006610452621805033,
44 | 'init_time': ['0:00:00', '0:00:00', '0:00:00', '0:00:00'],
45 | 'init_time_mean': '0:00:00',
46 | 'init_time_std': '0:00:00',
47 | 'init_ratio': 0.0,
48 | 'waiting_time': ['0:00:00.309', '0:00:00.311', '0:00:00.165', '0:00:00.012'],
49 | 'waiting_time_mean': '0:00:00.199',
50 | 'waiting_time_std': '0:00:00.123',
51 | 'waiting_ratio': 0.13722942739284952,
52 | 'working_time': ['0:00:01.142', '0:00:01.135', '0:00:01.278', '0:00:01.423'],
53 | 'working_time_mean': '0:00:01.245',
54 | 'working_time_std': '0:00:00.117',
55 | 'working_ratio': 0.8561601182661567,
56 |      'exit_time': ['0:00:00', '0:00:00', '0:00:00', '0:00:00'],
57 | 'exit_time_mean': '0:00:00',
58 | 'exit_time_std': '0:00:00',
59 | 'exit_ratio': 0.0,
60 | 'top_5_max_task_durations': ['0:00:00.099', '0:00:00.098', '0:00:00.097', '0:00:00.096',
61 | '0:00:00.095'],
62 | 'top_5_max_task_args': ['Arg 0: 99', 'Arg 0: 98', 'Arg 0: 97', 'Arg 0: 96', 'Arg 0: 95']}
63 |
64 | We specified 4 workers, so there are 4 entries in the ``n_completed_tasks``, ``start_up_time``, ``init_time``,
65 | ``waiting_time``, ``working_time``, and ``exit_time`` containers. They show, per worker, the number of completed tasks,
66 | the total start up time, the total time spent on the ``worker_init`` function, the total time waiting for new tasks, the
67 | total time spent on the task function, and the total time spent on the ``worker_exit`` function, respectively. The insights
68 | also contain mean, standard deviation, and ratio of the tracked time. The ratio is the time for that part divided by the
69 | total time. In general, the higher the working ratio the more efficient your multiprocessing setup is. Of course, your
70 | setup might still not be optimal because the task itself is inefficient, but timing that is beyond the scope of MPIRE.
71 |
72 | Additionally, the insights keep track of the top 5 tasks that took the longest to run. The data is split up into two
73 | containers: one for the duration and one for the arguments that were passed on to the task function. Both are sorted
74 | based on task duration (desc), so index ``0`` of the args list corresponds to index ``0`` of the duration list, etc.
75 |
76 | When using the MPIRE :ref:`Dashboard` you can track these insights in real-time. See :ref:`Dashboard` for more
77 | information.
78 |
79 | .. note::
80 |
81 |     When using ``imap`` or ``imap_unordered`` you can view the insights during execution. Simply call ``get_insights()``
82 | or ``print_insights()`` inside your loop where you process the results.
83 |
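84 | A minimal sketch, reusing the ``sleep_and_square`` function from above:
85 |
86 | .. code-block:: python
87 |
88 |     with WorkerPool(n_jobs=4, enable_insights=True) as pool:
89 |         for result in pool.imap(sleep_and_square, range(100)):
90 |             ...  # Process each result as it becomes available
91 |
92 |             # View the insights gathered so far
93 |             pool.print_insights()
94 |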
--------------------------------------------------------------------------------
/docs/usage/map/numpy.rst:
--------------------------------------------------------------------------------
1 | Numpy arrays
2 | ============
3 |
4 | .. contents:: Contents
5 | :depth: 2
6 | :local:
7 |
8 | Chunking
9 | --------
10 |
11 | Numpy arrays are treated a little bit differently when passed on to the ``map`` functions. Usually MPIRE uses
12 | ``itertools.islice`` for chunking, which depends on the ``__iter__`` special function of the container object. But
13 | applying that to numpy arrays:
14 |
15 | .. code-block:: python
16 |
17 |     import itertools
18 |     import numpy as np
19 |
20 |     # Create random array
21 |     arr = np.random.rand(10, 3)
22 |     # Chunk the array using default chunking
23 |     arr_iter = iter(arr)
24 |     chunk_size = 3
25 |     while True:
26 |         chunk = list(itertools.islice(arr_iter, chunk_size))
27 |         if chunk:
28 |             yield chunk
29 |         else:
30 |             break
31 |
32 | would yield:
33 |
34 | .. code-block:: python
35 |
36 |     [array([0.68438994, 0.9701514 , 0.40083965]), array([0.88428556, 0.2083905 , 0.61490443]),
37 |      array([0.89249174, 0.39902235, 0.70762541])]
38 |     [array([0.18850964, 0.1022777 , 0.41539432]), array([0.07327858, 0.18608165, 0.75862301]),
39 |      array([0.69215651, 0.4211941 , 0.31029439])]
40 |     [array([0.82571272, 0.72257819, 0.86079131]), array([0.91285817, 0.49398461, 0.27863929]),
41 |      array([0.146981  , 0.84671211, 0.30122806])]
42 |     [array([0.11783283, 0.12585031, 0.39864368])]
43 |
44 | In other words, each row of the array is now in its own array and each one of them is given to the target function
45 | individually. Instead, MPIRE will chunk them into something more reasonable using numpy slicing:
46 |
47 | .. code-block:: python
48 |
49 |     from mpire.utils import chunk_tasks
50 |
51 |     for chunk in chunk_tasks(arr, chunk_size=chunk_size):
52 |         print(repr(chunk))
53 |
54 | Output:
55 |
56 | .. code-block:: python
57 |
58 |     array([[0.68438994, 0.9701514 , 0.40083965],
59 |            [0.88428556, 0.2083905 , 0.61490443],
60 |            [0.89249174, 0.39902235, 0.70762541]])
61 |     array([[0.18850964, 0.1022777 , 0.41539432],
62 |            [0.07327858, 0.18608165, 0.75862301],
63 |            [0.69215651, 0.4211941 , 0.31029439]])
64 |     array([[0.82571272, 0.72257819, 0.86079131],
65 |            [0.91285817, 0.49398461, 0.27863929],
66 |            [0.146981 , 0.84671211, 0.30122806]])
67 |     array([[0.11783283, 0.12585031, 0.39864368]])
68 |
69 | Each chunk is now a single numpy array containing as many rows as the chunk size, except for the last chunk as there
70 | aren't enough rows left.
71 |
72 | Return value
73 | ------------
74 |
75 | When the user-provided function returns numpy arrays and you're applying the :meth:`mpire.WorkerPool.map` function, MPIRE
76 | will concatenate the resulting numpy arrays into a single array by default. For example:
77 |
78 | .. code-block:: python
79 |
80 |     def add_five(x):
81 |         return x + 5
82 |
83 |     with WorkerPool(n_jobs=4) as pool:
84 |         results = pool.map(add_five, arr, chunk_size=chunk_size)
85 |
86 | will return:
87 |
88 | .. code-block:: python
89 |
90 |     array([[5.68438994, 5.9701514 , 5.40083965],
91 |            [5.88428556, 5.2083905 , 5.61490443],
92 |            [5.89249174, 5.39902235, 5.70762541],
93 |            [5.18850964, 5.1022777 , 5.41539432],
94 |            [5.07327858, 5.18608165, 5.75862301],
95 |            [5.69215651, 5.4211941 , 5.31029439],
96 |            [5.82571272, 5.72257819, 5.86079131],
97 |            [5.91285817, 5.49398461, 5.27863929],
98 |            [5.146981 , 5.84671211, 5.30122806],
99 |            [5.11783283, 5.12585031, 5.39864368]])
100 |
101 | This behavior can be disabled by using the ``concatenate_numpy_output`` flag:
102 |
103 | .. code-block:: python
104 |
105 |     with WorkerPool(n_jobs=4) as pool:
106 |         results = pool.map(add_five, arr, chunk_size=chunk_size, concatenate_numpy_output=False)
107 |
108 | This will return individual arrays:
109 |
110 | .. code-block:: python
111 |
112 |     [array([[5.68438994, 5.9701514 , 5.40083965],
113 |             [5.88428556, 5.2083905 , 5.61490443],
114 |             [5.89249174, 5.39902235, 5.70762541]]),
115 |      array([[5.18850964, 5.1022777 , 5.41539432],
116 |             [5.07327858, 5.18608165, 5.75862301],
117 |             [5.69215651, 5.4211941 , 5.31029439]]),
118 |      array([[5.82571272, 5.72257819, 5.86079131],
119 |             [5.91285817, 5.49398461, 5.27863929],
120 |             [5.146981 , 5.84671211, 5.30122806]]),
121 |      array([[5.11783283, 5.12585031, 5.39864368]])]
122 |
--------------------------------------------------------------------------------
/docs/usage/workerpool/shared_objects.rst:
--------------------------------------------------------------------------------
1 | .. _shared_objects:
2 |
3 | Shared objects
4 | ==============
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | MPIRE allows you to provide shared objects to the workers in a similar way as is possible with the
11 | ``multiprocessing.Process`` class. For the start method ``fork`` these shared objects are treated as ``copy-on-write``,
12 | which means they are only copied once changes are made to them. Otherwise they share the same memory address. This is
13 | convenient if you want to let workers access a large dataset that wouldn't fit in memory when copied multiple times.
14 |
15 | .. note::
16 |
17 | The start method ``fork`` isn't available on Windows, which means copy-on-write isn't supported there.
18 |
19 | For ``threading`` these shared objects are readable and writable without copies being made. For the start methods
20 | ``spawn`` and ``forkserver`` the shared objects are copied once for each worker, in contrast to copying it for each
21 | task which is done when using a regular ``multiprocessing.Pool``.
22 |
23 | .. code-block:: python
24 |
25 | def task(dataset, x):
26 | # Do something with this copy-on-write dataset
27 | ...
28 |
29 | def main():
30 | dataset = ... # Load big dataset
31 | with WorkerPool(n_jobs=4, shared_objects=dataset, start_method='fork') as pool:
32 | ... = pool.map(task, range(100))
33 |
34 | Multiple objects can be provided by placing them, for example, in a tuple container.
35 |
36 | Apart from sharing regular Python objects between workers, you can also share multiprocessing synchronization
37 | primitives such as ``multiprocessing.Lock`` using this method (see the sketch below). Objects like these need to be
38 | shared through inheritance, which is exactly how shared objects in MPIRE are passed on.
39 |
40 | .. important::
41 |
42 | Shared objects are passed on as the second argument, after the worker ID (when enabled), to the provided function.
43 |
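44 | As a minimal sketch (the results file name is only illustrative), a lock can be shared together with a regular Python
45 | object:
46 |
47 | .. code-block:: python
48 |
49 |     from multiprocessing import Lock
50 |
51 |     def task(shared_objects, x):
52 |         lock, results_file = shared_objects
53 |
54 |         # Serialize the writes with the shared lock
55 |         with lock:
56 |             with open(results_file, 'a') as f:
57 |                 f.write(f"{x * x}\n")
58 |
59 |     def main():
60 |         shared_objects = Lock(), 'results.txt'
61 |         with WorkerPool(n_jobs=4, shared_objects=shared_objects) as pool:
62 |             pool.map(task, range(100))
63 |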
44 | Instead of passing the shared objects to the :obj:`mpire.WorkerPool` constructor you can also use the
45 | :meth:`mpire.WorkerPool.set_shared_objects` function:
46 |
47 | .. code-block:: python
48 |
49 | def main():
50 | dataset = ... # Load big dataset
51 | with WorkerPool(n_jobs=4, start_method='fork') as pool:
52 | pool.set_shared_objects(dataset)
53 | ... = pool.map(task, range(100))
54 |
55 | Shared objects have to be specified before the workers are started. Workers are started once the first ``map`` call is
56 | executed. When ``keep_alive=True`` and the workers are reused, changing the shared objects between two consecutive
57 | ``map`` calls won't work.
58 |
59 |
60 | Copy-on-write alternatives
61 | --------------------------
62 |
63 | When copy-on-write is not available for you, you can also use shared objects to share a ``multiprocessing.Array``,
64 | ``multiprocessing.Value``, or another object with ``multiprocessing.Manager``. You can then store results in the same
65 | object from multiple processes. However, you should keep the amount of synchronization to a minimum when the resources
66 | are protected with a lock, or disable locking if your situation allows it as is shown here:
67 |
68 | .. code-block:: python
69 |
70 | from multiprocessing import Array
71 |
72 | def square_add_and_modulo_with_index(shared_objects, idx, x):
73 | # Unpack results containers
74 | square_results_container, add_results_container = shared_objects
75 |
76 | # Square, add and modulo
77 | square_results_container[idx] = x * x
78 | add_results_container[idx] = x + x
79 | return x % 2
80 |
81 | def main():
82 | # Use a shared array of size 100 and type float to store the results
83 | square_results_container = Array('f', 100, lock=False)
84 | add_results_container = Array('f', 100, lock=False)
85 | shared_objects = square_results_container, add_results_container
86 | with WorkerPool(n_jobs=4, shared_objects=shared_objects) as pool:
87 |
88 | # Square, add and modulo the results and store them in the results containers
89 | modulo_results = pool.map(square_add_and_modulo_with_index,
90 | enumerate(range(100)), iterable_len=100)
91 |
92 | In the example above we create two results containers, one for squaring and one for adding the given value, and disable
93 | locking for both. Additionally, we also return a value, even though we use shared objects for storing results. We can
94 | safely disable locking here as each task writes to a different index in the array, so no race conditions can occur.
95 | Disabling locking is, of course, a lot faster than having it enabled.
96 |
--------------------------------------------------------------------------------
/mpire/dashboard/static/style.css:
--------------------------------------------------------------------------------
1 | body {
2 | margin: 40px;
3 | }
4 |
5 | h1 {
6 | margin-bottom: 40px;
7 | }
8 |
9 | h1 .username {
10 | font-size: 0.4em;
11 | vertical-align: middle;
12 | cursor: help;
13 | }
14 |
15 | h1 .username_brackets {
16 | margin-left: 0.3em;
17 | margin-right: 0.3em;
18 | color: rgb(0, 255, 255);
19 | }
20 |
21 | h1 .username_at {
22 | margin-left: 0.1em;
23 | margin-right: 0.1em;
24 | }
25 |
26 | #menu-top-right {
27 | float: right;
28 | }
29 |
30 | #menu-top-right > div,
31 | #menu-top-right > a {
32 | display: inline-block;
33 | margin-left: 10px;
34 | }
35 |
36 | .mpire {
37 | position: fixed;
38 | bottom: 0;
39 | right: 40px;
40 | z-index: -99;
41 | font-size: 60%;
42 | color: #6c757d;
43 | }
44 |
45 | .lightsaber {
46 | color: #dc3545;
47 | }
48 |
49 | .pb_container {
50 | width: 100%;
51 | height: 18px;
52 | border-radius: .25rem;
53 | overflow: hidden;
54 | background-color: #FFF;
55 | }
56 |
57 | .pb {
58 | height: 100%;
59 | background-color: #007bff;
60 | text-align: center;
61 | }
62 |
63 | .pb_details_left_filler {
64 | float: left;
65 | width: 3%;
66 | height: 1em;
67 | }
68 |
69 | .pb_details_right {
70 | overflow: hidden;
71 | margin-top: 6px;
72 | padding-right: 2em;
73 | }
74 |
75 | .clickable {
76 | cursor: pointer;
77 | }
78 |
79 | td.pb_details {
80 | padding: 0;
81 | background-color: rgba(255, 255, 255, .025);
82 | }
83 |
84 | td.pb_details > div {
85 | display: none;
86 | padding: 12px 12px 24px 12px;
87 | }
88 |
89 | .separator {
90 | display: flex;
91 | align-items: center;
92 | text-align: center;
93 | font-size: 1.05em;
94 | margin-top: 10px;
95 | margin-bottom: 20px;
96 | }
97 |
98 | .separator::before,
99 | .separator::after {
100 | content: '';
101 | border-bottom: 1px solid rgba(255, 255, 255, .2);
102 | }
103 |
104 | .separator::before {
105 | flex: 0.025;
106 | }
107 |
108 | .separator::after {
109 | flex: 0.975;
110 | }
111 |
112 | .separator:not(:empty)::before {
113 | margin-right: 1em;
114 | }
115 |
116 | .separator:not(:empty)::after {
117 | margin-left: 1em;
118 | }
119 |
120 | .separator.clickable:hover {
121 | color: rgb(255, 235, 156);
122 | }
123 |
124 | .separator.clickable:hover::before,
125 | .separator.clickable:hover::after {
126 | border-bottom: 1px solid rgba(255, 255, 255, 0.6);
127 | }
128 |
129 | .insights {
130 | display: none;
131 | }
132 |
133 | .insights p.info {
134 | color: #ccc;
135 | margin: 1em 0;
136 | }
137 |
138 | .insights span.info {
139 | color: rgb(0, 255, 255);
140 | }
141 |
142 | .insights span.clickable {
143 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
144 | word-break: normal;
145 | color: rgb(255, 255, 255);
146 | }
147 |
148 | .insights span.clickable:hover {
149 | color: rgb(255, 235, 156);
150 | }
151 |
152 | .insights-left {
153 | float: left;
154 | width: 48%;
155 | }
156 |
157 | .insights-middle {
158 | float: left;
159 | width: 4%;
160 | height: 1em;
161 | }
162 |
163 | .insights-right {
164 | float: left;
165 | width: 48%;
166 | }
167 |
168 | .insights table {
169 | margin-bottom: 20px;
170 | }
171 |
172 | .insights th,
173 | .insights td {
174 | padding: 0.5em 1.0em;
175 | }
176 |
177 | .insights td {
178 | text-align: right;
179 | }
180 |
181 | .insights_table {
182 | width: 100%;
183 | table-layout: fixed;
184 | }
185 |
186 | .insights_table tr th:first-child,
187 | .insights_table tr td:first-child {
188 | width: 20px;
189 | }
190 |
191 | .insights_table tr th:nth-child(2),
192 | .insights_table tr td:nth-child(2) {
193 | width: 100px;
194 | text-align: right;
195 | }
196 |
197 | .insights_table tr th:nth-child(3),
198 | .insights_table tr td:nth-child(3) {
199 | text-align: left;
200 | word-break: break-all;
201 | }
202 |
203 | .insights_table .code {
204 | font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
205 | word-break: break-all;
206 | color: rgb(0, 255, 255);
207 | }
208 |
209 | .glyphicon {
210 | display: none;
211 | }
212 |
213 | code {
214 | margin-left: 0.4em;
215 | margin-right: 0.4em;
216 | color: rgb(0, 255, 255);
217 | }
218 |
219 | p {
220 | margin-bottom: 0.4em;
221 | }
222 |
223 | .traceback {
224 | display: none;
225 | color: rgb(255, 100, 100);
226 | border-top: 1px dashed;
227 | margin-top: 20px;
228 | padding-top: 20px;
229 | word-break: break-all;
230 | white-space: pre-wrap;
231 | }
232 |
233 | .hidden {
234 | display: none;
235 | }
--------------------------------------------------------------------------------
/mpire/dashboard/manager.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import Lock
2 | from multiprocessing.synchronize import Lock as LockType
3 | from multiprocessing.managers import BaseProxy
4 | from typing import Dict, Optional, Tuple
5 |
6 | from mpire.dashboard.connection_classes import DashboardManager, DashboardManagerConnectionDetails
7 | from mpire.signal import ignore_keyboard_interrupt
8 |
9 |
10 | # Dict for tqdm progress bar updates
11 | DASHBOARD_TQDM_DICT = None
12 |
13 | # Dict for tqdm progress bar details (function called etc.)
14 | DASHBOARD_TQDM_DETAILS_DICT = None
15 |
16 | # Lock for registering new progress bars
17 | DASHBOARD_TQDM_LOCK = None
18 |
19 | # Connection details for connecting to a manager
20 | DASHBOARD_MANAGER_CONNECTION_DETAILS = DashboardManagerConnectionDetails()
21 |
22 |
23 | def get_dashboard_tqdm_dict() -> Dict:
24 | """
25 | :return: Dashboard tqdm dict which should be used in a DashboardManager context
26 | """
27 | global DASHBOARD_TQDM_DICT
28 | if DASHBOARD_TQDM_DICT is None:
29 | DASHBOARD_TQDM_DICT = {}
30 | return DASHBOARD_TQDM_DICT
31 |
32 |
33 | def get_dashboard_tqdm_details_dict() -> Dict:
34 | """
35 | :return: Dashboard tqdm details dict which should be used in a DashboardManager context
36 | """
37 | global DASHBOARD_TQDM_DETAILS_DICT
38 | if DASHBOARD_TQDM_DETAILS_DICT is None:
39 | DASHBOARD_TQDM_DETAILS_DICT = {}
40 | return DASHBOARD_TQDM_DETAILS_DICT
41 |
42 |
43 | def get_dashboard_tqdm_lock() -> LockType:
44 | """
45 | :return: Dashboard tqdm lock which should be used in a DashboardManager context
46 | """
47 | global DASHBOARD_TQDM_LOCK
48 | if DASHBOARD_TQDM_LOCK is None:
49 | DASHBOARD_TQDM_LOCK = Lock()
50 | return DASHBOARD_TQDM_LOCK
51 |
52 |
53 | def start_manager_server(manager_port_nr: int) -> DashboardManager:
54 | """
55 | Start a SyncManager
56 |
57 | :param manager_port_nr: Port number to use for the manager
58 |     :return: DashboardManager instance
59 | """
60 | global DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK, \
61 | DASHBOARD_MANAGER_HOST, DASHBOARD_MANAGER_PORT
62 |
63 | DashboardManager.register('get_dashboard_tqdm_dict', get_dashboard_tqdm_dict)
64 | DashboardManager.register('get_dashboard_tqdm_details_dict', get_dashboard_tqdm_details_dict)
65 | DashboardManager.register('get_dashboard_tqdm_lock', get_dashboard_tqdm_lock)
66 |
67 | # Create manager
68 | dm = DashboardManager(address=("127.0.0.1", manager_port_nr), authkey=b'mpire_dashboard')
69 | dm.start(ignore_keyboard_interrupt)
70 | DASHBOARD_TQDM_DICT = dm.get_dashboard_tqdm_dict()
71 | DASHBOARD_TQDM_DETAILS_DICT = dm.get_dashboard_tqdm_details_dict()
72 | DASHBOARD_TQDM_LOCK = dm.get_dashboard_tqdm_lock()
73 |
74 | # Set host and port number so other processes know where to connect to
75 | DASHBOARD_MANAGER_CONNECTION_DETAILS.host = "127.0.0.1"
76 | DASHBOARD_MANAGER_CONNECTION_DETAILS.port = manager_port_nr
77 |
78 | return dm
79 |
80 |
81 | def shutdown_manager_server(manager: Optional[DashboardManager]) -> None:
82 | """
83 | Shutdown a DashboardManager
84 |
85 | :param manager: DashboardManager to shutdown
86 | """
87 | global DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK
88 | if manager is not None:
89 | manager.shutdown()
90 | DASHBOARD_TQDM_DICT = None
91 | DASHBOARD_TQDM_DETAILS_DICT = None
92 | DASHBOARD_TQDM_LOCK = None
93 | DASHBOARD_MANAGER_CONNECTION_DETAILS.clear()
94 |
95 |
96 | def get_manager_client_dicts() -> Tuple[BaseProxy, BaseProxy, BaseProxy]:
97 | """
98 | Connect to a DashboardManager and obtain the synchronized tqdm dashboard dicts
99 |
100 | :return: DashboardManager tqdm dict, tqdm details dict, tqdm lock
101 | """
102 | global DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK
103 |
104 | # If we're already connected to a manager, return the dicts directly
105 | if DASHBOARD_TQDM_DICT is not None:
106 | return DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK
107 |
108 | # Connect to a server
109 | DashboardManager.register('get_dashboard_tqdm_dict', get_dashboard_tqdm_dict)
110 | DashboardManager.register('get_dashboard_tqdm_details_dict', get_dashboard_tqdm_details_dict)
111 | DashboardManager.register('get_dashboard_tqdm_lock', get_dashboard_tqdm_lock)
112 | dm = DashboardManager(
113 | address=(DASHBOARD_MANAGER_CONNECTION_DETAILS.host, DASHBOARD_MANAGER_CONNECTION_DETAILS.port),
114 | authkey=b'mpire_dashboard'
115 | )
116 | dm.connect()
117 |
118 | DASHBOARD_TQDM_DICT = dm.get_dashboard_tqdm_dict()
119 | DASHBOARD_TQDM_DETAILS_DICT = dm.get_dashboard_tqdm_details_dict()
120 | DASHBOARD_TQDM_LOCK = dm.get_dashboard_tqdm_lock()
121 | return DASHBOARD_TQDM_DICT, DASHBOARD_TQDM_DETAILS_DICT, DASHBOARD_TQDM_LOCK
122 |
--------------------------------------------------------------------------------
/docs/usage/map/progress_bar.rst:
--------------------------------------------------------------------------------
1 | Progress bar
2 | ============
3 |
4 | .. contents:: Contents
5 | :depth: 2
6 | :local:
7 |
8 | Progress bar support is added through the tqdm_ package (installed by default when installing MPIRE). The easiest way
9 | to include a progress bar is by enabling the ``progress_bar`` flag in any of the ``map`` functions:
10 |
11 | .. code-block:: python
12 |
13 | with WorkerPool(n_jobs=4) as pool:
14 | pool.map(task, range(100), progress_bar=True)
15 |
16 | This will display a basic ``tqdm`` progress bar showing the time elapsed and remaining, the number of tasks completed
17 | (including a percentage value) and the speed (i.e., number of tasks completed per time unit).
18 |
19 |
20 | .. _progress_bar_style:
21 |
22 | Progress bar style
23 | ------------------
24 |
25 | You can switch to a different progress bar style by changing the ``progress_bar_style`` parameter. For example, when
26 | you require a notebook widget use ``'notebook'`` as the style:
27 |
28 | .. code-block:: python
29 |
30 | with WorkerPool(n_jobs=4) as pool:
31 | pool.map(task, range(100), progress_bar=True, progress_bar_style='notebook')
32 |
33 | The available styles are:
34 |
35 | - ``None``: use the default style (= ``'std'`` , see below)
36 | - ``'std'``: use the standard ``tqdm`` progress bar
37 | - ``'rich'``: use the rich progress bar (requires the ``rich`` package to be installed, see :ref:`richdep`)
38 | - ``'notebook'``: use the Jupyter notebook widget
39 | - ``'dashboard'``: use only the progress bar on the dashboard
40 |
41 | When in a terminal, using the ``'notebook'`` style will make the progress bar behave weirdly; this is not recommended.
42 |
43 | .. note::
44 |
45 | If you run into problems with getting the progress bar to work in a Jupyter notebook (with ``'notebook'`` style),
46 | have a look at :ref:`troubleshooting_progress_bar`.
47 |
48 | Changing the default style
49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
50 |
51 | You can change the default style by setting the :obj:`mpire.tqdm_utils.PROGRESS_BAR_DEFAULT_STYLE` variable:
52 |
53 | .. code-block:: python
54 |
55 | import mpire.tqdm_utils
56 |
57 | mpire.tqdm_utils.PROGRESS_BAR_DEFAULT_STYLE = 'notebook'
58 |
59 | .. _tqdm: https://pypi.python.org/pypi/tqdm
60 |
61 |
62 | Progress bar options
63 | --------------------
64 |
65 | The ``tqdm`` progress bar can be configured using the ``progress_bar_options`` parameter. This parameter accepts a
66 | dictionary with keyword arguments that will be passed to the ``tqdm`` constructor.
67 |
68 | Some options in ``tqdm`` will be overwritten by MPIRE. These include the ``iterable``, ``total`` and ``leave``
69 | parameters. The ``iterable`` is set to the iterable passed on to the ``map`` function. The ``total`` parameter is set to
70 | the number of tasks to be completed. The ``leave`` parameter is always set to ``True``. Some other parameters have a
71 | default value assigned to them, but can be overwritten by the user.
72 |
73 | Here's an example where we change the description, the units, and the colour of the progress bar:
74 |
75 | .. code-block:: python
76 |
77 | with WorkerPool(n_jobs=4) as pool:
78 | pool.map(some_func, some_data, progress_bar=True,
79 | progress_bar_options={'desc': 'Processing', 'unit': 'items', 'colour': 'green'})
80 |
81 | For a complete list of available options, check out the `tqdm docs`_.
82 |
83 | .. _`tqdm docs`: https://tqdm.github.io/docs/tqdm/#__init__
84 |
85 | Progress bar position
86 | ~~~~~~~~~~~~~~~~~~~~~
87 |
88 | You can easily print a progress bar on a different position on the terminal using the ``position`` parameter of
89 | ``tqdm``, which facilitates the use of multiple progress bars. Here's an example of using multiple progress bars with
90 | nested WorkerPools:
91 |
92 | .. code-block:: python
93 |
94 | def dispatcher(worker_id, X):
95 | with WorkerPool(n_jobs=4) as nested_pool:
96 | return nested_pool.map(task, X, progress_bar=True,
97 | progress_bar_options={'position': worker_id + 1})
98 |
99 | def main():
100 | with WorkerPool(n_jobs=4, daemon=False, pass_worker_id=True) as pool:
101 | pool.map(dispatcher, ((range(x, x + 100),) for x in range(100)), iterable_len=100,
102 | n_splits=4, progress_bar=True)
103 |
104 | main()
105 |
106 | We use ``worker_id + 1`` here because the worker IDs start at zero and we reserve position 0 for the progress bar of
107 | the main WorkerPool (which is the default).
108 |
109 | It goes without saying that you shouldn't specify the same progress bar position multiple times.
110 |
111 | .. note::
112 |
113 | When using the ``rich`` progress bar style, the ``position`` parameter cannot be used. An exception will be raised
114 | when trying to do so.
115 |
116 | .. note::
117 |
118 | Most progress bar options are completely ignored when in a Jupyter/IPython notebook session or in the MPIRE
119 | dashboard.
120 |
121 |
--------------------------------------------------------------------------------
/docs/getting_started.rst:
--------------------------------------------------------------------------------
1 | Getting started
2 | ===============
3 |
4 | Suppose you have a time consuming function that receives some input and returns its results. This could look like the
5 | following:
6 |
7 | .. code-block:: python
8 |
9 | import time
10 |
11 | def time_consuming_function(x):
12 | time.sleep(1) # Simulate that this function takes long to complete
13 | return ...
14 |
15 | results = [time_consuming_function(x) for x in range(10)]
16 |
17 | Running this function takes about 10 seconds to complete.
18 |
19 | Functions like these are known as `embarrassingly parallel`_ problems, functions that require little to no effort to
20 | turn into a parallel task. Parallelizing a simple function like this can be as easy as importing ``multiprocessing`` and
21 | using the ``multiprocessing.Pool`` class:
22 |
23 | .. _embarrassingly parallel: https://en.wikipedia.org/wiki/Embarrassingly_parallel
24 |
25 | .. code-block:: python
26 |
27 | from multiprocessing import Pool
28 |
29 | with Pool(processes=5) as pool:
30 | results = pool.map(time_consuming_function, range(10))
31 |
32 | We configured the pool to have 5 workers, so we can handle 5 tasks in parallel. As a result, this will complete in about
33 | 2 seconds.
34 |
35 | MPIRE can be used almost as a drop-in replacement to ``multiprocessing``. We use the :obj:`mpire.WorkerPool` class and
36 | call one of the available ``map`` functions:
37 |
38 | .. code-block:: python
39 |
40 | from mpire import WorkerPool
41 |
42 | with WorkerPool(n_jobs=5) as pool:
43 | results = pool.map(time_consuming_function, range(10))
44 |
45 | Similarly, this will complete in about 2 seconds. The differences in code are small: there's no need to learn a
46 | completely new multiprocessing syntax if you're used to vanilla ``multiprocessing``. The additional available
47 | functionality, though, is what sets MPIRE apart.
48 |
49 | Progress bar
50 | ------------
51 |
52 | Suppose we want to know the status of the current task: how many tasks are completed, how long before the work is ready?
53 | It's as simple as setting the ``progress_bar`` parameter to ``True``:
54 |
55 | .. code-block:: python
56 |
57 | with WorkerPool(n_jobs=5) as pool:
58 | results = pool.map(time_consuming_function, range(10), progress_bar=True)
59 |
60 | And it will output a nicely formatted tqdm_ progress bar.
61 |
62 | MPIRE also offers a dashboard, for which you need to install additional :ref:`dependencies <dashboarddep>`. See
63 | :ref:`Dashboard` for more information.
64 |
65 | .. _tqdm: https://tqdm.github.io/
66 |
67 |
68 | Shared objects
69 | --------------
70 |
71 | If you have one or more objects that you want to share between all workers you can make use of the copy-on-write
72 | ``shared_objects`` option of MPIRE. MPIRE will pass on these objects only once for each worker without
73 | copying/serialization. Only when the object is altered in the worker function will it be copied for that worker.
74 |
75 | .. note::
76 |
77 | Copy-on-write is not available on Windows, as it requires the start method ``fork``.
78 |
79 | .. code-block:: python
80 |
81 | def time_consuming_function(some_object, x):
82 | time.sleep(1) # Simulate that this function takes long to complete
83 | return ...
84 |
85 | def main():
86 | some_object = ...
87 | with WorkerPool(n_jobs=5, shared_objects=some_object, start_method='fork') as pool:
88 | results = pool.map(time_consuming_function, range(10), progress_bar=True)
89 |
90 | See :ref:`shared_objects` for more details.
91 |
92 | Worker initialization
93 | ---------------------
94 |
95 | Need to initialize each worker before starting the work? Have a look at the ``worker_state`` and ``worker_init``
96 | functionality:
97 |
98 | .. code-block:: python
99 |
100 | def init(worker_state):
101 | # Load a big dataset or model and store it in a worker specific worker_state
102 | worker_state['dataset'] = ...
103 | worker_state['model'] = ...
104 |
105 | def task(worker_state, idx):
106 | # Let the model predict a specific instance of the dataset
107 | return worker_state['model'].predict(worker_state['dataset'][idx])
108 |
109 | with WorkerPool(n_jobs=5, use_worker_state=True) as pool:
110 | results = pool.map(task, range(10), worker_init=init)
111 |
112 | Similarly, you can use the ``worker_exit`` parameter to let MPIRE call a function whenever a worker terminates. You can
113 | even let this exit function return results, which can be obtained later on. See the :ref:`worker_init_exit` section for
114 | more information.
115 |
116 |
117 | Worker insights
118 | ---------------
119 |
120 | When your multiprocessing setup isn't performing as you want it to and you have no clue what's causing it, there's the
121 | worker insights functionality. This will give you some insight into your setup, but it will not profile the function
122 | you're running (there are other libraries for that). Instead, it profiles the worker start up time, waiting time and
123 | working time. When worker init and exit functions are provided it will time those as well.
124 |
125 | Perhaps you're sending a lot of data over the task queue, which makes the waiting time go up. Whatever the case, you
126 | can enable and grab the insights using the ``enable_insights`` flag and :meth:`mpire.WorkerPool.get_insights` function,
127 | respectively:
128 |
129 | .. code-block:: python
130 |
131 | with WorkerPool(n_jobs=5, enable_insights=True) as pool:
132 | results = pool.map(time_consuming_function, range(10))
133 | insights = pool.get_insights()
134 |
135 | See :ref:`worker insights` for a more detailed example and expected output.
136 |
--------------------------------------------------------------------------------
/docs/usage/apply.rst:
--------------------------------------------------------------------------------
1 | .. _apply-family:
2 |
3 | Apply family
4 | ============
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 | :obj:`mpire.WorkerPool` implements two ``apply`` functions, which are very similar to the ones in the
11 | :mod:`multiprocessing` module:
12 |
13 | :meth:`mpire.WorkerPool.apply`
14 | Apply a function to a single task. This is a blocking call.
15 | :meth:`mpire.WorkerPool.apply_async`
16 | A variant of the above, but which is non-blocking. This returns an :obj:`mpire.async_result.AsyncResult` object.
17 |
18 | ``apply``
19 | ---------
20 |
21 | The ``apply`` function is a blocking call, which means that it will not return until the task is completed. If you want
22 | to run multiple different tasks in parallel, you should use the ``apply_async`` function instead. If you need
23 | to run the same function for many tasks in parallel, use the ``map`` functions instead.
24 |
25 | The ``apply`` function takes a function, positional arguments, and keyword arguments, similar to how
26 | :mod:`multiprocessing` does it.
27 |
28 | .. code-block:: python
29 |
30 | def task(a, b, c, d):
31 | return a + b + c + d
32 |
33 | with WorkerPool(n_jobs=1) as pool:
34 | result = pool.apply(task, args=(1, 2), kwargs={'d': 4, 'c': 3})
35 | print(result)
36 |
37 |
38 | ``apply_async``
39 | ---------------
40 |
41 | The ``apply_async`` function is a non-blocking call, which means that it will return immediately. It returns an
42 | :obj:`mpire.async_result.AsyncResult` object, which can be used to get the result of the task at a later moment in time.
43 |
44 | The ``apply_async`` function takes the same parameters as the ``apply`` function.
45 |
46 | .. code-block:: python
47 |
48 | def task(a, b):
49 | return a + b
50 |
51 | with WorkerPool(n_jobs=4) as pool:
52 | async_results = [pool.apply_async(task, args=(i, i)) for i in range(10)]
53 | results = [async_result.get() for async_result in async_results]
54 |
55 | Obtaining the results should happen while the pool is still running! E.g., the following will deadlock:
56 |
57 | .. code-block:: python
58 |
59 | with WorkerPool(n_jobs=4) as pool:
60 | async_results = [pool.apply_async(task, args=(i, i)) for i in range(10)]
61 |
62 | # Will wait forever
63 | results = [async_result.get() for async_result in async_results]
64 |
65 | You can, however, make use of the :meth:`mpire.WorkerPool.stop_and_join()` function to stop the workers and join the
66 | pool. This will make sure that all tasks are completed before the pool exits.
67 |
68 | .. code-block:: python
69 |
70 | with WorkerPool(n_jobs=4) as pool:
71 | async_results = [pool.apply_async(task, args=(i, i)) for i in range(10)]
72 | pool.stop_and_join()
73 |
74 | # Will not deadlock
75 | results = [async_result.get() for async_result in async_results]
76 |
77 | AsyncResult
78 | -----------
79 |
80 | The :obj:`mpire.async_result.AsyncResult` object has the following convenient methods:
81 |
82 | .. code-block:: python
83 |
84 | with WorkerPool(n_jobs=1) as pool:
85 | async_result = pool.apply_async(task, args=(1, 1))
86 |
87 | # Check if the task is completed
88 | is_completed = async_result.ready()
89 |
90 | # Wait until the task is completed, or until the timeout is reached.
91 | async_result.wait(timeout=10)
92 |
93 | # Get the result of the task. This will block until the task is completed,
94 | # or until the timeout is reached.
95 | result = async_result.get(timeout=None)
96 |
97 | # Check if the task was successful (i.e., did not raise an exception).
98 | # This will raise an exception if the task is not completed yet.
99 | is_successful = async_result.successful()
100 |
101 | Callbacks
102 | ---------
103 |
104 | Each ``apply`` function has a ``callback`` and ``error_callback`` argument. These are functions which are called when
105 | the task is finished. The ``callback`` function is called with the result of the task when the task was completed
106 | successfully, and the ``error_callback`` is called with the exception when the task failed.
107 |
108 | .. code-block:: python
109 |
110 | def task(a):
111 | return a + 1
112 |
113 | def callback(result):
114 | print("Task completed successfully with result:", result)
115 |
116 | def error_callback(exception):
117 | print("Task failed with exception:", exception)
118 |
119 | with WorkerPool(n_jobs=1) as pool:
120 | pool.apply(task, 42, callback=callback, error_callback=error_callback)
121 |
122 |
123 | Worker init and exit
124 | --------------------
125 |
126 | As with the ``map`` family of functions, the ``apply`` family of functions also has ``worker_init`` and ``worker_exit``
127 | arguments. These are functions which are called when a worker is started and stopped, respectively. See
128 | :ref:`worker_init_exit` for more information on these functions.
129 |
130 | .. code-block:: python
131 |
132 | def worker_init():
133 | print("Worker started")
134 |
135 | def worker_exit():
136 | print("Worker stopped")
137 |
138 | with WorkerPool(n_jobs=5) as pool:
139 | pool.apply(task, 42, worker_init=worker_init, worker_exit=worker_exit)
140 |
141 | There's a caveat though. When the first ``apply`` or ``apply_async`` function is executed, the entire pool of workers
142 | is started. This means that in the above example all five workers are started, while only one was needed. It also
143 | means that the ``worker_init`` function is set for all those workers at once, so you cannot have a different
144 | ``worker_init`` function for each apply task. A second, different ``worker_init`` function will simply be ignored.
145 |
146 | Similarly, the ``worker_exit`` function can only be set once as well. Additionally, exit functions are only called when
147 | a worker exits, which in this case translates to when the pool exits. This means that if you call ``apply`` or
148 | ``apply_async`` multiple times, the ``worker_exit`` function is only called once at the end. Use
149 | :meth:`mpire.WorkerPool.stop_and_join()` to stop the workers, which will cause the ``worker_exit`` function to be
150 | triggered for each worker.
151 |
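152 | A minimal sketch of this behavior, assuming hypothetical ``init_func_1``, ``init_func_2``, and ``exit_func_1``
153 | functions:
154 |
155 | .. code-block:: python
156 |
157 |     with WorkerPool(n_jobs=5) as pool:
158 |         pool.apply(task, 42, worker_init=init_func_1, worker_exit=exit_func_1)
159 |
160 |         # init_func_2 is ignored: the workers were already started with init_func_1
161 |         pool.apply(task, 43, worker_init=init_func_2)
162 |
163 |         # Stops the workers, causing exit_func_1 to be called for each of them
164 |         pool.stop_and_join()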
152 |
153 | Timeouts
154 | --------
155 |
156 | The ``apply`` family of functions also has ``task_timeout``, ``worker_init_timeout`` and ``worker_exit_timeout``
157 | arguments. These are timeouts for the task, the ``worker_init`` function and the ``worker_exit`` function, respectively.
158 | They work similarly as those for the ``map`` functions.
159 |
160 | When a single task times out, only that task is cancelled. The other tasks will continue to run. When a worker init or
161 | exit times out, the entire pool is stopped.
162 |
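163 | A minimal sketch, assuming the ``task`` and ``worker_init`` functions defined earlier:
164 |
165 | .. code-block:: python
166 |
167 |     with WorkerPool(n_jobs=4) as pool:
168 |         # Give the init function at most 5 seconds and the task itself at most 10 seconds
169 |         async_result = pool.apply_async(task, args=(1, 1), task_timeout=10,
170 |                                         worker_init=worker_init, worker_init_timeout=5)
171 |         result = async_result.get()
172 |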
163 | See :ref:`timeouts` for more information.
164 |
--------------------------------------------------------------------------------
/docs/usage/dashboard.rst:
--------------------------------------------------------------------------------
1 | .. _Dashboard:
2 |
3 | Dashboard
4 | =========
5 |
6 | The dashboard allows you to see progress information from a browser. This is convenient when running scripts in a
7 | notebook or screen, if you want to share the progress information with others, or if you want to get real-time worker
8 | insight information.
9 |
10 | The dashboard dependencies are not installed by default. See :ref:`dashboarddep` for more information.
11 |
12 | .. contents:: Contents
13 | :depth: 2
14 | :local:
15 |
16 |
17 | Starting the dashboard
18 | ----------------------
19 |
20 | You can start the dashboard programmatically:
21 |
22 | .. code-block:: python
23 |
24 | from mpire.dashboard import start_dashboard
25 |
26 | # Will return a dictionary with dashboard details
27 | dashboard_details = start_dashboard()
28 | print(dashboard_details)
29 |
30 | which will print:
31 |
32 | .. code-block:: python
33 |
34 | {'dashboard_port_nr': 8080,
35 | 'manager_host': 'localhost',
36 | 'manager_port_nr': 8081}
37 |
38 | This will start a dashboard on your local machine on port 8080. When that port is already in use, MPIRE will try the next
39 | port, until it finds an unused one. In the rare case that no ports are available up to port 8099, the function will raise an
40 | ``OSError``. By default, MPIRE tries ports 8080-8099. You can override this range by passing on a custom range object:
41 |
42 |
43 | .. code-block:: python
44 |
45 | dashboard_details = start_dashboard(range(9000, 9100))
46 |
47 | The returned dictionary contains the port number that is ultimately chosen. It also contains information on how to
48 | connect to this dashboard remotely.
49 |
50 | Another way of starting a dashboard is by using the bash script (this doesn't work on Windows!):
51 |
52 | .. code-block:: bash
53 |
54 | $ mpire-dashboard
55 |
56 | This will start a dashboard with the connection details printed on screen. It will say something like:
57 |
58 | .. code-block:: bash
59 |
60 | Starting MPIRE dashboard...
61 |
62 | MPIRE dashboard started on http://localhost:8080
63 | Server is listening on localhost:8098
64 | --------------------------------------------------
65 |
66 | The server part corresponds to the ``manager_host`` and ``manager_port_nr`` from the dictionary returned by
67 | :meth:`mpire.dashboard.start_dashboard`. Similarly to earlier, a custom port range can be provided:
68 |
69 | .. code-block:: bash
70 |
71 | $ mpire-dashboard --port-range 9000-9100
72 |
73 | The benefit of starting a dashboard this way is that your dashboard keeps running in case of errors in your script. You
74 | will be able to see what the error was, when it occurred and where it occurred in your code.
75 |
76 |
77 | Connecting to an existing dashboard
78 | -----------------------------------
79 |
80 | If you have started a dashboard elsewhere, you can connect to it using:
81 |
82 | .. code-block:: python
83 |
84 | from mpire.dashboard import connect_to_dashboard
85 |
86 | connect_to_dashboard(manager_port_nr=8081, manager_host='localhost')
87 |
88 | Make sure you use the ``manager_port_nr``, not the ``dashboard_port_nr`` in the examples above.
89 |
90 | You can connect to an existing dashboard on the same, but also on a remote machine (if the ports are open). If
91 | ``manager_host`` is omitted it will fall back to using ``'localhost'``.
92 |
93 |
94 | Using the dashboard
95 | -------------------
96 |
97 | Once connected to a dashboard you don't need to change anything to your code. When you have enabled the use of
98 | a progress bar in your ``map`` call the progress bar will automatically register itself to the dashboard server and show
99 | up, like here:
100 |
101 | .. code-block:: python
102 |
103 | from mpire import WorkerPool
104 | from mpire.dashboard import connect_to_dashboard
105 |
106 | connect_to_dashboard(8099)
107 |
108 | def square(x):
109 | import time
110 | time.sleep(0.01) # To be able to show progress
111 | return x * x
112 |
113 | with WorkerPool(4) as pool:
114 | pool.map(square, range(10000), progress_bar=True)
115 |
116 | This will show something like:
117 |
118 | .. thumbnail:: mpire_dashboard.png
119 | :title: MPIRE dashboard
120 |
121 | You can click on a progress bar row to view details about the function that is called (which has already been done in
122 | the screenshot above).
123 |
124 | It will let you know when a ``KeyboardInterrupt`` signal was sent to the running process:
125 |
126 | .. thumbnail:: mpire_dashboard_keyboard_interrupt.png
127 | :title: MPIRE dashboard - KeyboardInterrupt has been raised
128 |
129 | or show the traceback information in case of an exception:
130 |
131 | .. thumbnail:: mpire_dashboard_error.png
132 | :title: MPIRE dashboard - Error traceback
133 |
134 | In case you have enabled :ref:`worker insights` these insights will be shown real-time in the dashboard:
135 |
136 | .. thumbnail:: mpire_dashboard_insights.png
137 | :title: MPIRE dashboard - Worker insights
138 |
139 | Click on the ``Insights (click to expand/collapse)`` header to either expand or collapse the insight details.
140 |
141 | The dashboard will refresh automatically every 0.5 seconds.
142 |
143 |
144 | Stack level
145 | -----------
146 |
147 | By default, the dashboard will show information about the function that is called and where it is called from. However,
148 | in some cases where you have wrapped the function in another function, you might be less interested in the wrapper
149 | function and more interested in the function that is calling this wrapper. In such cases you can use
150 | :meth:`mpire.dashboard.set_stacklevel` to set the stack level. This is the number of levels in the stack to go back in
151 | order to find the frame that contains the function that is invoking MPIRE. For example:
152 |
153 | .. code-block:: python
154 |
155 | from mpire import WorkerPool
156 | from mpire.dashboard import set_stacklevel, start_dashboard
157 |
158 | class WorkerPoolWrapper:
159 | def __init__(self, n_jobs, progress_bar=True):
160 | self.n_jobs = n_jobs
161 | self.progress_bar = progress_bar
162 |
163 | def __call__(self, func, data):
164 | with WorkerPool(self.n_jobs) as pool:
165 | return pool.map(func, data, progress_bar=self.progress_bar)
166 |
167 | def square(x):
168 | return x * x
169 |
170 | if __name__ == '__main__':
171 | start_dashboard()
172 | executor = WorkerPoolWrapper(4, progress_bar=True)
173 | set_stacklevel(1) # default
174 | results = executor(square, range(10000))
175 | set_stacklevel(2)
176 | results = executor(square, range(10000))
177 |
178 | When you run this code you will see that the dashboard will show two progress bars. In both cases, the dashboard will
179 | show the ``square`` function as the function that is called. However, in the first case, it will show
180 | ``return pool.map(func, data, progress_bar=self.progress_bar)`` as the line where it is called from. In the second case,
181 | it will show the ``results = executor(square, range(10000))`` line.
182 |
--------------------------------------------------------------------------------
/docs/usage/map/map.rst:
--------------------------------------------------------------------------------
1 | map family of functions
2 | =======================
3 |
4 | .. contents:: Contents
5 | :depth: 2
6 | :local:
7 |
8 | :obj:`mpire.WorkerPool` implements four types of parallel ``map`` functions, being:
9 |
10 | :meth:`mpire.WorkerPool.map`
11 | Blocks until results are ready, results are ordered in the same way as the provided arguments.
12 | :meth:`mpire.WorkerPool.map_unordered`
13 | The same as :meth:`mpire.WorkerPool.map`, but results are ordered by task completion time. Usually faster than
14 | :meth:`mpire.WorkerPool.map`.
15 | :meth:`mpire.WorkerPool.imap`
16 | Lazy version of :meth:`mpire.WorkerPool.map`, returns a generator. The generator will give results back whenever new
17 | results are ready. Results are ordered in the same way as the provided arguments.
18 | :meth:`mpire.WorkerPool.imap_unordered`
19 | The same as :meth:`mpire.WorkerPool.imap`, but results are ordered by task completion time. Usually faster than
20 | :meth:`mpire.WorkerPool.imap`.
21 |
22 | When using a single worker the unordered versions are equivalent to their ordered counterparts.
23 |
24 | Iterable of arguments
25 | ---------------------
26 |
27 | Each ``map`` function should receive a function and an iterable of arguments, where the elements of the iterable can
28 | be single values or iterables that are unpacked as arguments. If an element is a dictionary, the ``(key, value)`` pairs
29 | will be unpacked with the ``**``-operator.
30 |
31 | .. code-block:: python
32 |
33 | def square(x):
34 | return x * x
35 |
36 | with WorkerPool(n_jobs=4) as pool:
37 | # 1. Square the numbers, results should be: [0, 1, 4, 9, 16, 25, ...]
38 | results = pool.map(square, range(100))
39 |
40 | The first example should work as expected, the numbers are simply squared. MPIRE knows how many tasks there are because
41 | a ``range`` object implements the ``__len__`` method (see :ref:`Task chunking`).
42 |
43 | .. code-block:: python
44 |
45 | with WorkerPool(n_jobs=4) as pool:
46 | # 2. Square the numbers, results should be: [0, 1, 4, 9, 16, 25, ...]
47 | # Note: don't execute this, it will take a long time ...
48 | results = pool.map(square, range(int(1e30)), iterable_len=int(1e30), chunk_size=1)
49 |
50 | In the second example the ``1e30`` number is too large for Python: try calling ``len(range(int(1e30)))``; this will
51 | throw an ``OverflowError`` (don't get me started ...). Therefore, we must use the ``iterable_len`` parameter to let
52 | MPIRE know how large the tasks list is. We also have to specify a chunk size here as the chunk size should be lower than
53 | ``sys.maxsize``.
54 |
55 | .. code-block:: python
56 |
57 | def multiply(x, y):
58 | return x * y
59 |
60 | with WorkerPool(n_jobs=4) as pool:
61 | # 3. Multiply the numbers, results should be [0, 101, 204, 309, 416, ...]
62 | for result in pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100):
63 | ...
64 |
65 | The third example shows an example of using multiple function arguments. Note that we use ``imap`` in this example,
66 | which allows us to process the results whenever they come available, not having to wait for all results to be ready.
67 |
68 | .. code-block:: python
69 |
70 | with WorkerPool(n_jobs=4) as pool:
71 | # 4. Multiply the numbers, results should be [0, 101, ...]
72 | for result in pool.imap(multiply, [{'x': 0, 'y': 100}, {'y': 101, 'x': 1}, ...]):
73 | ...
74 |
75 | The final example shows the use of an iterable of dictionaries. The (key, value) pairs are unpacked with the
76 | ``**``-operator, as you would expect, so it doesn't matter in what order the keys are stored. This works for
77 | ``collections.OrderedDict`` objects as well.
78 |
79 | Circumvent argument unpacking
80 | -----------------------------
81 |
82 | If you want to avoid unpacking and pass the tuples in example 3 or the dictionaries in example 4 as a whole, you can.
83 | We'll continue with example 4, but the workaround for example 3 is similar.
84 |
85 | Suppose we have the following function which expects a dictionary:
86 |
87 | .. code-block:: python
88 |
89 | def multiply_dict(d):
90 | return d['x'] * d['y']
91 |
92 | Then you would have to convert the list of dictionaries to a list of single argument tuples, where each argument is a
93 | dictionary:
94 |
95 | .. code-block:: python
96 |
97 | with WorkerPool(n_jobs=4) as pool:
98 | # Multiply the numbers, results should be [0, 101, ...]
99 | for result in pool.imap(multiply_dict, [({'x': 0, 'y': 100},),
100 | ({'y': 101, 'x': 1},),
101 | ...]):
102 | ...
103 |
104 | There is a utility function available that does this transformation for you:
105 |
106 | .. code-block:: python
107 |
108 | from mpire.utils import make_single_arguments
109 |
110 | with WorkerPool(n_jobs=4) as pool:
111 | # Multiply the numbers, results should be [0, 101, ...]
112 | for result in pool.imap(multiply_dict, make_single_arguments([{'x': 0, 'y': 100},
113 | {'y': 101, 'x': 1}, ...],
114 | generator=False)):
115 | ...
116 |
117 | :meth:`mpire.utils.make_single_arguments` expects an iterable of arguments and converts each element to a
118 | single-argument tuple. The ``generator`` argument specifies whether the function returns a generator or a materialized
119 | list. If we wanted a generator we would also need to pass on the iterable length to ``imap``, as shown below.
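
For example, a minimal sketch of the generator variant, reusing the ``multiply_dict`` function and dictionaries from
above:

.. code-block:: python

    from mpire.utils import make_single_arguments

    input_dicts = [{'x': 0, 'y': 100}, {'y': 101, 'x': 1}]

    with WorkerPool(n_jobs=4) as pool:
        # With generator=True nothing is materialized, so we have to tell MPIRE the length ourselves
        for result in pool.imap(multiply_dict, make_single_arguments(input_dicts, generator=True),
                                iterable_len=len(input_dicts)):
            ...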
120 |
121 | .. _mixing-multiple-map-calls:
122 |
123 | Mixing ``map`` functions
124 | ------------------------
125 |
126 | ``map`` functions cannot be used while another ``map`` function is still running. E.g., the following will raise an
127 | exception:
128 |
129 | .. code-block:: python
130 |
131 | with WorkerPool(n_jobs=4) as pool:
132 | imap_results = pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100)
133 | next(imap_results) # We actually have to start the imap function
134 |
135 | # Will raise because the imap function is still running
136 | map_results = pool.map(square, range(100))
137 |
138 | Make sure to first finish the ``imap`` function before starting a new ``map`` function. This holds for all ``map``
139 | functions.
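
For example, a minimal sketch that exhausts the ``imap`` generator first, reusing ``multiply`` and ``square`` from the
examples above:

.. code-block:: python

    with WorkerPool(n_jobs=4) as pool:
        # Exhaust the lazy imap results first ...
        imap_results = list(pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100))

        # ... after which starting a new map function is fine again
        map_results = pool.map(square, range(100))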
140 |
141 | Not exhausting a lazy ``imap`` function
142 | ---------------------------------------
143 |
144 | If you don't exhaust a lazy ``imap`` function, but do close the pool, the remaining tasks and results will be lost.
145 | E.g., the following will raise an exception:
146 |
147 | .. code-block:: python
148 |
149 | with WorkerPool(n_jobs=4) as pool:
150 | imap_results = pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100)
151 | first_result = next(imap_results) # We actually have to start the imap function
152 | pool.terminate()
153 |
154 | # This will raise
155 | results = list(imap_results)
156 |
157 | Similarly, exiting the ``with`` block terminates the pool as well:
158 |
159 | .. code-block:: python
160 |
161 | with WorkerPool(n_jobs=4) as pool:
162 | imap_results = pool.imap(multiply, zip(range(100), range(100, 200)), iterable_len=100)
163 | first_result = next(imap_results) # We actually have to start the imap function
164 |
165 | # This will raise
166 | results = list(imap_results)
167 |
--------------------------------------------------------------------------------
/docs/mpire.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. _secret:
4 |
5 | "The Empire"
6 | ============
7 |
8 | .. code-block:: none
9 |
10 | ,ooo888888888888888oooo,
11 | o8888YYYYYY77iiiiooo8888888o
12 | 8888YYYY77iiYY8888888888888888
13 | [88YYY77iiY88888888888888888888]
14 | 88YY7iYY888888888888888888888888
15 | [88YYi 88888888888888888888888888]
16 | i88Yo8888888888888888888888888888i
17 | i] ^^^88888888^^^ o [i
18 | oi8 i o8o i 8io
19 | ,77788o ^^ ,oooo8888888ooo, ^ o88777,
20 | 7777788888888888888888888888888888877777
21 | 77777888888888888888888888888888877777
22 | 77777788888888^7777777^8888888777777
23 | ,oooo888 ooo 88888778888^7777ooooo7777^8887788888 ,o88^^^^888oo
24 | o8888777788[];78 88888888888888888888888888888888888887 7;8^ 888888888oo^88
25 | o888888iii788 ]; o 78888887788788888^;;^888878877888887 o7;[]88888888888888o
26 | 88888877 ii78[]8;7o 7888878^ ^8788^;;;;;;^878^ ^878877 o7;8 ]878888888888888
27 | [88888888887888 87;7oo 777888o8888^;ii;;ii;^888o87777 oo7;7[]8778888888888888
28 | 88888888888888[]87;777oooooooooooooo888888oooooooooooo77;78]88877i78888888888
29 | o88888888888888 877;7877788777iiiiiii;;;;;iiiiiiiii77877i;78] 88877i;788888888
30 | 88^;iiii^88888 o87;78888888888888888888888888888888888887;778] 88877ii;7788888
31 | ;;;iiiii7iiii^ 87;;888888888888888888888888888888888888887;778] 888777ii;78888
32 | ;iiiii7iiiii7iiii77;i88888888888888888888i7888888888888888877;77i 888877777ii78
33 | iiiiiiiiiii7iiii7iii;;;i7778888888888888ii7788888888888777i;;;;iiii 88888888888
34 | i;iiiiiiiiiiii7iiiiiiiiiiiiiiiiiiiiiiiiii8877iiiiiiiiiiiiiiiiiii877 88888
35 | ii;;iiiiiiiiiiiiii;;;ii^^^;;;ii77777788888888888887777iii;; 77777 78
36 | 77iii;;iiiiiiiiii;;;ii;;;;;;;;;^^^^8888888888888888888777ii;; ii7 ;i78
37 | ^ii;8iiiiiiii ';;;;ii;;;;;;;;;;;;;;;;;;^^oo ooooo^^^88888888;;i7 7;788
38 | o ^;;^^88888^ 'i;;;;;;;;;;;;;;;;;;;;;;;;;;;^^^88oo^^^^888ii7 7;i788
39 | 88ooooooooo ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 788oo^;; 7;i888
40 | 887ii8788888 ;;;;;;;ii;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;^87 7;788
41 | 887i8788888^ ;;;;;;;ii;;;;;;;oo;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;,,, ;;888
42 | 87787888888 ;;;;;;;ii;;;;;;;888888oo;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;,,;i788
43 | 87i8788888^ ';;;ii;;;;;;;8888878777ii8ooo;;;;;;;;;;;;;;;;;;;;;;;;;;i788 7
44 | 77i8788888 ioo;;;;;;oo^^ooooo ^7i88^ooooo;;;;;;;;;;;;;;;;;;;;i7888 78
45 | 7i87788888o 7;ii788887i7;7;788888ooooo7888888ooo;;;;;;;;;;;;;;oo ^^^ 78
46 | i; 7888888^ 8888^o;ii778877;7;7888887;;7;7788878;878;; ;;;;;;;i78888o ^
47 | i8 788888 [88888^^ ooo ^^^^^;;77888^^^^;;7787^^^^ ^^;;;; iiii;i78888888
48 | ^8 7888^ [87888 87 ^877i;i8ooooooo8778oooooo888877ii; iiiiiiii788888888
49 | ^^^ [7i888 87;; ^8i;;i7888888888888888887888888 i7iiiiiii88888^^
50 | 87;88 o87;;;;o 87i;;;78888788888888888888^^ o 8ii7iiiiii;;
51 | 87;i8 877;77888o ^877;;;i7888888888888^^ 7888 78iii7iii7iiii
52 | ^87; 877;778888887o 877;;88888888888^ 7ii7888 788oiiiiiiiii
53 | ^ 877;7 7888888887 877i;;8888887ii 87i78888 7888888888
54 | [87;;7 78888888887 87i;;888887i 87ii78888 7888888888]
55 | 877;7 7788888888887 887i;887i^ 87ii788888 78888888888
56 | 87;i8 788888888888887 887ii;;^ 87ii7888888 78888888888
57 | [87;i8 7888888888888887 ^^^^ 87ii77888888 78888888888
58 | 87;;78 7888888888888887ii 87i78888888 778888888888
59 | 87;788 7888888888888887i] 87i78888888 788888888888
60 | [87;88 778888888888888887 7ii78888888 788888888888
61 | 87;;88 78888888888888887] ii778888888 78888888888]
62 | 7;;788 7888888888888888] i7888888888 78888888888'
63 | 7;;788 7888888888888888 'i788888888 78888888888
64 | 7;i788 788888888888888] 788888888 77888888888]
65 | '7;788 778888888888888] [788888888 78888888888'
66 | ';77888 78888888888888 8888888888 7888888888]
67 | 778888 78888888888888 8888888888 7888888888]
68 | 78888 7888888888888] [8888888888 7888888888
69 | 7888 788888888888] 88888888888 788888888]
70 | 778 78888888888] ]888888888 778888888]
71 | oooooo ^88888^ ^88888^^^^^^^^8888]
72 | 87;78888ooooooo8o ,oooooo oo888oooooo
73 | [877;i77888888888] [;78887i8888878i7888;
74 | ^877;;ii7888ii788 ;i777;7788887787;778;
75 | ^87777;;;iiii777 ;77^^^^^^^^^^^^^^^^;;
76 | ^^^^^^^^^ii7] ^ o88888888877iiioo
77 | 77777o [88777777iiiiii;;778
78 | 77777iii 8877iiiii;;;77888888]
79 | 77iiii;8 [77ii;778 788888888888
80 | 7iii;;88 iii;78888 778888888888
81 | 77i;78888] ;;;;i88888 78888888888
82 | ,7;78888888 [;;i788888 7888888888]
83 | i;788888888 ;i7888888 7888888888
84 | ;788888888] i77888888 788888888]
85 | ';88888888' [77888888 788888888]
86 | [[8ooo88] 78888888 788888888
87 | [88888] 78888888 788888888
88 | ^^^ [7888888 77888888]
89 | 88888888 7888887
90 | 77888888 7888887
91 | ;i88888 788888i
92 | ,;;78888 788877i7
93 | ,7;;i;777777i7i;;7
94 | 87778^^^ ^^^^87778
95 | ^^^^ o777777o ^^^
96 | o77777iiiiii7777o
97 | 7777iiii88888iii777
98 | ;;;i7778888888877ii;;
99 | [i77888888^^^^8888877i]
100 | 77888^oooo8888oooo^8887]
101 | [788888888888888888888888]
102 | 88888888888888888888888888
103 | ]8888888^iiiiiiiii^888888]
104 | iiiiiiiiiiiiiiiiiiiiii
105 | ^^^^^^^^^^^^^
--------------------------------------------------------------------------------
/docs/troubleshooting.rst:
--------------------------------------------------------------------------------
1 | Troubleshooting
2 | ===============
3 |
4 | This section describes some known problems that can arise when using MPIRE.
5 |
6 | .. contents:: Contents
7 | :depth: 2
8 | :local:
9 |
10 |
11 | .. _troubleshooting_progress_bar:
12 |
13 | Progress bar issues with Jupyter notebooks
14 | ------------------------------------------
15 |
16 | When using the progress bar in a Jupyter notebook you might encounter some issues. A few of these are described below,
17 | together with possible solutions.
18 |
19 | IProgress not found
20 | ~~~~~~~~~~~~~~~~~~~
21 |
22 | When you see something like ``ImportError: IProgress not found. Please update jupyter and ipywidgets.``, this means
23 | ``ipywidgets`` is not installed. You can install it using ``pip``:
24 |
25 | .. code-block:: bash
26 |
27 | pip install ipywidgets
28 |
29 | or conda:
30 |
31 | .. code-block:: bash
32 |
33 | conda install -c conda-forge ipywidgets
34 |
35 | Have a look at the `ipywidgets documentation`_ for more information.
36 |
37 | .. _ipywidgets documentation: https://ipywidgets.readthedocs.io/en/stable/user_install.html
38 |
39 | Widget Javascript not detected
40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
41 |
42 | When you see something like ``Widget Javascript not detected. It may not be enabled properly.``, this means the
43 | Javascript extension is not enabled. You can enable it using the following command before starting your notebook:
44 |
45 | .. code-block:: bash
46 |
47 | jupyter nbextension enable --py --sys-prefix widgetsnbextension
48 |
49 | Note that you have to restart your notebook server after enabling the extension, simply restarting the kernel won't be
50 | enough.
51 |
52 | Unit tests
53 | ----------
54 |
55 | When using the ``'spawn'`` or ``'forkserver'`` method you'll probably run into one or two issues when running
56 | unittests in your own package. One problem that might occur is that your unittests will restart whenever the piece of
57 | code containing such a start method is called, leading to very funky terminal output. To remedy this problem make sure
58 | your ``setup`` call in ``setup.py`` is surrounded by an ``if __name__ == '__main__':`` clause:
59 |
60 | .. code-block:: python
61 |
62 | from setuptools import setup
63 |
64 | if __name__ == '__main__':
65 |
66 | # Call setup and install any dependencies you have inside the if-clause
67 | setup(...)
68 |
69 | See the 'Safe importing of main module' section at caveats_.
70 |
71 | The second problem you might encounter is that the semaphore tracker of multiprocessing will complain when you run
72 | individual (or a selection of) unittests using ``python setup.py test -s tests.some_test``. At the end of the tests you
73 | will see errors like:
74 |
75 | .. code-block:: python
76 |
77 | Traceback (most recent call last):
78 | File ".../site-packages/multiprocess/semaphore_tracker.py", line 132, in main
79 | cache.remove(name)
80 | KeyError: b'/mp-d3i13qd5'
81 | .../site-packages/multiprocess/semaphore_tracker.py:146: UserWarning: semaphore_tracker: There appear to be 58
82 | leaked semaphores to clean up at shutdown
83 | len(cache))
84 | .../site-packages/multiprocess/semaphore_tracker.py:158: UserWarning: semaphore_tracker: '/mp-f45dt4d6': [Errno 2]
85 | No such file or directory
86 | warnings.warn('semaphore_tracker: %r: %s' % (name, e))
87 | ...
88 |
89 | Your unittests will still succeed and run OK. Unfortunately, I've not found a remedy to this problem using
90 | ``python setup.py test`` yet. What you can use instead is something like the following:
91 |
92 | .. code-block:: bash
93 |
94 | python -m unittest tests.some_test
95 |
96 | This will work just fine. See the unittest_ documentation for more information.
97 |
98 | .. _caveats: https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods
99 | .. _unittest: https://docs.python.org/3.4/library/unittest.html#command-line-interface
100 |
101 |
102 | Shutting down takes a long time on error
103 | ----------------------------------------
104 |
105 | When you issue a ``KeyboardInterrupt`` or when an error occurs in the function that's run in parallel, there are
106 | situations where MPIRE needs a few seconds to shut down gracefully. This has to do with the fact that in these
107 | situations the task or results queue can still be quite full. MPIRE drains these queues until they're completely
108 | empty, so as to properly shut down and clean up every communication channel.
109 |
110 | To remedy this issue you can use the ``max_tasks_active`` parameter and set it to ``n_jobs * 2``, or similar. Aside
111 | from the added benefit that the workers can start more quickly, the queues won't get that full anymore and shutting down
112 | will be much quicker. See :ref:`max_active_tasks` for more information.
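
For example, a minimal sketch with 4 workers, where ``square`` is a placeholder for your own, possibly failing,
function:

.. code-block:: python

    with WorkerPool(n_jobs=4) as pool:
        # Keep at most n_jobs * 2 tasks in the queues at any point in time
        results = pool.map(square, range(10000), max_tasks_active=4 * 2)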
113 |
114 | When you're using a lazy map function, also be sure to iterate through the results; otherwise the results queue will
115 | remain full and draining it will take longer.
116 |
117 | .. _unpickable_tasks:
118 |
119 | Unpicklable tasks/results
120 | -------------------------
121 |
122 | Sometimes you can encounter deadlocks in your code when using MPIRE. When you encounter this, chances are some tasks or
123 | results from your script can't be pickled. MPIRE makes use of multiprocessing queues for inter-process communication and
124 | if your function returns unpicklable results the queue will unfortunately deadlock.
125 |
126 | The only way to remedy this problem in MPIRE would be to manually pickle objects before sending them to a queue and
127 | quit gracefully when encountering a pickle error. However, this would mean objects would always be pickled twice,
128 | which would add a heavy performance penalty and is therefore not an acceptable solution.
129 |
130 | Instead, the user should make sure their tasks and results are always picklable (which in most cases won't be a
131 | problem), or resort to setting ``use_dill=True``. The latter is capable of pickling a lot more exotic types. See
132 | :ref:`use_dill` for more information.
133 |
134 |
135 | AttributeError: Can't get attribute '<some_function>' on <module '__main__' (built-in)>
136 | ------------------------------------------------------------------------------------------
137 |
138 | This error can occur when inside an iPython or Jupyter notebook session and the function to parallelize is defined in
139 | that session. This is often the result of using ``spawn`` as start method (the default on Windows), which starts a new
140 | process without copying the function in question.
141 |
142 | This error is actually related to the :ref:`unpickable_tasks` problem and can be solved in a similar way. I.e., you can
143 | define your function in a file that can be imported by the child process, or you can resort to using ``dill`` by setting
144 | ``use_dill=True``. See :ref:`use_dill` for more information.
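
A minimal sketch of the ``dill`` route, assuming the dill dependencies are installed and ``some_notebook_function`` is
a hypothetical function defined in the notebook session:

.. code-block:: python

    with WorkerPool(n_jobs=4, use_dill=True) as pool:
        # dill serializes the notebook-defined function, so spawned workers can execute it
        results = pool.map(some_notebook_function, range(100))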
145 |
146 |
147 | .. _troubleshooting_windows:
148 |
149 | Windows
150 | -------
151 |
152 | * When using ``dill`` and an exception occurs, or when the exception occurs in an exit function, it can print additional
153 | ``OSError`` messages in the terminal, but they can be safely ignored.
154 | * The ``mpire-dashboard`` script does not work on Windows.
155 |
156 |
157 | .. _troubleshooting_macos:
158 |
159 | macOS
160 | -----
161 |
162 | * When encountering ``OSError: [Errno 24] Too many open files`` errors, use ``ulimit -n <number>`` to increase the
163 | limit of the number of open files. This is required because MPIRE uses file-descriptor based synchronization
164 | primitives and macOS has a very low default limit. For example, MPIRE uses about 190 file descriptors when using 10
165 | workers.
166 | * Pinning of processes to CPU cores is not supported on macOS. This is because macOS does not support the
167 | ``sched_setaffinity`` system call. A warning will be printed when trying to use this feature.
168 |
--------------------------------------------------------------------------------
/mpire/dashboard/utils.py:
--------------------------------------------------------------------------------
1 | import getpass
2 | import inspect
3 | import socket
4 | from functools import partial
5 | from typing import Callable, Dict, List, Sequence, Tuple, Union
6 | import types
7 |
8 | DASHBOARD_FUNCTION_STACKLEVEL = 1
9 |
10 |
11 | def get_two_available_ports(port_range: Sequence) -> Tuple[int, int]:
12 | """
13 | Get two available ports, one from the start and one from the end of the range
14 |
15 | :param port_range: Port range to try. Reverses the list and will then pick the first one available
16 | :raises OSError: If there are not enough ports available
17 | :return: Two available ports
18 | """
19 | def _port_available(port_nr: int) -> bool:
20 | """
21 | Checks if a port is available
22 |
23 | :param port_nr: Port number to check
24 | :return: True if available, False otherwise
25 | """
26 | try:
27 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
28 | s.bind(('', port_nr))
29 | s.close()
30 | return True
31 | except OSError:
32 | return False
33 |
34 | available_ports = set()
35 | for port_nr in port_range:
36 | if _port_available(port_nr):
37 | available_ports.add(port_nr)
38 | break
39 |
40 | for port_nr in reversed(port_range):
41 | if _port_available(port_nr):
42 | available_ports.add(port_nr)
43 | break
44 |
45 | if len(available_ports) != 2:
46 | raise OSError(f"Dashboard Manager Server: there are not enough ports available: {port_range}")
47 |
48 | return tuple(sorted(available_ports))
49 |
50 |
51 | def get_stacklevel() -> int:
52 | """
53 | Gets the stack level to use when obtaining function details (used for the dashboard)
54 |
55 | :return: Stack level
56 | """
57 | return DASHBOARD_FUNCTION_STACKLEVEL
58 |
59 |
60 | def set_stacklevel(stacklevel: int) -> None:
61 | """
62 | Sets the stack level to use when obtaining function details (used for the dashboard)
63 |
64 | :param stacklevel: Stack level
65 | """
66 | global DASHBOARD_FUNCTION_STACKLEVEL
67 | DASHBOARD_FUNCTION_STACKLEVEL = stacklevel
68 |
69 |
70 | def get_function_details(func: Callable) -> Dict[str, Union[str, int]]:
71 | """
72 | Obtain function details, including:
73 |
74 | - function filename
75 | - function line number
76 | - function name
77 | - invoked from filename
78 | - invoked from line number
79 | - invoked code context
80 |
81 |     :param func: Function that is (going to be) passed on to one of the map or apply functions. Only used here to
82 |         obtain the function details (e.g., filename, line number, and name) shown in the dashboard
85 | :return: Function details dictionary
86 | """
87 | # Get the frame in which the pool.map(...) was called. We obtain the current stack and skip all frames which
88 | # involve the current mpire module. If the desired stack level is higher than 1, we continue until we've reached
89 | # the desired stack level. We then obtain the code context of that frame.
90 | invoked_frame = None
91 | stacklevel = 0
92 | for frame_info in inspect.stack():
93 | if frame_info.frame.f_globals['__name__'].split('.')[0] != 'mpire' or stacklevel > 0:
94 | invoked_frame = frame_info
95 | stacklevel += 1
96 | if stacklevel == DASHBOARD_FUNCTION_STACKLEVEL:
97 | break
98 |
99 | # Obtain proper code context. Usually the last line of the invoked code is returned, but we want the complete
100 | # code snippet that called this function. That's why we increase the context size and need to find the start and
101 | # ending of the snippet. A context size of 10 should suffice. The end of the snippet is where we encounter the
102 | # line found when context=1 (i.e., what is returned in invoked_frame.code_context). The start is where we see
103 | # something along the lines of `.[i]map[_unordered](`.
104 | code_context = inspect.getframeinfo(invoked_frame.frame, context=10).code_context
105 | if code_context is not None:
106 | code_context = code_context[:code_context.index(invoked_frame.code_context[0]) + 1]
107 | code_context = find_calling_lines(code_context)
108 | invoked_line_no = invoked_frame.lineno - (len(code_context) - 1)
109 | code_context = ' '.join(line.strip() for line in code_context)
110 | else:
111 | invoked_line_no = 'N/A'
112 |
113 | if isinstance(func, partial):
114 | # If we're dealing with a partial, obtain the function within
115 | func = func.func
116 | elif hasattr(func, '__call__') and not isinstance(func, (type, types.FunctionType, types.MethodType)):
117 | # If we're dealing with a callable class instance, use its __call__ method
118 | func = func.__call__
119 |
120 | # We use a try/except block as some constructs don't allow this. E.g., in the case the function is a MagicMock
121 | # (i.e., in unit tests) these inspections will fail
122 | try:
123 | function_filename = inspect.getabsfile(func)
124 | function_line_no = func.__code__.co_firstlineno
125 | function_name = func.__name__
126 | except:
127 | function_filename = 'n/a'
128 | function_line_no = 'n/a'
129 | function_name = 'n/a'
130 |
131 | # Obtain user. This can fail when the current uid refers to a non-existing user, which can happen when running in a
132 | # container as a non-root user. See https://github.com/sybrenjansen/mpire/issues/128.
133 | try:
134 | user = getpass.getuser()
135 | except KeyError:
136 | user = "n/a"
137 |
138 | # Populate details
139 | func_details = {'user': f'{user}@{socket.gethostname()}',
140 | 'function_filename': function_filename,
141 | 'function_line_no': function_line_no,
142 | 'function_name': function_name,
143 | 'invoked_filename': invoked_frame.filename,
144 | 'invoked_line_no': invoked_line_no,
145 | 'invoked_code_context': code_context}
146 |
147 | return func_details
148 |
149 |
150 | def find_calling_lines(code_context: List[str]) -> List[str]:
151 | """
152 | Tries to find the lines corresponding to the calling function
153 |
154 | :param code_context: List of code lines
155 | :return: List of code lines
156 | """
157 | # Traverse the lines in reverse order. We need a closing bracket to indicate the end of the calling function. From
158 | # that point on we work our way backward until we find the corresponding opening bracket. There can be more bracket
159 | # groups in between, so we have to keep counting brackets until we've found the right one.
160 | n_parentheses_groups = 0
161 | found_parentheses_group = False
162 | inside_string = False
163 | inside_string_ch = None
164 | line_nr = 1
165 | for line_nr, line in enumerate(reversed(code_context), start=1):
166 | for ch in reversed(line):
167 |
168 | # If we're inside a string keep ignoring characters until we find the closing string character
169 | if inside_string:
170 | if ch == inside_string_ch:
171 | inside_string = False
172 |
173 | # Check if a string has started
174 | elif ch in {'"', "'"}:
175 | inside_string = True
176 | inside_string_ch = ch
177 |
178 | # Closing parenthesis group
179 | elif ch == ')':
180 | n_parentheses_groups += 1
181 | found_parentheses_group = True
182 |
183 | # Starting parenthesis group
184 | elif ch == '(':
185 | n_parentheses_groups -= 1
186 |
187 | # Check if we've found the corresponding opening bracket
188 | if found_parentheses_group and n_parentheses_groups == 0:
189 | break
190 |
191 | return code_context[-line_nr:]
192 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # Internal variables.
11 | PAPEROPT_a4 = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | # the i18n builder cannot share the environment and doctrees with the others
15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
16 |
17 | .PHONY: help
18 | help:
19 | 	@echo "Please use \`make <target>' where <target> is one of"
20 | @echo " html to make standalone HTML files"
21 | @echo " dirhtml to make HTML files named index.html in directories"
22 | @echo " singlehtml to make a single large HTML file"
23 | @echo " pickle to make pickle files"
24 | @echo " json to make JSON files"
25 | @echo " htmlhelp to make HTML files and a HTML help project"
26 | @echo " qthelp to make HTML files and a qthelp project"
27 | @echo " applehelp to make an Apple Help Book"
28 | @echo " devhelp to make HTML files and a Devhelp project"
29 | @echo " epub to make an epub"
30 | @echo " epub3 to make an epub3"
31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
32 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
34 | @echo " text to make text files"
35 | @echo " man to make manual pages"
36 | @echo " texinfo to make Texinfo files"
37 | @echo " info to make Texinfo files and run them through makeinfo"
38 | @echo " gettext to make PO message catalogs"
39 | @echo " changes to make an overview of all changed/added/deprecated items"
40 | @echo " xml to make Docutils-native XML files"
41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
42 | @echo " linkcheck to check all external links for integrity"
43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
44 | @echo " coverage to run coverage check of the documentation (if enabled)"
45 | @echo " dummy to check syntax errors of document sources"
46 |
47 | .PHONY: clean
48 | clean:
49 | rm -rf $(BUILDDIR)/*
50 |
51 | .PHONY: html
52 | html:
53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
54 | @echo
55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
56 |
57 | .PHONY: dirhtml
58 | dirhtml:
59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
60 | @echo
61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
62 |
63 | .PHONY: singlehtml
64 | singlehtml:
65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
66 | @echo
67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
68 |
69 | .PHONY: pickle
70 | pickle:
71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
72 | @echo
73 | @echo "Build finished; now you can process the pickle files."
74 |
75 | .PHONY: json
76 | json:
77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
78 | @echo
79 | @echo "Build finished; now you can process the JSON files."
80 |
81 | .PHONY: htmlhelp
82 | htmlhelp:
83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
84 | @echo
85 | @echo "Build finished; now you can run HTML Help Workshop with the" \
86 | ".hhp project file in $(BUILDDIR)/htmlhelp."
87 |
88 | .PHONY: qthelp
89 | qthelp:
90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
91 | @echo
92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lexsys.qhcp"
95 | @echo "To view the help file:"
96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lexsys.qhc"
97 |
98 | .PHONY: applehelp
99 | applehelp:
100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
101 | @echo
102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
103 | @echo "N.B. You won't be able to view it unless you put it in" \
104 | "~/Library/Documentation/Help or install it in your application" \
105 | "bundle."
106 |
107 | .PHONY: devhelp
108 | devhelp:
109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
110 | @echo
111 | @echo "Build finished."
112 | @echo "To view the help file:"
113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/lexsys"
114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lexsys"
115 | @echo "# devhelp"
116 |
117 | .PHONY: epub
118 | epub:
119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
120 | @echo
121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
122 |
123 | .PHONY: epub3
124 | epub3:
125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
126 | @echo
127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
128 |
129 | .PHONY: latex
130 | latex:
131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
132 | @echo
133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
135 | "(use \`make latexpdf' here to do that automatically)."
136 |
137 | .PHONY: latexpdf
138 | latexpdf:
139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
140 | @echo "Running LaTeX files through pdflatex..."
141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
143 |
144 | .PHONY: latexpdfja
145 | latexpdfja:
146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
147 | @echo "Running LaTeX files through platex and dvipdfmx..."
148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
150 |
151 | .PHONY: text
152 | text:
153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
154 | @echo
155 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
156 |
157 | .PHONY: man
158 | man:
159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
160 | @echo
161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
162 |
163 | .PHONY: texinfo
164 | texinfo:
165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
166 | @echo
167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
168 | @echo "Run \`make' in that directory to run these through makeinfo" \
169 | "(use \`make info' here to do that automatically)."
170 |
171 | .PHONY: info
172 | info:
173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
174 | @echo "Running Texinfo files through makeinfo..."
175 | make -C $(BUILDDIR)/texinfo info
176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
177 |
178 | .PHONY: gettext
179 | gettext:
180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
181 | @echo
182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
183 |
184 | .PHONY: changes
185 | changes:
186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
187 | @echo
188 | @echo "The overview file is in $(BUILDDIR)/changes."
189 |
190 | .PHONY: linkcheck
191 | linkcheck:
192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
193 | @echo
194 | @echo "Link check complete; look for any errors in the above output " \
195 | "or in $(BUILDDIR)/linkcheck/output.txt."
196 |
197 | .PHONY: doctest
198 | doctest:
199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
200 | @echo "Testing of doctests in the sources finished, look at the " \
201 | "results in $(BUILDDIR)/doctest/output.txt."
202 |
203 | .PHONY: coverage
204 | coverage:
205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
206 | @echo "Testing of coverage in the sources finished, look at the " \
207 | "results in $(BUILDDIR)/coverage/python.txt."
208 |
209 | .PHONY: xml
210 | xml:
211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
212 | @echo
213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
214 |
215 | .PHONY: pseudoxml
216 | pseudoxml:
217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
218 | @echo
219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
220 |
221 | .PHONY: dummy
222 | dummy:
223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
224 | @echo
225 | @echo "Build finished. Dummy builder generates no files."
226 |
--------------------------------------------------------------------------------
/mpire/dashboard/dashboard.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import getpass
3 | try:
4 | from importlib.resources import files as resource
5 | except ImportError:
6 | # Python < 3.9 compatibility
7 | from importlib_resources import files as resource
8 | import logging
9 | import os
10 | import signal
11 | import socket
12 | from datetime import datetime
13 | from multiprocessing import Event, Process
14 | from multiprocessing.managers import BaseProxy
15 | from typing import Dict, Optional, Sequence, Tuple, Union
16 |
17 | from flask import Flask, jsonify, render_template, request
18 | from markupsafe import escape
19 | from werkzeug.serving import make_server
20 |
21 | from mpire.dashboard.connection_classes import DashboardStartedEvent
22 | from mpire.dashboard.manager import (DASHBOARD_MANAGER_CONNECTION_DETAILS,
23 | get_manager_client_dicts, shutdown_manager_server, start_manager_server)
24 | from mpire.dashboard.utils import get_two_available_ports
25 |
26 | logger = logging.getLogger(__name__)
27 | logger_werkzeug = logging.getLogger('werkzeug')
28 | logger_werkzeug.setLevel(logging.ERROR)
29 | app = Flask(__name__)
30 | _server_process = None
31 | with open(resource('mpire.dashboard') / 'templates' / 'progress_bar.html', 'r') as fp:
32 | _progress_bar_html = fp.read()
33 |
34 | _DASHBOARD_MANAGER = None
35 | _DASHBOARD_TQDM_DICT = None
36 | _DASHBOARD_TQDM_DETAILS_DICT = None
37 | DASHBOARD_STARTED_EVENT = DashboardStartedEvent()
38 |
39 |
40 | @app.route('/')
41 | def index() -> str:
42 | """
43 | Obtain the index HTML
44 |
45 | :return: HTML
46 | """
47 | # Obtain user. This can fail when the current uid refers to a non-existing user, which can happen when running in a
48 | # container as a non-root user. See https://github.com/sybrenjansen/mpire/issues/128.
49 | try:
50 | user = getpass.getuser()
51 | except KeyError:
52 | user = "n/a"
53 | return render_template('index.html', username=user, hostname=socket.gethostname(),
54 | manager_host=DASHBOARD_MANAGER_CONNECTION_DETAILS.host or 'localhost',
55 | manager_port_nr=DASHBOARD_MANAGER_CONNECTION_DETAILS.port)
56 |
57 |
58 | @app.route('/_progress_bar_update')
59 | def progress_bar_update() -> str:
60 | """
61 | Obtain progress bar updates (should be called through AJAX)
62 |
63 | :return: JSON string containing progress bar updates
64 | """
65 | # As we get updates only when the progress bar is updated we need to fix the 'duration' and 'time remaining' parts
66 | # (time never stops)
67 | now = datetime.now()
68 | result = []
69 | for pb_id in sorted(_DASHBOARD_TQDM_DICT.keys()):
70 | progress = _DASHBOARD_TQDM_DICT.get(pb_id)
71 | if progress['total'] is None:
72 | progress['total'] = '?'
73 | if progress['success'] and progress['n'] != progress['total']:
74 | progress['duration'] = str(now - progress['started_raw']).rsplit('.', 1)[0]
75 | progress['remaining'] = (str(progress['finished_raw'] - now).rsplit('.', 1)[0]
76 | if progress['finished_raw'] is not None and progress['finished_raw'] > now
77 | else '-')
78 | result.append(progress)
79 |
80 | return jsonify(result=result)
81 |
82 |
83 | @app.route('/_progress_bar_new')
84 | def progress_bar_new() -> str:
85 | """
86 | Obtain a piece of HTML for a new progress bar (should be called through AJAX)
87 |
88 | :return: JSON string containing new progress bar HTML
89 | """
90 | pb_id = int(request.args['pb_id'])
91 | has_insights = request.args['has_insights'] == 'true'
92 |
93 | # Obtain progress bar details. Only show the user@host part if it doesn't equal the user@host of this process
94 | # (in case someone connected to this dashboard from another machine or user)
95 | progress_bar_details = _DASHBOARD_TQDM_DETAILS_DICT.get(pb_id)
96 | if progress_bar_details['user'] == f'{getpass.getuser()}@{socket.gethostname()}':
97 | progress_bar_details['user'] = ''
98 | else:
99 | progress_bar_details['user'] = '{}:'.format(progress_bar_details['user'])
100 |
101 | # Create table for worker insights
102 | insights_workers = []
103 | if has_insights:
104 | for worker_id in range(progress_bar_details['n_jobs']):
105 |                 insights_workers.append(f"<tr><td>{worker_id}</td>"
106 |                                         f"<td id='pb_{pb_id}_insights_worker_{worker_id}_tasks_completed'></td>"
107 |                                         f"<td id='pb_{pb_id}_insights_worker_{worker_id}_start_up_time'></td>"
108 |                                         f"<td id='pb_{pb_id}_insights_worker_{worker_id}_init_time'></td>"
109 |                                         f"<td id='pb_{pb_id}_insights_worker_{worker_id}_waiting_time'></td>"
110 |                                         f"<td id='pb_{pb_id}_insights_worker_{worker_id}_working_time'></td>"
111 |                                         f"<td id='pb_{pb_id}_insights_worker_{worker_id}_exit_time'></td>"
112 |                                         f"</tr>")
113 | insights_workers = "\n".join(insights_workers)
114 |
115 | return jsonify(result=_progress_bar_html.format(id=pb_id, insights_workers=insights_workers,
116 | has_insights='block' if has_insights else 'none',
117 | **{k: escape(v) for k, v in progress_bar_details.items()}))
118 |
119 |
120 | def start_dashboard(port_range: Sequence = range(8080, 8100)) -> Dict[str, Union[int, str]]:
121 | """
122 | Starts a new MPIRE dashboard
123 |
124 | :param port_range: Port range to try.
125 | :return: A dictionary containing the dashboard port number and manager host and port number being used
126 | """
127 | global _server_process, _DASHBOARD_MANAGER
128 |
129 | if not DASHBOARD_STARTED_EVENT.is_set():
130 |
131 | DASHBOARD_STARTED_EVENT.init()
132 |
133 | dashboard_port_nr, manager_port_nr = get_two_available_ports(port_range)
134 |
135 | # Set up manager server
136 | _DASHBOARD_MANAGER = start_manager_server(manager_port_nr)
137 |
138 | # Start flask server
139 | logging.getLogger('werkzeug').setLevel(logging.WARN)
140 | _server_process = Process(target=_run, args=(DASHBOARD_STARTED_EVENT, dashboard_port_nr,
141 | get_manager_client_dicts()),
142 | daemon=True, name='dashboard-process')
143 | _server_process.start()
144 | DASHBOARD_STARTED_EVENT.wait()
145 |
146 | # Return connect information
147 | return {'dashboard_port_nr': dashboard_port_nr,
148 | 'manager_host': DASHBOARD_MANAGER_CONNECTION_DETAILS.host or socket.gethostname(),
149 | 'manager_port_nr': DASHBOARD_MANAGER_CONNECTION_DETAILS.port}
150 |
151 | else:
152 | raise RuntimeError("You already have a running dashboard")
153 |
154 |
155 | @atexit.register
156 | def shutdown_dashboard() -> None:
157 | """ Shuts down the dashboard """
158 | if DASHBOARD_STARTED_EVENT.is_set():
159 | global _server_process, _DASHBOARD_MANAGER, _DASHBOARD_TQDM_DICT, _DASHBOARD_TQDM_DETAILS_DICT
160 | if _server_process is not None:
161 | # Send SIGINT to the server process, which is the only way to stop it without causing semaphore leaks
162 | os.kill(_server_process.pid, signal.SIGINT)
163 | _server_process.join()
164 | shutdown_manager_server(_DASHBOARD_MANAGER)
165 | _DASHBOARD_MANAGER = None
166 | _DASHBOARD_TQDM_DICT = None
167 | _DASHBOARD_TQDM_DETAILS_DICT = None
168 | DASHBOARD_STARTED_EVENT.reset()
169 |
170 |
171 | def connect_to_dashboard(manager_port_nr: int, manager_host: Optional[Union[bytes, str]] = None) -> None:
172 | """
173 | Connects to an existing MPIRE dashboard
174 |
175 | :param manager_port_nr: Port to use when connecting to a manager
176 | :param manager_host: Host to use when connecting to a manager. If ``None`` it will use localhost
177 | """
178 | global _DASHBOARD_MANAGER, DASHBOARD_MANAGER_CONNECTION_DETAILS
179 |
180 | if DASHBOARD_STARTED_EVENT.is_set():
181 | raise RuntimeError("You're already connected to a running dashboard")
182 |
183 | # Set connection variables so we can connect to the right manager
184 | manager_host = manager_host or "127.0.0.1"
185 | DASHBOARD_MANAGER_CONNECTION_DETAILS.host = manager_host
186 | DASHBOARD_MANAGER_CONNECTION_DETAILS.port = manager_port_nr
187 |
188 | # Try to connect
189 | try:
190 | get_manager_client_dicts()
191 | except ConnectionRefusedError:
192 |         host = manager_host.decode() if isinstance(manager_host, bytes) else manager_host
193 |         raise ConnectionRefusedError(f"Could not connect to dashboard manager at {host}:{manager_port_nr}")
194 |
195 | DASHBOARD_STARTED_EVENT.set()
196 |
197 |
198 | def _run(started: Event, dashboard_port_nr: int, manager_client_dicts: Tuple[BaseProxy, BaseProxy, BaseProxy]) -> None:
199 | """
200 | Starts a dashboard server
201 |
202 | :param started: Event that signals the dashboard server has started
203 |     :param dashboard_port_nr: Dashboard port number
204 |     :param manager_client_dicts: Manager client proxies holding, amongst others, the progress bar data and
205 |         progress bar details dicts
206 | """
207 | global _DASHBOARD_TQDM_DICT, _DASHBOARD_TQDM_DETAILS_DICT
208 | _DASHBOARD_TQDM_DICT, _DASHBOARD_TQDM_DETAILS_DICT, _ = manager_client_dicts
209 |
210 | # Start server
211 | server = make_server('0.0.0.0', dashboard_port_nr, app)
212 | started.set()
213 | logger.info(f"Server started on 0.0.0.0:{dashboard_port_nr}")
214 | server.serve_forever()
215 |
--------------------------------------------------------------------------------
/mpire/dashboard/templates/progress_bar.html:
--------------------------------------------------------------------------------
[progress_bar.html: template markup stripped during extraction; only the text labels survived. The template defines one
table row per progress bar, containing: the progress bar itself with {id}, "n / total", started, duration, remaining
and finished cells; a "Task details" block showing "Function: {function_name}, on line {function_line_no} of
{user}{function_filename}" and "Invoked on line {invoked_line_no} of {invoked_filename}, through
{invoked_code_context}"; and an "Insights (click to expand)" section with a "Global stats" table (Start up / Init /
Waiting / Working / Exit time vs. Total, Mean, Std, Ratio (%)), a top-5 "Task stats" table (Time, Arguments) and a
"Worker stats" table (Worker, Tasks completed, T. start up time, T. init time, T. waiting time, T. working time,
T. exit time) filled via the {insights_workers} placeholder.]
--------------------------------------------------------------------------------
/mpire/async_result.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import queue
4 | import threading
5 | from enum import Enum, auto
6 | from typing import Any, Callable, Dict, List, Optional, Union
7 |
8 | from mpire.comms import EXIT_FUNC, MAIN_PROCESS
9 |
10 | job_counter = itertools.count()
11 |
12 |
13 | class JobType(Enum):
14 | MAIN = auto()
15 | INIT = auto()
16 | MAP = auto()
17 | EXIT = auto()
18 | APPLY = auto()
19 |
20 |
21 | class AsyncResult:
22 | """Adapted from ``multiprocessing.pool.ApplyResult``."""
23 |
24 | def __init__(
25 | self,
26 | cache: Dict,
27 | callback: Optional[Callable],
28 | error_callback: Optional[Callable],
29 | job_id: Optional[int] = None,
30 | delete_from_cache: bool = True,
31 | timeout: Optional[float] = None,
32 | ) -> None:
33 | """
34 | :param cache: Cache for storing intermediate results
35 | :param callback: Callback function to call when the task is finished. The callback function receives the output
36 | of the function as its argument
37 | :param error_callback: Callback function to call when the task has failed. The callback function receives the
38 | exception as its argument
39 | :param job_id: Job ID of the task. If None, a new job ID is generated
40 | :param delete_from_cache: If True, the result is deleted from the cache when the task is finished
41 | :param timeout: Timeout in seconds for a single task. When the timeout is exceeded, MPIRE will raise a
42 | ``TimeoutError``. Use ``None`` to disable (default)
43 | """
44 | self._cache = cache
45 | self._callback = callback
46 | self._error_callback = error_callback
47 | self._delete_from_cache = delete_from_cache
48 | self._timeout = timeout
49 |
50 | self.type = JobType.APPLY
51 | self.job_id = next(job_counter) if job_id is None else job_id
52 | self._ready_event = threading.Event()
53 | self._success = None
54 | self._value = None
55 | if self.job_id in self._cache:
56 | raise ValueError(f"Job ID {job_id} already exists in cache")
57 | self._cache[self.job_id] = self
58 |
59 | def ready(self) -> bool:
60 | """
61 | :return: Returns True if the task is finished
62 | """
63 | return self._ready_event.is_set()
64 |
65 | def successful(self) -> bool:
66 | """
67 | :return: Returns True if the task has finished successfully
68 | :raises: ValueError if the task is not finished yet
69 | """
70 | if not self.ready():
71 | raise ValueError(f"{self.job_id} is not ready")
72 | return self._success
73 |
74 | def wait(self, timeout: Optional[float] = None) -> None:
75 | """
76 | Wait until the task is finished
77 |
78 | :param timeout: Timeout in seconds. If None, wait indefinitely
79 | """
80 | self._ready_event.wait(timeout)
81 |
82 | def get(self, timeout: Optional[float] = None) -> Any:
83 | """
84 | Wait until the task is finished and return the output of the function
85 |
86 | :param timeout: Timeout in seconds. If None, wait indefinitely
87 | :return: Output of the function
88 | :raises: TimeoutError if the task is not finished within the timeout. When the task has failed, the exception
89 | raised by the function is re-raised
90 | """
91 | self.wait(timeout)
92 | if not self.ready():
93 | raise TimeoutError
94 | if self._success:
95 | return self._value
96 | else:
97 | raise self._value
98 |
99 | def _set(self, success: bool, result: Any) -> None:
100 | """
101 | Set the result of the task and call any callbacks, when provided. This also removes the task from the cache, as
102 | it's no longer needed there. The user should store a reference to the result object
103 |
104 | :param success: True if the task has finished successfully
105 | :param result: Output of the function or the exception raised by the function
106 | """
107 | self._success = success
108 | self._value = result
109 |
110 | if self._callback and self._success:
111 | self._callback(self._value)
112 |
113 | if self._error_callback and not self._success:
114 | self._error_callback(self._value)
115 |
116 | self._ready_event.set()
117 | if self._delete_from_cache:
118 | del self._cache[self.job_id]
119 |
120 |
121 | class UnorderedAsyncResultIterator:
122 | """Stores results of a task and provides an iterator to obtain the results in an unordered fashion"""
123 |
124 | def __init__(
125 | self, cache: Dict, n_tasks: Optional[int], job_id: Optional[int] = None, timeout: Optional[float] = None
126 | ) -> None:
127 | """
128 | :param cache: Cache for storing intermediate results
129 |         :param n_tasks: Number of tasks that will be executed. If None, we don't know the length yet
130 | :param job_id: Job ID of the task. If None, a new job ID is generated
131 | :param timeout: Timeout in seconds for a single task. When the timeout is exceeded, MPIRE will raise a
132 | ``TimeoutError``. Use ``None`` to disable (default)
133 | """
134 | self._cache = cache
135 | self._n_tasks = None
136 | self._timeout = timeout
137 |
138 | self.type = JobType.MAP
139 | self.job_id = next(job_counter) if job_id is None else job_id
140 | self._items = collections.deque()
141 | self._condition = threading.Condition(lock=threading.Lock())
142 | self._n_received = 0
143 | self._n_returned = 0
144 | self._exception = None
145 | self._got_exception = threading.Event()
146 | if self.job_id in self._cache:
147 | raise ValueError(f"Job ID {job_id} already exists in cache")
148 | self._cache[self.job_id] = self
149 |
150 | if n_tasks is not None:
151 | self.set_length(n_tasks)
152 |
153 | def __iter__(self) -> "UnorderedAsyncResultIterator":
154 | return self
155 |
156 | def next(self, block: bool = True, timeout: Optional[float] = None) -> Any:
157 | """
158 | Obtain the next unordered result for the task
159 |
160 | :param block: If True, wait until the next result is available. If False, raise queue.Empty if no result is
161 | available
162 | :param timeout: Timeout in seconds. If None, wait indefinitely
163 | :return: The next result
164 | """
165 | if self._items:
166 | self._n_returned += 1
167 | return self._items.popleft()
168 |
169 | if self._n_tasks is not None and self._n_returned == self._n_tasks:
170 | raise StopIteration
171 |
172 | if not block:
173 | raise queue.Empty
174 |
175 | # We still expect results. Wait until the next result is available
176 | with self._condition:
177 | while not self._items:
178 | timed_out = not self._condition.wait(timeout=timeout)
179 | if timed_out:
180 | raise queue.Empty
181 | if self._n_tasks is not None and self._n_returned == self._n_tasks:
182 | raise StopIteration
183 |
184 | self._n_returned += 1
185 | return self._items.popleft()
186 |
187 | __next__ = next
188 |
189 | def wait(self) -> None:
190 | """
191 | Wait until all results are available
192 | """
193 | with self._condition:
194 | while self._n_tasks is None or self._n_received < self._n_tasks:
195 | self._condition.wait()
196 |
197 | def _set(self, success: bool, result: Any) -> None:
198 | """
199 | Set the result of the task
200 |
201 | :param success: True if the task has finished successfully
202 | :param result: Output of the function or the exception raised by the function
203 | """
204 | if success:
205 | # Add the result to the queue and notify the iterator
206 | self._n_received += 1
207 | self._items.append(result)
208 | with self._condition:
209 | self._condition.notify()
210 | else:
211 | self._exception = result
212 | self._got_exception.set()
213 |
214 | def set_length(self, length: int) -> None:
215 | """
216 | Set the length of the iterator
217 |
218 | :param length: Length of the iterator
219 | """
220 | if self._n_tasks is not None:
221 | if self._n_tasks != length:
222 | raise ValueError(
223 | f"Length of iterator has already been set to {self._n_tasks}, but is now set to {length}"
224 | )
225 | # Length has already been set. No need to do anything
226 | return
227 |
228 | with self._condition:
229 | self._n_tasks = length
230 | self._condition.notify()
231 |
232 | def get_exception(self) -> Exception:
233 | """
234 | :return: The exception raised by the function
235 | """
236 | self._got_exception.wait()
237 | return self._exception
238 |
239 | def remove_from_cache(self) -> None:
240 | """
241 | Remove the iterator from the cache
242 | """
243 | del self._cache[self.job_id]
244 |
245 |
246 | class AsyncResultWithExceptionGetter(AsyncResult):
247 |
248 | def __init__(self, cache: Dict, job_id: int) -> None:
249 | super().__init__(
250 | cache, callback=None, error_callback=None, job_id=job_id, delete_from_cache=False, timeout=None
251 | )
252 | self.type = JobType.MAIN if job_id == MAIN_PROCESS else JobType.INIT
253 |
254 | def get_exception(self) -> Exception:
255 | """
256 | :return: The exception raised by the function
257 | """
258 | self.wait()
259 | return self._value
260 |
261 | def reset(self) -> None:
262 | """
263 | Reset the result object
264 | """
265 | self._success = None
266 | self._value = None
267 | self._ready_event.clear()
268 |
269 |
270 | class UnorderedAsyncExitResultIterator(UnorderedAsyncResultIterator):
271 |
272 | def __init__(self, cache: Dict) -> None:
273 | super().__init__(cache, n_tasks=None, job_id=EXIT_FUNC, timeout=None)
274 | self.type = JobType.EXIT
275 |
276 | def get_results(self) -> List[Any]:
277 | """
278 | :return: List of exit results
279 | """
280 | return list(self._items)
281 |
282 | def reset(self) -> None:
283 | """
284 | Reset the result object
285 | """
286 | self._n_tasks = None
287 | self._items.clear()
288 | self._n_received = 0
289 | self._n_returned = 0
290 | self._exception = None
291 | self._got_exception.clear()
292 |
293 |
294 | AsyncResultType = Union[
295 | AsyncResult, AsyncResultWithExceptionGetter, UnorderedAsyncResultIterator, UnorderedAsyncExitResultIterator
296 | ]
297 |
--------------------------------------------------------------------------------
/mpire/dashboard/static/refresh.js:
--------------------------------------------------------------------------------
1 | // Make ajax calls synchronous. Note that this is deprecated because of possible user experience problems, but in our
2 | // case this doesn't influence it. It actually makes it better
3 | $.ajaxSetup({
4 | async: false
5 | });
6 |
7 |
8 | // Enable tooltips (uses jQuery)
9 | $(function() {
10 | $(document).tooltip();
11 | });
12 |
13 |
14 | var progress_bar_animation_duration = 450;
15 | var refresh_interval = 500;
16 | var completed_pb_ids = {};
17 | refresh();
18 | setInterval(refresh, refresh_interval);
19 |
20 |
21 | // Update progress bar given an ID and a progress (between 0-1)
22 | function update_progress_bar(pb_id, progress)
23 | {
24 | $("#pb_" + pb_id).stop().css("width", $("#pb_" + pb_id).width()).animate(
25 | {
26 | width: (progress * 100) + '%',
27 | easing: 'linear'
28 | },
29 | {
30 | duration: progress_bar_animation_duration,
31 | start: function(promise)
32 | {
33 | // Set text
34 | if (progress * $("#pb_" + pb_id + "_container").width() != 0)
35 | {
36 | $(this).text(Math.round(progress * 100) + '%');
37 | }
38 | }
39 | });
40 | }
41 |
42 |
43 | /**
44 | * http://stackoverflow.com/questions/2353211/hsl-to-rgb-color-conversion
45 | *
46 | * Converts an HSL color value to RGB. Conversion formula
47 | * adapted from http://en.wikipedia.org/wiki/HSL_color_space.
48 | * Assumes h, s, and l are contained in the set [0, 1] and
49 | * returns r, g, and b in the set [0, 255].
50 | *
51 | * @param Number h The hue
52 | * @param Number s The saturation
53 | * @param Number l The lightness
54 | * @return Array The RGB representation
55 | */
56 | function hslToRgb(h, s, l)
57 | {
58 | var r, g, b;
59 |
60 | if(s == 0){
61 | r = g = b = l; // achromatic
62 | }else{
63 | function hue2rgb(p, q, t){
64 | if(t < 0) t += 1;
65 | if(t > 1) t -= 1;
66 | if(t < 1/6) return p + (q - p) * 6 * t;
67 | if(t < 1/2) return q;
68 | if(t < 2/3) return p + (q - p) * (2/3 - t) * 6;
69 | return p;
70 | }
71 |
72 | var q = l < 0.5 ? l * (1 + s) : l + s - l * s;
73 | var p = 2 * l - q;
74 | r = hue2rgb(p, q, h + 1/3);
75 | g = hue2rgb(p, q, h);
76 | b = hue2rgb(p, q, h - 1/3);
77 | }
78 |
79 | return [Math.floor(r * 255), Math.floor(g * 255), Math.floor(b * 255)];
80 | }
81 |
82 |
83 | // convert a number to a color using hsl
84 | function numberToColorHsl(i)
85 | {
86 | // as the function expects a value between 0 and 1, and red = 0° and green = 120°
87 | // we convert the input to the appropriate hue value
88 | var hue = i * 1.2 / 3.6;
89 | // we convert hsl to rgb (saturation 100%, lightness 50%)
90 | var rgb = hslToRgb(hue, 1, .7);
91 | // we format to css value and return
92 | return 'rgb(' + rgb[0] + ',' + rgb[1] + ',' + rgb[2] + ')';
93 | }
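// Worked examples of the mapping above: numberToColorHsl(0) gives hue 0° and returns 'rgb(255,102,102)' (reddish),
// while numberToColorHsl(1) gives hue 120° and returns 'rgb(102,255,102)' (greenish). Ratios near 1 therefore render
// in green and ratios near 0 in red.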
94 |
95 |
96 | // Hide part of a text if it's too long and add read more/read less functionality
97 | function AddReadMore(tag_id, char_limit, text)
98 | {
99 | // Only update when the text changes. We strip the ' ... Read more'/' ... Read less' parts (14 characters)
100 | var original_text = $("#" + tag_id).text();
101 | if (original_text.substring(0, original_text.length - 14) == text)
102 | return;
103 |
104 | if (text.length > char_limit)
105 | {
106 | var first_part = text.substring(0, char_limit);
107 | var second_part = text.substring(char_limit, text.length);
108 |         var new_html = first_part + "<span class='read_more_text' style='display: none;'>" + second_part +
109 |             "</span> " + "<a href='javascript:;' class='read_more_link'>... Read more</a>";
110 |         // (assumption: the 'read_more_text'/'read_more_link' class names must match whatever click handler
111 |         //  toggles the hidden part and swaps 'Read more'/'Read less')
112 | }
113 | else
114 | {
115 | var new_html = text;
116 | }
117 |
118 | $("#" + tag_id).html(new_html);
119 | }
120 |
121 |
122 | // Refresh contents
123 | function refresh()
124 | {
125 | $.getJSON($SCRIPT_ROOT + '/_progress_bar_update', {}, function(data)
126 | {
127 | var i, worker_id, worker_prefix, task_idx, task_prefix;
128 | for (i = 0; i < data.result.length; i++)
129 | {
130 | var pb = data.result[i];
131 | var is_new = false;
132 |
133 | // Check if progress-bar exists
134 | if ($('#pb_' + pb.id).length == 0)
135 | {
136 | // If not, request new HTML for progress bar and prepend it to table
137 | $.getJSON($SCRIPT_ROOT + '/_progress_bar_new',
138 | {pb_id: pb.id, has_insights: !$.isEmptyObject(pb.insights)}, function(new_data)
139 | {
140 | $('#progress-table > tbody').prepend(new_data.result);
141 | });
142 |
143 | is_new = true;
144 | }
145 |
146 |             // If it's already completed, do nothing, except when this is a new progress bar (e.g., when the page
147 |             // was refreshed) or when the success status has changed
148 | if (pb.id in completed_pb_ids && completed_pb_ids[pb.id] === pb.success && !is_new)
149 | {
150 | continue;
151 | }
152 |
153 | // Set new progress
154 | update_progress_bar(pb.id, pb.percentage);
155 | $('#pb_' + pb.id + '_n').text(pb.n);
156 | $('#pb_' + pb.id + '_total').text(pb.total);
157 | $('#pb_' + pb.id + '_started').text(pb.started);
158 | $('#pb_' + pb.id + '_duration').text(pb.duration);
159 | $('#pb_' + pb.id + '_remaining').text(pb.remaining);
160 | $('#pb_' + pb.id + '_finished').text(pb.finished);
161 |
162 | // Set insights, if available
163 | if (!$.isEmptyObject(pb.insights))
164 | {
165 | $('#pb_' + pb.id + '_insights_total_start_up_time').text(pb.insights['total_start_up_time']);
166 | $('#pb_' + pb.id + '_insights_start_up_time_mean').text(pb.insights['start_up_time_mean']);
167 | $('#pb_' + pb.id + '_insights_start_up_time_std').text(pb.insights['start_up_time_std']);
168 | $('#pb_' + pb.id + '_insights_start_up_ratio').text((pb.insights['start_up_ratio'] * 100.).toFixed(2))
169 | .css('color', numberToColorHsl(1.0 - pb.insights['start_up_ratio']));
170 | $('#pb_' + pb.id + '_insights_total_init_time').text(pb.insights['total_init_time']);
171 | $('#pb_' + pb.id + '_insights_init_time_mean').text(pb.insights['init_time_mean']);
172 | $('#pb_' + pb.id + '_insights_init_time_std').text(pb.insights['init_time_std']);
173 | $('#pb_' + pb.id + '_insights_init_ratio').text((pb.insights['init_ratio'] * 100.).toFixed(2))
174 |                     .css('color', numberToColorHsl(1.0 - pb.insights['init_ratio']));
175 | $('#pb_' + pb.id + '_insights_total_waiting_time').text(pb.insights['total_waiting_time']);
176 | $('#pb_' + pb.id + '_insights_waiting_time_mean').text(pb.insights['waiting_time_mean']);
177 | $('#pb_' + pb.id + '_insights_waiting_time_std').text(pb.insights['waiting_time_std']);
178 | $('#pb_' + pb.id + '_insights_waiting_ratio').text((pb.insights['waiting_ratio'] * 100.).toFixed(2))
179 | .css('color', numberToColorHsl(1.0 - pb.insights['waiting_ratio']));
180 | $('#pb_' + pb.id + '_insights_total_working_time').text(pb.insights['total_working_time']);
181 | $('#pb_' + pb.id + '_insights_working_time_mean').text(pb.insights['working_time_mean']);
182 | $('#pb_' + pb.id + '_insights_working_time_std').text(pb.insights['working_time_std']);
183 | $('#pb_' + pb.id + '_insights_working_ratio').text((pb.insights['working_ratio'] * 100.).toFixed(2))
184 | .css('color', numberToColorHsl(pb.insights['working_ratio']));
185 | $('#pb_' + pb.id + '_insights_total_exit_time').text(pb.insights['total_exit_time']);
186 | $('#pb_' + pb.id + '_insights_exit_time_mean').text(pb.insights['exit_time_mean']);
187 | $('#pb_' + pb.id + '_insights_exit_time_std').text(pb.insights['exit_time_std']);
188 | $('#pb_' + pb.id + '_insights_exit_ratio').text((pb.insights['exit_ratio'] * 100.).toFixed(2))
189 |                     .css('color', numberToColorHsl(1.0 - pb.insights['exit_ratio']));
190 | for (worker_id = 0; worker_id < pb.insights['n_completed_tasks'].length; worker_id++)
191 | {
192 | worker_prefix = '#pb_' + pb.id + '_insights_worker_' + worker_id;
193 | $(worker_prefix + '_tasks_completed').text(pb.insights['n_completed_tasks'][worker_id]);
194 | $(worker_prefix + '_start_up_time').text(pb.insights['start_up_time'][worker_id]);
195 | $(worker_prefix + '_init_time').text(pb.insights['init_time'][worker_id]);
196 | $(worker_prefix + '_waiting_time').text(pb.insights['waiting_time'][worker_id]);
197 | $(worker_prefix + '_working_time').text(pb.insights['working_time'][worker_id]);
198 | $(worker_prefix + '_exit_time').text(pb.insights['exit_time'][worker_id]);
199 | }
200 | for (task_idx = 0; task_idx < pb.insights['top_5_max_task_durations'].length; task_idx++)
201 | {
202 | task_prefix = '#pb_' + pb.id + '_insights_task_' + task_idx;
203 | $(task_prefix).show();
204 | $(task_prefix + '_duration').text(pb.insights['top_5_max_task_durations'][task_idx]);
205 | AddReadMore("pb_" + pb.id + "_insights_task_" + task_idx + "_args", 70,
206 | pb.insights['top_5_max_task_args'][task_idx]);
207 | }
208 | }
209 |
210 | if (pb.success)
211 | {
212 | // Success if we're at 100%
213 | if (pb.n == pb.total)
214 | {
215 | $('#pb_' + pb.id).addClass('bg-success');
216 |
217 | // Make lightsaber light up
218 | if (!(pb.id in completed_pb_ids))
219 | {
220 | $('.lightsaber').animate({color: '#00FF00'}, 300).animate({color: '#dc3545'}, 300);
221 | }
222 | completed_pb_ids[pb.id] = true;
223 | }
224 | }
225 | else
226 | {
227 | // Danger if we've encountered a failure
228 | $('#pb_' + pb.id).addClass('bg-danger');
229 |
230 | // Add traceback info
231 | $('#pb_' + pb.id + '_traceback').show().text(pb.traceback);
232 |
233 | // Add a flashing flash
234 | $('#pb_' + pb.id + '_flash').fadeIn(200).fadeOut(200).fadeIn(200).fadeOut(200).fadeIn(200);
235 |
236 | // Make lightsaber light up
237 | if (!(pb.id in completed_pb_ids))
238 | {
239 | $('.lightsaber').animate({color: '#000000'}, 300).animate({color: '#dc3545'}, 300);
240 | }
241 | completed_pb_ids[pb.id] = false;
242 | }
243 | }
244 | });
245 | return false;
246 | }
247 |
--------------------------------------------------------------------------------
/mpire/tqdm_utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import warnings
3 | from contextlib import redirect_stderr, redirect_stdout
4 | from io import StringIO
5 | from multiprocessing import Lock as mp_Lock
6 | from multiprocessing.synchronize import Lock as LockType
7 | from typing import Optional, Tuple, Type
8 |
9 | from tqdm import TqdmExperimentalWarning, tqdm as tqdm_std
10 | from tqdm.notebook import tqdm as tqdm_notebook
11 | try:
12 | from tqdm.rich import tqdm as tqdm_rich
13 | RICH_AVAILABLE = True
14 | except ImportError:
15 | tqdm_rich = None
16 | RICH_AVAILABLE = False
17 |
18 | from mpire.context import mp_dill
19 | from mpire.signal import DisableKeyboardInterruptSignal
20 | from mpire.utils import create_sync_manager
21 |
22 | PROGRESS_BAR_DEFAULT_STYLE = 'std'
23 | TqdmConnectionDetails = Tuple[LockType, "TqdmPositionRegister"]
24 |
25 | logger = logging.getLogger(__name__)
26 |
27 |
28 | class TqdmMpire:
29 |     """ Abstract class for tqdm classes that are used in mpire """
30 |
31 | main_progress_bar = False
32 |
33 | @classmethod
34 | def set_main_progress_bar(cls, main: bool) -> None:
35 | """
36 | Marks this progress bar as the main progress bar
37 |
38 | :param main: Whether this progress bar is the main progress bar
39 | """
40 | cls.main_progress_bar = main
41 |
42 | def update(self, n: int = 1) -> None:
43 | """
44 | Update the progress bar. Forces a final refresh when the progress bar is finished.
45 |
46 | :param n: Number of steps to update the progress bar with
47 | """
48 | super().update(n)
49 | if self.n == self.total:
50 | self.final_refresh()
51 |
52 | def update_total(self, total: int) -> None:
53 | """
54 | Update the total number of steps of the progress bar. Forces a refresh to show the new total.
55 |
56 | :param total: Total number of steps
57 | """
58 | self.total = total
59 | self.refresh()
60 |
61 | def final_refresh(self, highest_progress_bar_position: Optional[int] = None) -> None:
62 | """
63 | Final refresh of the progress bar. This function is called when the progress bar is finished. It should
64 | perform a final refresh of the progress bar and close it.
65 |
66 | :param highest_progress_bar_position: Highest progress bar position in case of multiple progress bars
67 | """
68 | self.refresh()
69 | self.close()
70 |
71 | @classmethod
72 | def check_options(cls, options: dict) -> None:
73 | """
74 | Check whether the options passed to the tqdm class are valid. This function should raise an exception when the
75 | options are invalid.
76 |
77 | :param options: Options passed to the tqdm class
78 | """
79 | with redirect_stderr(StringIO()), redirect_stdout(StringIO()):
80 | cls(**options)
81 |
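# Example of how check_options can be exercised (illustrative values): TqdmMpireStd.check_options({"total": 10,
# "desc": "demo"}) constructs and discards a throw-away progress bar with stdout/stderr suppressed, so an invalid
# option (e.g. an unknown keyword argument) surfaces as an exception up front.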
82 |
83 | class TqdmMpireStd(TqdmMpire, tqdm_std):
84 | """ A tqdm class that shows a standard progress bar. """
85 |
86 | def final_refresh(self, highest_progress_bar_position: Optional[int] = None) -> None:
87 | """
88 | Final refresh of the progress bar. This function is called when the progress bar is finished. It should
89 | perform a final refresh.
90 |
91 | When we're using a standard progress bar and this is the main progress bar, we add as many newlines as the
92 | highest progress bar position, such that new output is added after the progress bars.
93 |
94 | :param highest_progress_bar_position: Highest progress bar position in case of multiple progress bars
95 | """
96 | self.refresh()
97 | self.disable = True
98 | if self.main_progress_bar and highest_progress_bar_position is not None:
99 | self.fp.write('\n' * (highest_progress_bar_position + 1))
100 |
101 |
102 | if RICH_AVAILABLE:
103 | class TqdmMpireRich(TqdmMpire, tqdm_rich):
104 | """ A tqdm class that shows a rich progress bar. """
105 |
106 | @classmethod
107 | def check_options(cls, options: dict) -> None:
108 | """
109 | Check whether the options passed to the tqdm class are valid. This function should raise an exception when the
110 | options are invalid.
111 |
112 |             For rich progress bars we disable the bar itself, since we don't want it to show up in the terminal during
113 |             this check. Redirecting stdout/stderr is not an option: for some reason it makes rich misbehave afterwards.
114 |
115 | :param options: Options passed to the tqdm class
116 | """
117 | options = options.copy()
118 | if "options" not in options:
119 | options["options"] = {"disable": True}
120 | else:
121 | options["options"]["disable"] = True
122 | with warnings.catch_warnings():
123 | warnings.simplefilter("ignore", TqdmExperimentalWarning)
124 | cls(**options)
125 |
126 | def display(self, *args, **kwargs) -> None:
127 | """
128 | Display the progress bar and force a refresh of the widget. The refresh is needed to show the final update.
129 | """
130 | super().display(*args, **kwargs)
131 | self._prog.refresh()
132 |
133 | else:
134 | class TqdmMpireRich(TqdmMpire):
135 |
136 | def __init__(self, *args, **kwargs) -> None:
137 | raise ImportError("rich is not installed. Please install rich to use rich progress bars.")
138 |
139 |
140 | class TqdmMpireNotebook(TqdmMpire, tqdm_notebook):
141 | """ A tqdm class that shows a GUI widget in notebooks. """
142 |
143 | def __init__(self, *args, **kwargs) -> None:
144 | """
145 | In case we're running tqdm in a notebook we need to apply a dirty hack to get progress bars working.
146 | Solution adapted from https://github.com/tqdm/tqdm/issues/485#issuecomment-473338308
147 | """
148 | if not self.main_progress_bar:
149 | print(' ', end='', flush=True)
150 | super().__init__(*args, **kwargs)
151 |
152 | def update_total(self, total: int) -> None:
153 | """
154 | Update the total number of steps of the progress bar. Forces a refresh to show the new total.
155 |
156 | In a notebook we also need to update the max value of the progress bar widget.
157 |
158 | :param total: Total number of steps
159 | """
160 | self.container.children[1].max = total
161 | return super().update_total(total)
162 |
163 | @classmethod
164 | def check_options(cls, options: dict) -> None:
165 | """
166 | Check whether the options passed to the tqdm class are valid. This function should raise an exception when the
167 | options are invalid.
168 |
169 | For notebook progress bars we set display to false, because redirecting stdout/stderr doesn't work for notebook
170 | widgets.
171 |
172 | :param options: Options passed to the tqdm class
173 | """
174 | options = options.copy()
175 | options["display"] = False
176 | cls(**options)
177 |
178 |
179 | class TqdmMpireDashboardOnly(TqdmMpire, tqdm_std):
180 | """
181 | A tqdm class that gives no output, but will still update the internal progress-bar attributes that the
182 | dashboard relies on.
183 | """
184 |
185 | def __init__(self, *args, **kwargs) -> None:
186 | """ Set the file to a StringIO object so that no output is given """
187 | kwargs["file"] = StringIO()
188 | super().__init__(*args, **kwargs)
189 |
190 | def display(self, *args, **kwargs) -> None:
191 | """ Don't display anything """
192 | pass
193 |
194 |
195 | def get_tqdm(progress_bar_style: Optional[str]) -> Type[TqdmMpire]:
196 | """
197 | Get the tqdm class to use based on the progress bar style
198 |
199 |     :param progress_bar_style: The progress bar style to use. Can be one of ``None`` (=default), ``std``, ``rich``,
200 |         ``notebook``, or ``dashboard``
201 |     :return: The tqdm class to use
202 | """
203 | if progress_bar_style is None:
204 | progress_bar_style = PROGRESS_BAR_DEFAULT_STYLE
205 | if progress_bar_style == 'std':
206 | return TqdmMpireStd
207 | elif progress_bar_style == 'rich':
208 | return TqdmMpireRich
209 | elif progress_bar_style == 'notebook':
210 | return TqdmMpireNotebook
211 | elif progress_bar_style == 'dashboard':
212 | return TqdmMpireDashboardOnly
213 | else:
214 |         raise ValueError(f'Invalid progress bar style: {progress_bar_style}. '
215 |                          f'Use either None (=default), "std", "rich", "notebook", or "dashboard"')
216 |
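# Example (illustrative): get_tqdm("rich") returns the TqdmMpireRich class (whose constructor raises ImportError when
# rich is not installed), while get_tqdm(None) falls back to the default 'std' style and returns TqdmMpireStd.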
217 |
218 | class TqdmPositionRegister:
219 |
220 | """
221 | Class that keeps track of all the registered progress bar positions. Needed to properly display multiple tqdm
222 | progress bars
223 | """
224 |
225 | def __init__(self, use_dill: bool) -> None:
226 | """
227 | :param use_dill: Whether dill is used as serialization library
228 | """
229 | self.lock = mp_dill.Lock() if use_dill else mp_Lock()
230 | self.highest_position = None
231 |
232 | def register_progress_bar_position(self, position: int) -> bool:
233 | """
234 | Register new progress bar position. Returns True when it's the first one to register
235 |
236 | :param position: Progress bar position
237 | :return: Whether this progress bar is the first one to register
238 | """
239 | with self.lock:
240 | first_one = self.highest_position is None
241 | if self.highest_position is None or position > self.highest_position:
242 | self.highest_position = position
243 |
244 | return first_one
245 |
246 | def get_highest_progress_bar_position(self) -> Optional[int]:
247 | """
248 | Obtain the highest registered progress bar position
249 |
250 | :return: Highest progress bar position
251 | """
252 | with self.lock:
253 | return self.highest_position
254 |
255 | def reset_progress_bar_positions(self) -> None:
256 | """
257 | Reset the registered progress bar positions
258 | """
259 | with self.lock:
260 | self.highest_position = None
261 |
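# Illustrative protocol sketch (hypothetical positions): the first bar to call register_progress_bar_position(0)
# gets True back because it is the first to register; bars registering positions 1, 2, ... afterwards get False,
# and get_highest_progress_bar_position() then reports the largest position registered so far.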
262 |
263 | class TqdmManager:
264 |
265 | """Tqdm manager wrapper for syncing multiple progress bars, independent of process start method used."""
266 |
267 | MANAGER = None
268 | LOCK = None
269 | POSITION_REGISTER = None
270 |
271 | @classmethod
272 | def start_manager(cls, use_dill: bool) -> bool:
273 | """
274 | Sets up and starts the tqdm manager
275 |
276 | :param use_dill: Whether dill is used as serialization library
277 | :return: Whether the manager was started
278 | """
279 | # Don't do anything when there's already a tqdm manager that has started
280 | if cls.LOCK is not None:
281 | return False
282 |
283 | logger.debug("Starting TQDM manager")
284 |
285 | # Create manager
286 | with DisableKeyboardInterruptSignal():
287 | cls.MANAGER = create_sync_manager(use_dill)
288 | cls.MANAGER.register('TqdmPositionRegister', TqdmPositionRegister)
289 | cls.MANAGER.start()
290 | cls.LOCK = cls.MANAGER.Lock()
291 | cls.POSITION_REGISTER = cls.MANAGER.TqdmPositionRegister(use_dill)
292 |
293 | return True
294 |
295 | @classmethod
296 | def stop_manager(cls) -> None:
297 | """
298 | Stops the tqdm manager
299 | """
300 | cls.MANAGER.shutdown()
301 | cls.MANAGER = None
302 | cls.LOCK = None
303 | cls.POSITION_REGISTER = None
304 |
305 | @classmethod
306 | def get_connection_details(cls) -> TqdmConnectionDetails:
307 | """
308 |         Obtains the connection details of the tqdm manager. These details need to be passed on to child processes
309 |         when the start method is either forkserver or spawn.
310 |
311 | :return: TQDM lock and position register
312 | """
313 | return cls.LOCK, cls.POSITION_REGISTER
314 |
315 | @classmethod
316 | def set_connection_details(cls, tqdm_connection_details: TqdmConnectionDetails) -> None:
317 | """
318 | Sets the tqdm connection details.
319 |
320 | :param tqdm_connection_details: TQDM lock and position register
321 | """
322 | cls.LOCK, cls.POSITION_REGISTER = tqdm_connection_details
323 |
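# Minimal usage sketch of TqdmManager (a simplified view of how mpire is assumed to wire this up; the worker-side
# hand-off shown here is an assumption, not a verbatim excerpt). In the parent process:
#
#     TqdmManager.start_manager(use_dill=False)
#     lock, position_register = TqdmManager.get_connection_details()
#
# With the 'spawn' or 'forkserver' start methods, these connection details are sent along to each child process,
# which restores them via TqdmManager.set_connection_details((lock, position_register)) so that all processes share
# the same tqdm lock and position register. TqdmManager.stop_manager() shuts the manager down again.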
--------------------------------------------------------------------------------