├── .github └── workflows │ └── scorecard.yml ├── .gitignore ├── LICENSE ├── README.md ├── SECURITY.md ├── SMP.py ├── meta.yaml ├── setup.py └── smp ├── __init__.py └── __main__.py /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. They are provided 2 | # by a third-party and are governed by separate terms of service, privacy 3 | # policy, and support documentation. 4 | 5 | name: Scorecard supply-chain security 6 | on: 7 | # For Branch-Protection check. Only the default branch is supported. See 8 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 9 | branch_protection_rule: 10 | # To guarantee Maintained check is occasionally updated. See 11 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained 12 | schedule: 13 | - cron: '23 11 * * 1' 14 | push: 15 | branches: [ "master" ] 16 | 17 | # Declare default permissions as read only. 18 | permissions: read-all 19 | 20 | jobs: 21 | analysis: 22 | name: Scorecard analysis 23 | runs-on: ubuntu-latest 24 | permissions: 25 | # Needed to upload the results to code-scanning dashboard. 26 | security-events: write 27 | # Needed to publish results and get a badge (see publish_results below). 28 | id-token: write 29 | # Uncomment the permissions below if installing in a private repository. 30 | # contents: read 31 | # actions: read 32 | 33 | steps: 34 | - name: "Checkout code" 35 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 36 | with: 37 | persist-credentials: false 38 | 39 | - name: "Run analysis" 40 | uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 41 | with: 42 | results_file: results.sarif 43 | results_format: sarif 44 | # (Optional) "write" PAT token. 
Uncomment the `repo_token` line below if: 45 | # - you want to enable the Branch-Protection check on a *public* repository, or 46 | # - you are installing Scorecard on a *private* repository 47 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. 48 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 49 | 50 | # Public repositories: 51 | # - Publish results to OpenSSF REST API for easy access by consumers 52 | # - Allows the repository to include the Scorecard badge. 53 | # - See https://github.com/ossf/scorecard-action#publishing-results. 54 | # For private repositories: 55 | # - `publish_results` will always be set to `false`, regardless 56 | # of the value entered here. 57 | publish_results: true 58 | 59 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 60 | # format to the repository Actions tab. 61 | # - name: "Upload artifact" 62 | # uses: actions/upload-artifact@97a0fba1372883ab732affbe8f94b823f91727db # v3.pre.node20 63 | # with: 64 | # name: SARIF file 65 | # path: results.sarif 66 | # retention-days: 5 67 | 68 | # # Upload the results to GitHub's code scanning dashboard (optional). 
69 | # # Commenting out will disable upload of results to your repo's Code Scanning dashboard 70 | # - name: "Upload to code-scanning" 71 | # uses: github/codeql-action/upload-sarif@v3 72 | # with: 73 | # sarif_file: results.sarif 74 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | 
/site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Intel Corporation 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of Intel Corporation nor the names of its contributors 12 | may be used to endorse or promote products derived from this software 13 | without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Static Multi-Processing 2 | **SMP** module allows to set static affinity mask for each process inside process pool to limit total 3 | number of threads running in application: 4 | ``` 5 | python -m smp [-f ] [-p ] script.py 6 | ``` 7 | The module supports two types of process pool: **multiprocessing.pool.Pool** and 8 | **concurrent.futures.ProcessPoolExecutor**, as well as one thread pool: **multiprocessing.pool.ThreadPool**. 9 | Can be run with TBB module as well: 10 | ``` 11 | python -m smp [-f ] [-p ] -m tbb script.py 12 | ``` 13 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Vulnerability 5 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). -------------------------------------------------------------------------------- /SMP.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2017, Intel Corporation 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 
9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of Intel Corporation nor the names of its contributors 13 | # may be used to endorse or promote products derived from this software 14 | # without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | 28 | from smp import * 29 | from smp import __all__, __doc__ 30 | 31 | if __name__ == "__main__": 32 | from smp import _main 33 | import sys 34 | sys.exit(_main()) 35 | -------------------------------------------------------------------------------- /meta.yaml: -------------------------------------------------------------------------------- 1 | #{% set data = load_setup_py_data() %} 2 | #{% set version = data.get('version') %} 3 | {% set version = "0.1.3" %} 4 | {% set buildnumber = 1 %} 5 | 6 | package: 7 | name: smp 8 | version: {{version}} 9 | 10 | source: 11 | path: . 
12 | 13 | requirements: 14 | build: 15 | - python 16 | run: 17 | - python 18 | 19 | build: 20 | number: {{buildnumber}} 21 | entry_points: 22 | - python-smp = smp:_main 23 | noarch_python: True 24 | script: python setup.py install 25 | 26 | test: 27 | requires: 28 | - intel_tester 29 | files: 30 | - intel_tester_config.py 31 | commands: 32 | - python -c "from intel_tester import test_routine; test_routine.test_exec()" # [linux] 33 | - python-smp -h # [linux] 34 | imports: 35 | - smp 36 | - SMP 37 | 38 | about: 39 | license: BSD 40 | license_file: LICENSE 41 | summary: a Static Multi-Processing module handling nested parallelism issues like oversubscription while composing different parallel components 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2017, Intel Corporation 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of Intel Corporation nor the names of its contributors 13 | # may be used to endorse or promote products derived from this software 14 | # without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | 28 | # System imports 29 | from __future__ import print_function 30 | import platform 31 | import os 32 | from smp import __version__ as ver, __doc__ as doc 33 | 34 | from distutils.core import * 35 | 36 | setup( name ="SMP", 37 | description ="Static Multi-Processing", 38 | long_description= doc, 39 | url ="https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-python.html", 40 | author ="Intel Corporation", 41 | author_email="scripting@intel.com", 42 | license ="BSD", 43 | version = ver, 44 | classifiers =[ 45 | 'Development Status :: 4 - Beta', 46 | 'Environment :: Console', 47 | 'Environment :: Plugins', 48 | 'Intended Audience :: Developers', 49 | 'Intended Audience :: System Administrators', 50 | 'Intended Audience :: Other Audience', 51 | 'Intended Audience :: Science/Research', 52 | 'License :: OSI Approved :: BSD License', 53 | #'Operating System :: MacOS :: MacOS X', 54 | #'Operating System :: Microsoft :: Windows', 55 | 'Operating System :: POSIX :: Linux', 56 | 'Programming Language :: Python', 57 | 'Programming Language :: Python :: 2', 58 | 'Programming Language :: Python :: 3', 59 | 'Topic :: System :: Hardware :: Symmetric Multi-processing', 60 | ], 61 | keywords='multiprocessing multithreading composable parallelism affinity', 62 | packages=['smp'], 63 | py_modules=['SMP'], 64 | ) 65 | -------------------------------------------------------------------------------- 
# -----------------------------------------------------------------------------
# /smp/__init__.py
# -----------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) 2017, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
#   may be used to endorse or promote products derived from this software
#   without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from __future__ import print_function
import os
import sys
from threading import Lock
from multiprocessing.pool import Pool, ThreadPool
from multiprocessing import cpu_count
# concurrent.futures is optional (absent on old Python 2 without the backport).
try:
    from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
except ImportError:
    ProcessPoolExecutor = None
    ThreadPoolExecutor = None
from ctypes import *
# sched_{get,set}affinity exist only on Linux; fall back to psutil, then to
# thread-count limiting only.
try:
    from os import sched_getaffinity, sched_setaffinity
except ImportError:
    sched_getaffinity = None
    sched_setaffinity = None
try:
    from psutil import Process as PsutilProcess
except ImportError:
    PsutilProcess = None

__version__ = "0.1.4"
__all__ = ["Monkey"]
__doc__ = """
Static Multi-Processing module
enables composability of nested parallelism by controlling the number of threads
and setting affinity mask for each Python's worker process or thread, which helps
to limit total number of threads running in application.

Run `python -m smp -h` for command line options.
"""

libc_module_name = "libc.so.6"

# Module-level knobs; Monkey.__enter__ overwrites them from its arguments.
oversubscription_factor = 2
max_top_workers = 0

# Substrings searched for in the paths of already-loaded shared objects.
mkl_module_name = "libmkl_rt"
omp_gnu_module_name = "libgomp"
omp_intel_module_name = "libiomp"
found_module_name = None  # set by callback() during dl_iterate_phdr walks

# Per-PID cache of NativeWrapper instances, guarded by a lock.
native_wrapper_list = dict()
native_wrapper_lock = Lock()


class dl_phdr_info32(Structure):
    """Prefix of glibc's ``struct dl_phdr_info`` on 32-bit platforms."""
    _fields_ = [("dlpi_addr", c_uint32),
                ("dlpi_name", c_char_p),
                ("dlpi_phdr", c_void_p),
                ("dlpi_phnum", c_uint16)]


class dl_phdr_info64(Structure):
    """Prefix of glibc's ``struct dl_phdr_info`` on 64-bit platforms."""
    _fields_ = [("dlpi_addr", c_uint64),
                ("dlpi_name", c_char_p),
                ("dlpi_phdr", c_void_p),
                ("dlpi_phnum", c_uint16)]


def callback(info, size, data):
    """dl_iterate_phdr(3) visitor.

    Records into the module-global ``found_module_name`` the path of the
    first loaded shared object whose name contains the substring passed via
    *data*; returns 1 to stop the iteration, 0 to continue.
    """
    global found_module_name
    desired_module = cast(data, c_char_p).value.decode('utf-8')
    is_64bits = sys.maxsize > 2**32
    if is_64bits:
        info64 = cast(info, POINTER(dl_phdr_info64))
        module_name = info64.contents.dlpi_name
    else:
        info32 = cast(info, POINTER(dl_phdr_info32))
        module_name = info32.contents.dlpi_name
    if module_name:
        module_name = module_name.decode("utf-8")
        if module_name.find(desired_module) >= 0:
            found_module_name = module_name
            return 1
    return 0


class NativeWrapper:
    """Handles to OpenMP and MKL runtimes already loaded into this process.

    Lookup is best-effort: every failure (no libc, no dl_iterate_phdr, no
    runtime found) leaves the corresponding handle as None.
    """

    def __init__(self):
        self._load_omp()
        self._load_mkl()

    def is_omp_found(self):
        if self.libomp:
            return True
        return False

    def omp_set_num_threads(self, n):
        # Best-effort: silently ignore runtimes lacking the symbol.
        if self.libomp:
            try:
                self.libomp.omp_set_num_threads(n)
            except Exception:
                return

    def is_mkl_found(self):
        if self.libmkl:
            return True
        return False

    def mkl_set_num_threads(self, n):
        # Best-effort: silently ignore runtimes lacking the symbol.
        if self.libmkl:
            try:
                self.libmkl.MKL_Set_Num_Threads(n)
            except Exception:
                return

    def _load_mkl(self):
        """Locate an already-loaded MKL runtime via dl_iterate_phdr."""
        try:
            global found_module_name
            self.libc = CDLL(libc_module_name)
            found_module_name = None
            CMPFUNC = CFUNCTYPE(c_int, c_void_p, c_size_t, c_char_p)
            cmp_callback = CMPFUNC(callback)

            data = c_char_p(mkl_module_name.encode('utf-8'))
            res = self.libc.dl_iterate_phdr(cmp_callback, data)
            if res == 1 and found_module_name:
                self.libmkl = CDLL(found_module_name)
            else:
                self.libmkl = None
        except Exception:
            self.libmkl = None

    def _load_omp(self):
        """Locate an already-loaded OpenMP runtime (GNU first, then Intel)."""
        try:
            global found_module_name
            self.libc = CDLL(libc_module_name)
            found_module_name = None
            CMPFUNC = CFUNCTYPE(c_int, c_void_p, c_size_t, c_char_p)
            cmp_callback = CMPFUNC(callback)

            data = c_char_p(omp_gnu_module_name.encode('utf-8'))
            res = self.libc.dl_iterate_phdr(cmp_callback, data)
            if res == 1 and found_module_name:
                self.libomp = CDLL(found_module_name)
            else:
                data = c_char_p(omp_intel_module_name.encode('utf-8'))
                res = self.libc.dl_iterate_phdr(cmp_callback, data)
                if res == 1 and found_module_name:
                    self.libomp = CDLL(found_module_name)
                else:
                    self.libomp = None
        except Exception:
            self.libomp = None


def get_native_wrapper():
    """Return the per-process NativeWrapper, creating it on first use.

    The cache is keyed by PID so that forked pool workers do not reuse
    library handles created in the parent.  FIX: the lock is now held via
    ``with`` so it is released even if NativeWrapper() raises; the original
    acquire()/release() pair leaked the lock on error, deadlocking all
    subsequent callers.
    """
    global native_wrapper_list

    with native_wrapper_lock:
        native_wrapper = native_wrapper_list.get(os.getpid())
        if not native_wrapper:
            native_wrapper = NativeWrapper()
            native_wrapper_list[os.getpid()] = native_wrapper

    return native_wrapper


def mkl_set_num_threads(n):
    """Limit MKL to *n* threads in this process, if MKL is loaded."""
    native_wrapper = get_native_wrapper()
    if native_wrapper.is_mkl_found():
        native_wrapper.mkl_set_num_threads(n)


def omp_set_num_threads(n):
    """Limit OpenMP to *n* threads in this process, if a runtime is loaded."""
    native_wrapper = get_native_wrapper()
    if native_wrapper.is_omp_found():
        native_wrapper.omp_set_num_threads(n)


def get_affinity():
    """Return the set/list of CPU ids this process may run on.

    Preference order: os.sched_getaffinity, psutil, then all CPUs.
    """
    if sched_getaffinity:
        return sched_getaffinity(0)
    else:
        if PsutilProcess:
            p = PsutilProcess()
            return p.cpu_affinity()
        else:
            return [i for i in range(cpu_count())]


def set_affinity(mask):
    """Pin the current process to the CPU ids in *mask*.

    Falls back to limiting the OpenMP thread count when no affinity API is
    available (POSIX only).
    """
    if sched_setaffinity:
        sched_setaffinity(0, mask)
    else:
        if PsutilProcess:
            p = PsutilProcess()
            p.cpu_affinity(mask)
        else:
            if os.name == "posix":
                omp_set_num_threads(len(mask))


def set_proc_affinity(process_count, process_id):
    """Give worker *process_id* of *process_count* its slice of the CPUs.

    Each worker gets ``oversubscription_factor`` threads, scaled up when
    there are more CPUs than workers; slices wrap around the CPU list.
    A single-process pool is left untouched.
    """
    if process_count == 1:
        return

    cpu_list = list(get_affinity())
    cpu_count = len(cpu_list)

    global oversubscription_factor
    if cpu_count < oversubscription_factor:
        oversubscription_factor = cpu_count

    threads_per_process = oversubscription_factor
    if cpu_count >= process_count:
        threads_per_process = threads_per_process*int(round(float(cpu_count)
                                                      /float(process_count)))

    start_cpu = (process_id*threads_per_process) % cpu_count
    mask = [cpu_list[((start_cpu + i) % cpu_count)]
            for i in range(threads_per_process)]
    set_affinity(mask)

    if os.name == "posix":
        mkl_set_num_threads(threads_per_process)


def affinity_worker27(inqueue, outqueue, initializer=None, initargs=(),
                      maxtasks=None, process_count=1, process_id=0):
    """Python 2.7 Pool worker entry that first pins itself to its CPU slice."""
    from multiprocessing.pool import worker
    set_proc_affinity(process_count, process_id)
    worker(inqueue, outqueue, initializer, initargs, maxtasks)


class AffinityPool27(Pool):
    """multiprocessing.pool.Pool (Python 2.7 API) with per-worker affinity."""

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None):
        if max_top_workers:
            processes = int(max_top_workers)
        Pool.__init__(self, processes, initializer, initargs,
                      maxtasksperchild)

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        from multiprocessing.util import debug

        base_id = len(self._pool)
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=affinity_worker27,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild,
                                   self._processes, base_id + i)
                             )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')


def affinity_worker35(inqueue, outqueue, initializer=None, initargs=(),
                      maxtasks=None, wrap_exception=False,
                      process_count=1, process_id=0):
    """Python 3.5+ Pool worker entry that first pins itself to its CPU slice."""
    from multiprocessing.pool import worker
    set_proc_affinity(process_count, process_id)
    worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)


class AffinityPool35(Pool):
    """multiprocessing.pool.Pool (Python 3.5+ API) with per-worker affinity."""

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None, context=None):
        if max_top_workers:
            processes = int(max_top_workers)
        Pool.__init__(self, processes, initializer, initargs,
                      maxtasksperchild, context)

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        from multiprocessing.util import debug

        base_id = len(self._pool)
        for i in range(self._processes - len(self._pool)):
            w = self.Process(self._ctx, target=affinity_worker35,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild,
                                   self._wrap_exception,
                                   self._processes, base_id + i)
                             )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')


def limit_num_threads(process_count, process_id):
    """Limit OpenMP/MKL thread counts for one of *process_count* workers.

    Unlike set_proc_affinity() this never sets an affinity mask, so it is
    safe for thread pools where workers share the process.
    """
    if process_count == 1:
        return

    cpu_list = list(get_affinity())
    cpu_count = len(cpu_list)

    global oversubscription_factor
    if cpu_count < oversubscription_factor:
        oversubscription_factor = cpu_count

    threads_per_process = oversubscription_factor
    if cpu_count >= process_count:
        threads_per_process = threads_per_process*int(round(float(cpu_count)
                                                      /float(process_count)))

    if os.name == "posix":
        omp_set_num_threads(threads_per_process)
        mkl_set_num_threads(threads_per_process)


def limited_worker27(inqueue, outqueue, initializer=None, initargs=(),
                     maxtasks=None, process_count=1, process_id=0):
    """Python 2.7 ThreadPool worker entry with thread-count limiting."""
    from multiprocessing.pool import worker
    limit_num_threads(process_count, process_id)
    worker(inqueue, outqueue, initializer, initargs, maxtasks)


class LimitedThreadPool27(ThreadPool):
    """multiprocessing.pool.ThreadPool (Python 2.7 API) limiting nested threads."""

    def __init__(self, processes=None, initializer=None, initargs=()):
        if max_top_workers:
            processes = int(max_top_workers)
        # NOTE: calls Pool.__init__ directly, as the original did; ThreadPool
        # overrides Process/_setup_queues so this yields a thread pool.
        Pool.__init__(self, processes, initializer, initargs)

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        from multiprocessing.util import debug

        base_id = len(self._pool)
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=limited_worker27,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild,
                                   self._processes, base_id + i)
                             )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')


def limited_worker35(inqueue, outqueue, initializer=None, initargs=(),
                     maxtasks=None, wrap_exception=False,
                     process_count=1, process_id=0):
    """Python 3.5+ ThreadPool worker entry with thread-count limiting."""
    from multiprocessing.pool import worker
    limit_num_threads(process_count, process_id)
    worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)


class LimitedThreadPool35(ThreadPool):
    """multiprocessing.pool.ThreadPool (Python 3.5+ API) limiting nested threads."""

    def __init__(self, processes=None, initializer=None, initargs=()):
        if max_top_workers:
            processes = int(max_top_workers)
        # NOTE: calls Pool.__init__ directly, as the original did; ThreadPool
        # overrides Process/_setup_queues so this yields a thread pool.
        Pool.__init__(self, processes, initializer, initargs)

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        from multiprocessing.util import debug

        base_id = len(self._pool)
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=limited_worker35,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild,
                                   self._wrap_exception,
                                   self._processes, base_id + i)
                             )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')


if ProcessPoolExecutor:

    def affinity_process_worker(call_queue, result_queue,
                                process_count=1, process_id=0):
        """ProcessPoolExecutor worker entry that pins itself first."""
        from concurrent.futures.process import _process_worker
        set_proc_affinity(process_count, process_id)
        _process_worker(call_queue, result_queue)

    class AffinityProcessPoolExecutor(ProcessPoolExecutor):
        """concurrent.futures.ProcessPoolExecutor with per-worker affinity."""

        def __init__(self, max_workers=None):
            if max_top_workers:
                max_workers = int(max_top_workers)
            ProcessPoolExecutor.__init__(self, max_workers)

        def _adjust_process_count(self):
            import multiprocessing
            base_id = len(self._processes)
            for i in range(len(self._processes), self._max_workers):
                p = multiprocessing.Process(
                        target=affinity_process_worker,
                        args=(self._call_queue,
                              self._result_queue,
                              self._max_workers, base_id + i))
                p.start()
                self._processes[p.pid] = p

if ThreadPoolExecutor:

    def limited_thread_worker(executor_reference, work_queue,
                              process_count=1, process_id=0):
        """ThreadPoolExecutor worker entry with thread-count limiting."""
        from concurrent.futures.thread import _worker
        limit_num_threads(process_count, process_id)
        _worker(executor_reference, work_queue, None, [])

    class LimitedThreadPoolExecutor(ThreadPoolExecutor):
        """concurrent.futures.ThreadPoolExecutor limiting nested threads."""

        def __init__(self, max_workers=None):
            if max_top_workers:
                max_workers = int(max_top_workers)
            ThreadPoolExecutor.__init__(self, max_workers)

        def _adjust_thread_count(self):
            import threading, weakref
            from concurrent.futures.thread import _threads_queues

            def weakref_cb(_, q=self._work_queue):
                q.put(None)
            if len(self._threads) < self._max_workers:
                t = threading.Thread(target=limited_thread_worker,
                                     args=(weakref.ref(self, weakref_cb),
                                           self._work_queue,
                                           self._max_workers, 0))
                t.daemon = True
                t.start()
                self._threads.add(t)
                _threads_queues[t] = self._work_queue


class Monkey():
    """
    Context manager which hooks such standard library classes as

    Pool, ThreadPool, and ProcessPoolExecutor

    It controls number of threads and thread affinity for libraries running
    nested parallel regions. Example:

        with smp.Monkey(oversubscription_factor = 1):
            run_my_parallel_numpy_code()

    """
    # Saved originals and the modules they were taken from, restored on exit.
    _items = {"Pool" : None,
              "ThreadPool" : None,
              "ProcessPoolExecutor" : None}
    _modules = {"Pool" : None,
                "ThreadPool" : None,
                "ProcessPoolExecutor" : None}

    def __init__(self, oversubscription_factor=oversubscription_factor,
                 max_top_workers=max_top_workers):
        self.oversubscription_factor = int(oversubscription_factor)
        self.max_top_workers = int(max_top_workers)

    def _patch(self, class_name, module_name, replacement):
        """Swap *module_name*.*class_name* for *replacement*, saving the original."""
        self._modules[class_name] = __import__(module_name, globals(),
                                               locals(), [class_name])
        if self._modules[class_name] is None:
            return
        oldattr = getattr(self._modules[class_name], class_name, None)
        if oldattr is None:
            self._modules[class_name] = None
            return
        self._items[class_name] = oldattr
        setattr(self._modules[class_name], class_name, replacement)

    def __enter__(self):
        global oversubscription_factor, max_top_workers
        oversubscription_factor = self.oversubscription_factor
        max_top_workers = self.max_top_workers
        if sys.version_info.major == 2 and sys.version_info.minor >= 7:
            self._patch("Pool", "multiprocessing.pool", AffinityPool27)
            self._patch("ThreadPool", "multiprocessing.pool",
                        LimitedThreadPool27)
        elif sys.version_info.major == 3 and sys.version_info.minor >= 5:
            self._patch("Pool", "multiprocessing.pool", AffinityPool35)
            self._patch("ThreadPool", "multiprocessing.pool",
                        LimitedThreadPool35)
        if ProcessPoolExecutor:
            self._patch("ProcessPoolExecutor", "concurrent.futures",
                        AffinityProcessPoolExecutor)
        if ThreadPoolExecutor:
            self._patch("ThreadPoolExecutor", "concurrent.futures",
                        LimitedThreadPoolExecutor)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore every attribute we replaced in __enter__.
        for name in self._items.keys():
            if self._items[name]:
                setattr(self._modules[name], name, self._items[name])


def _process_test(n):
    """Worker for _test(): report how many CPUs this process may use."""
    cpu_list = list(get_affinity())
    cpu_count = len(cpu_list)
    return cpu_count


def _test():
    """Self-test: each of 4 pool workers must see its expected CPU share."""
    target_factor = 1
    target_proc_num = 4
    success = True

    cpu_list = list(get_affinity())
    cpu_count = len(cpu_list)
    if cpu_count < target_factor:
        target_factor = cpu_count
    target_thread_num = target_factor
    if cpu_count >= target_proc_num:
        target_thread_num = target_thread_num*int(round(float(cpu_count)
                                                  /float(target_proc_num)))
    if not (sched_getaffinity or sched_setaffinity or PsutilProcess):
        target_thread_num = cpu_count

    with Monkey(oversubscription_factor = target_factor,
                max_top_workers = target_proc_num):
        p = getattr(__import__("multiprocessing.pool", globals(), locals(),
                               ["Pool"]), "Pool", None)()
        actual_thread_num = p.map(_process_test,
                                  [0 for i in range(target_proc_num)])
        for item in actual_thread_num:
            if item != target_thread_num:
                print("Expected thread number = {0}, actual = {1}".format(
                      target_thread_num, item))
                success = False
    if success:
        print("done")
    return 0 if success else 1


def _main():
    """Command-line entry point: run a script/module under SMP control."""
    global oversubscription_factor
    global max_top_workers

    if not sys.platform.startswith('linux'):
        # FIX: the original raised a plain string, which is itself a
        # TypeError in Python 3 and masked the intended message.
        raise RuntimeError("Only linux is currently supported")

    import argparse
    parser = argparse.ArgumentParser(prog="python -m smp", description=__doc__,
                formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-f', '--oversubscription-factor',
                default=oversubscription_factor, metavar='N',
                help="Limits maximal number of threads as available CPU*N",
                type=int)
    parser.add_argument('-t', '--kmp-blocktime',
                default=os.environ.get("KMP_BLOCKTIME", 0), metavar='N',
                help="Sets KMP_BLOCKTIME as a time of busy wait of worker threads",
                type=int)
    parser.add_argument('-o', '--kmp-composability', action='store_true',
                help="Disables monkey, uses KMP_COMPOSABILITY=mode=counting instead"
                     " to enable composability mode of libiomp")
    parser.add_argument('-p', '--max-top-workers', default=max_top_workers,
                metavar='P', type=int,
                help="Limits outermost parallelism by controlling number of thread or "
                     "processes workers created by Python pools")
    parser.add_argument('-m', action='store_true', dest='module',
                help="Executes following as a module")
    parser.add_argument('name', help="Script or module name")
    parser.add_argument('args', nargs=argparse.REMAINDER,
                help="Command line arguments")
    args = parser.parse_args()
    sys.argv = [args.name] + args.args
    if '_' + args.name in globals():
        # e.g. `python -m smp test` dispatches to _test().
        return globals()['_' + args.name](*args.args)
    elif args.kmp_composability:
        os.environ["KMP_COMPOSABILITY"] = "mode=counting"
        os.environ["MKL_THREADING_LAYER"] = "INTEL"
        import runpy
        runf = runpy.run_module if args.module else runpy.run_path
        runf(args.name, run_name='__main__')
    else:
        os.environ["KMP_BLOCKTIME"] = str(args.kmp_blocktime)
        import runpy
        runf = runpy.run_module if args.module else runpy.run_path
        with Monkey(oversubscription_factor = args.oversubscription_factor,
                    max_top_workers = args.max_top_workers):
            runf(args.name, run_name='__main__')

# -----------------------------------------------------------------------------
# /smp/__main__.py (header only; the remainder is truncated in this dump)
# -----------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) 2017, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
#   may be used to endorse or promote products derived from this software
#   without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | 28 | from . import _main 29 | from sys import exit 30 | exit(_main()) 31 | --------------------------------------------------------------------------------