├── .coveragerc ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .readthedocs.yaml ├── CHANGES.rst ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── compyle ├── __init__.py ├── api.py ├── array.py ├── ast_utils.py ├── capture_stream.py ├── config.py ├── cuda.py ├── cython_generator.py ├── ext_module.py ├── extern.py ├── jit.py ├── low_level.py ├── opencl.py ├── parallel.py ├── profile.py ├── sort.py ├── template.py ├── tests │ ├── __init__.py │ ├── py3_code.py │ ├── test_array.py │ ├── test_ast_utils.py │ ├── test_capture_stream.py │ ├── test_change_backend.py │ ├── test_config.py │ ├── test_cuda.py │ ├── test_cython_generator.py │ ├── test_ext_module.py │ ├── test_gpu_struct.py │ ├── test_jit.py │ ├── test_low_level.py │ ├── test_parallel.py │ ├── test_profile.py │ ├── test_template.py │ ├── test_translator.py │ ├── test_transpiler.py │ ├── test_types.py │ └── test_utils.py ├── thrust │ ├── __init__.py │ └── sort.pyx ├── translator.py ├── transpiler.py ├── types.py └── utils.py ├── docs ├── Makefile ├── requirements.txt └── source │ ├── conf.py │ ├── details.rst │ ├── index.rst │ ├── installation.rst │ └── overview.rst ├── examples ├── axpb.py ├── axpb_jit.py ├── bench_vm.py ├── julia_set.py ├── laplace.py ├── molecular_dynamics │ ├── 3D │ │ ├── compare_results.py │ │ ├── hoomd_periodic.py │ │ ├── md_nnps.py │ │ ├── md_nnps_periodic.py │ │ ├── md_simple.py │ │ ├── nnps.py │ │ ├── nnps_kernels.py │ │ └── performance_comparison.py │ ├── README.rst │ ├── md_nnps.py │ ├── md_simple.py │ ├── nnps.py │ ├── nnps_kernels.py │ └── performance_comparison.py ├── vm_elementwise.py ├── vm_elementwise_jit.py ├── vm_kernel.py └── vm_numba.py ├── pyproject.toml ├── requirements.txt └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = compyle 4 | omit = 5 | */tests/* 6 | compyle/api.py 7 | 8 | [report] 9 | exclude_lines = 10 | # Have to re-enable the standard pragma 11 | pragma: no cover 12 | except ImportError: 13 | raise NotImplementedError() 14 | if __name__ == .__main__.: 15 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | schedule: 6 | # Run Test at 0400 UTC on Saturday. 7 | - cron: '0 4 * * 6' 8 | # Run test at 0400 UTC on day 1 of every month to create auto-generated 9 | # code afresh and cache it. 
10 | - cron: '0 4 1 * *' # Ref https://crontab.guru/#0_4_1_*_* 11 | 12 | jobs: 13 | tests: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest, macos-latest, windows-latest] 18 | python-version: [3.11, 3.12] 19 | 20 | runs-on: ${{ matrix.os }} 21 | defaults: 22 | run: 23 | shell: bash -l {0} 24 | 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: conda-incubator/setup-miniconda@v3 29 | with: 30 | auto-update-conda: true 31 | python-version: ${{ matrix.python-version }} 32 | channels: defaults, conda-forge 33 | channel-priority: flexible 34 | - name: Install dependencies on Linux/MacOS 35 | run: | 36 | conda info 37 | conda install pocl pyopencl 38 | python -c 'import pyopencl as cl' 39 | if: ${{ runner.os != 'Windows' }} 40 | - name: Setup compyle config on MacOS to use openmp enabled clang from homebrew 41 | run: | 42 | brew install libomp 43 | mkdir -p ~/.compyle 44 | touch ~/.compyle/config.py 45 | echo "import os" >> ~/.compyle/config.py 46 | echo "os.environ['CC'] = '$(brew --prefix llvm@15)/bin/clang'" >> ~/.compyle/config.py 47 | echo "os.environ['CXX'] = '$(brew --prefix llvm@15)/bin/clang++'" >> ~/.compyle/config.py 48 | export CPPFLAGS="-I$(brew --prefix libomp)/include -I$(brew --prefix llvm@15)/include -Xclang -fopenmp" 49 | export LDFLAGS="-L$(brew --prefix libomp)/lib -L$(brew --prefix llvm@15)/lib -lomp" 50 | python -c "import os; OMP_CFLAGS=os.environ.get('CPPFLAGS').split(' '); print(f'{OMP_CFLAGS=}')" >> ~/.compyle/config.py 51 | python -c "import os; OMP_LINK=os.environ.get('LDFLAGS').split(' '); print(f'{OMP_LINK=}')" >> ~/.compyle/config.py 52 | cat ~/.compyle/config.py 53 | if: ${{ runner.os == 'macOS' }} 54 | - name: Install dependencies 55 | run: | 56 | conda info 57 | conda install numpy cython 58 | python -m pip install -r requirements.txt 59 | python -m pip install coverage codecov 60 | python -m pip install -e ".[dev]" 61 | # Cache auto-generated code. Cache key changes every month. 62 | # Thanks https://stackoverflow.com/a/60942437 63 | - name: Get month to use as cache key. 
64 | id: month 65 | run: echo "month=$(date +'%m')" >> $GITHUB_OUTPUT 66 | - name: Deal with auto-generated code cache 67 | uses: actions/cache@v4 68 | with: 69 | path: | 70 | ~/.compyle 71 | key: ${{ runner.os }}-${{ steps.month.outputs.month }}-${{ matrix.python-version }} 72 | - name: Run tests 73 | run: | 74 | coverage erase 75 | coverage run -m pytest -v 76 | - name: Report 77 | if: ${{ success() }} 78 | run: coverage report 79 | - name: Upload Coverage to Codecov 80 | uses: codecov/codecov-action@v4 81 | with: 82 | env_vars: ${{ matrix.os }}, ${{ matrix.python-version }} 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.o 3 | *.c 4 | *.cpp 5 | *~ 6 | *.so 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | .pytest_cache -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-24.04 10 | tools: 11 | python: "3.12" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/source/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | python: 34 | install: 35 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | 0.9.1 2 | ~~~~~~ 3 | 4 | * Release date: 23 May, 2025. 5 | * Fix issue with unnecessary files to make a smaller source dist. 6 | 7 | 8 | 0.9 9 | ~~~~ 10 | 11 | * Release date: 23 May, 2025. 12 | * Allow user to pass a cython include directory when using ``ExtModule``. 13 | * Fix error with Cython compilation error messages not being shown. 14 | * Fix issue with the root log level being set when building an extension module. 15 | * Use cdivision always as that is usually the intent when using compyle. 16 | * Add a ``COMPYLE_DEBUG`` environment variable to print debug information. 17 | * Explicitly type any float literals for single precision to improve GPU performance. 18 | * Fix bug with the directory where the sources were saved. 19 | * Support for NumPy 2 and Cython 3.x. 20 | * Drop Python 2 support. 21 | * Do late binding so the backend can be changed. 22 | * Fix NumPy deprecation errors. 23 | 24 | 25 | 0.8.1 26 | ~~~~~~ 27 | 28 | * Release date: 7th November, 2021. 29 | * Fix issue with accidental file in sdist. 30 | 31 | 32 | 0.8 33 | ~~~~ 34 | 35 | * Release date: 7th November, 2021. 
36 | * Improve array module to support more NumPy-like functionality. 37 | * Improve profile output so it works in a distributed setting. 38 | * Add support for a configuration file in ~/.compyle/config.py 39 | * Add ``atomic_dec`` support. 40 | * Fix output capturing in Jupyter notebooks. 41 | * Fix issues due to ast changes in Python 3.9.x. 42 | * Fix tests on 32-bit architectures. 43 | * Fix several bugs and issues. 44 | 45 | 46 | 0.7 47 | ~~~~ 48 | 49 | * Release date: 1st October, 2020. 50 | * Add convenient option to profile execution of code. 51 | * Add a convenient argument parser for scripts. 52 | * Add easy way to see generated sources. 53 | * Fix bug with installation of previous version. 54 | * Fix several bugs and issues. 55 | * Update the documentation. 56 | 57 | 0.6 58 | ~~~~ 59 | 60 | * Release date: 15th June, 2020. 61 | * Add some non-trivial examples showcasing the package. 62 | * Document how one can use clang + OpenMP. 63 | * Add sorting, align, and other functions to array module. 64 | * Support for mapping structs on a GPU with CUDA. 65 | * Add address, cast, and address low-level functions. 66 | * Support for mako-templates for reducing repetitive code. 67 | * Bitwise operator support. 68 | * Attempt to auto-declare variables when possible. 69 | * Fix several bugs and issues. 70 | 71 | 72 | 73 | 0.5 74 | ~~~~ 75 | 76 | * Release date: 3rd December, 2018. 77 | * First public release. 78 | * Support for elementwise, scan, and reduction operations on CPU and GPU using 79 | Cython, OpenCL and CUDA. 80 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Unless otherwise specified by LICENSE.txt files in individual 2 | directories, all code is 3 | 4 | Copyright (c) 2009-2018, the PySPH developers 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | 11 | 1. Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | 2. Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in 15 | the documentation and/or other materials provided with the 16 | distribution. 17 | 3. Neither the name of the copyright holder nor the names of its contributors 18 | may be used to endorse or promote products derived from this software 19 | without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in *.py *.rst *.yml *.txt *.toml 2 | recursive-include compyle *.pyx 3 | recursive-exclude compyle *.cpp 4 | recursive-include docs *.* 5 | recursive-include examples *.* 6 | recursive-exclude docs/build *.* 7 | recursive-exclude examples/ *.png __pycache__/* .DS_Store 8 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Compyle: execute a subset of Python on HPC platforms 2 | ====================================================== 3 | 4 | |CI Status| |Coverage Status| |Documentation Status| 5 | 6 | 7 | .. |CI Status| image:: https://github.com/pypr/compyle/actions/workflows/tests.yml/badge.svg 8 | :target: https://github.com/pypr/compyle/actions/workflows/tests.yml 9 | .. |Documentation Status| image:: https://readthedocs.org/projects/compyle/badge/?version=latest 10 | :target: https://compyle.readthedocs.io/en/latest/?badge=latest 11 | :alt: Documentation Status 12 | .. |Coverage Status| image:: https://codecov.io/gh/pypr/compyle/branch/main/graph/badge.svg 13 | :target: https://codecov.io/gh/pypr/compyle 14 | 15 | Compyle allows users to execute a restricted subset of Python (very similar 16 | to C) on a variety of HPC platforms. Currently we support multi-core CPU 17 | execution using Cython, and for GPU devices we use OpenCL or CUDA. 18 | 19 | Users start with code written in a very restricted Python syntax; this code 20 | is then automatically transpiled, compiled and executed to run on a single CPU 21 | core, on multiple CPU cores (via OpenMP_), or on a GPU. Compyle offers 22 | source-to-source transpilation, making it a very convenient tool for writing HPC 23 | libraries. 24 | 25 | Some simple yet powerful parallel utilities are provided that allow you 26 | to solve a remarkably large number of interesting HPC problems. Compyle also 27 | features JIT transpilation, making it easy to use. 28 | 29 | Documentation and learning material are also available in the form of: 30 | 31 | - Documentation at: https://compyle.readthedocs.io 32 | 33 | - An introduction to compyle in the context of writing a parallel molecular 34 | dynamics simulator is in our `SciPy 2020 paper 35 | `_. 36 | 37 | - `Compyle poster presentation `_ 38 | 39 | - You may also try Compyle online for free on a `Google Colab notebook`_. 40 | 41 | While Compyle seems simple, it is not a toy; it is used heavily by the PySPH_ 42 | project, where Compyle has its origins. 43 | 44 | .. _PySPH: https://github.com/pypr/pysph 45 | .. _Google Colab notebook: https://colab.research.google.com/drive/1SGRiArYXV1LEkZtUeg9j0qQ21MDqQR2U?usp=sharing 46 | 47 | 48 | Installation 49 | ------------- 50 | 51 | Compyle is itself largely pure Python but depends on numpy_ and requires 52 | either Cython_, PyOpenCL_, or PyCUDA_ along with the respective backend (a 53 | C/C++ compiler, OpenCL, or CUDA). If you are only going to execute code on a 54 | CPU, then all you need is Cython. 55 | 56 | You should be able to install Compyle by doing:: 57 | 58 | $ pip install compyle 59 | 60 | 61 | .. _PyOpenCL: https://documen.tician.de/pyopencl/ 62 | .. _OpenCL: https://www.khronos.org/opencl/ 63 | .. _Cython: http://www.cython.org 64 | .. _numpy: http://www.numpy.org 65 | .. _OpenMP: http://openmp.org/ 66 | .. _PyCUDA: https://documen.tician.de/pycuda/
67 | 68 | A simple example 69 | ---------------- 70 | 71 | Here is a very simple example:: 72 | 73 | from compyle.api import Elementwise, annotate, wrap, get_config 74 | import numpy as np 75 | 76 | @annotate 77 | def axpb(i, x, y, a, b): 78 | y[i] = a*sin(x[i]) + b 79 | 80 | x = np.linspace(0, 1, 10000) 81 | y = np.zeros_like(x) 82 | a, b = 2.0, 3.0 83 | 84 | backend = 'cython' 85 | get_config().use_openmp = True 86 | x, y = wrap(x, y, backend=backend) 87 | e = Elementwise(axpb, backend=backend) 88 | e(x, y, a, b) 89 | 90 | This will execute the elementwise operation in parallel using OpenMP with 91 | Cython. The code is auto-generated, compiled and called for you transparently. 92 | The first time this runs it will take a bit of time to compile everything, but 93 | subsequent runs use the cached extension and are much faster. 94 | 95 | If you just change ``backend = 'opencl'``, the exact same code will be 96 | executed using PyOpenCL_, and if you change the backend to ``'cuda'``, it will 97 | execute via CUDA without any other changes to your code. This is obviously a 98 | very trivial example; more complex examples are available as well. 99 | 100 | 101 | Examples 102 | --------- 103 | 104 | Some simple examples and benchmarks are available in the `examples 105 | `_ directory. 106 | 107 | You may also run these examples on the `Google Colab notebook`_. 108 | -------------------------------------------------------------------------------- /compyle/__init__.py: -------------------------------------------------------------------------------- 1 | # See PEP 440 for more on suitable version numbers. 2 | __version__ = '0.10.dev0' 3 | -------------------------------------------------------------------------------- /compyle/api.py: -------------------------------------------------------------------------------- 1 | from .array import Array, wrap 2 | from .ast_utils import (get_symbols, get_assigned, 3 | get_unknown_names_and_calls, has_return, has_node) 4 | from .config import get_config, set_config, use_config, Config 5 | from .cython_generator import ( 6 | CythonGenerator, get_func_definition 7 | ) 8 | from .ext_module import ExtModule 9 | from .extern import Extern 10 | from .low_level import Kernel, LocalMem, Cython, cast 11 | from .parallel import ( 12 | Elementwise, Reduction, Scan, elementwise 13 | ) 14 | from .profile import ( 15 | get_profile_info, named_profile, profile, profile_ctx, print_profile, 16 | profile_kernel, ProfileContext, profile2csv 17 | ) 18 | from .translator import ( 19 | CConverter, CStructHelper, OpenCLConverter, detect_type, ocl_detect_type, 20 | py2c 21 | ) 22 | from .types import KnownType, annotate, declare 23 | from .utils import ArgumentParser 24 | -------------------------------------------------------------------------------- /compyle/ast_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities to work with the Python AST. 2 | """ 3 | 4 | import ast 5 | import sys 6 | 7 | PY_VER = sys.version_info.major 8 | 9 | basestring = str if PY_VER > 2 else basestring 10 | 11 | 12 | class NameLister(ast.NodeVisitor): 13 | """Utility class to collect the Names in an AST.
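
    For example::

        import ast
        n = NameLister()
        n.visit(ast.parse('x = y + 1'))
        # n.names is now {'x', 'y'}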
14 | """ 15 | def __init__(self, ctx=(ast.Load, ast.Store)): 16 | self.names = set() 17 | self.ctx = ctx 18 | 19 | def visit_Name(self, node): 20 | if isinstance(node.ctx, self.ctx): 21 | self.names.add(node.id) 22 | self.generic_visit(node) 23 | 24 | 25 | class SymbolParser(ast.NodeVisitor): 26 | """Utility class to gather the used symbols in a block of code. We look at 27 | assignments, augmented assignments, function calls, and any Names. These 28 | are all parsed in one shot and collected. 29 | 30 | Note that this works best for a single function that is parsed rather than 31 | for a collection of functions. 32 | 33 | """ 34 | def __init__(self): 35 | self.names = set() 36 | self.assign = set() 37 | self.calls = set() 38 | self.funcargs = set() 39 | self.func_name = '' 40 | self.ctx = (ast.Load, ast.Store) 41 | 42 | def visit_Name(self, node): 43 | if isinstance(node.ctx, self.ctx): 44 | self.names.add(node.id) 45 | self.generic_visit(node) 46 | 47 | def visit_AugAssign(self, node): 48 | if isinstance(node.target, ast.Name): 49 | self.assign.add(node.target.id) 50 | elif isinstance(node.target, ast.Subscript): 51 | v = node.target.value 52 | while not isinstance(v, ast.Name): 53 | v = v.value 54 | self.assign.add(v.id) 55 | self.generic_visit(node) 56 | 57 | def visit_Assign(self, node): 58 | for target in node.targets: 59 | if isinstance(target, ast.Name): 60 | self.assign.add(target.id) 61 | elif isinstance(target, ast.Subscript): 62 | n = target.value 63 | while not isinstance(n, ast.Name): 64 | n = n.value 65 | self.assign.add(n.id) 66 | elif isinstance(target, (ast.List, ast.Tuple)): 67 | for n in target.elts: 68 | if isinstance(n, ast.Name): 69 | self.assign.add(n.id) 70 | self.generic_visit(node) 71 | 72 | def visit_Call(self, node): 73 | if isinstance(node.func, ast.Name): 74 | self.calls.add(node.func.id) 75 | self.generic_visit(node) 76 | 77 | def visit_FunctionDef(self, node): 78 | self.func_name = node.name 79 | if PY_VER == 2: 80 | self.funcargs.update(x.id for x in node.args.args) 81 | if node.args.vararg: 82 | self.funcargs.add(node.args.vararg) 83 | if node.args.kwarg: 84 | self.funcargs.add(node.args.kwarg) 85 | else: 86 | self.funcargs.update(x.arg for x in node.args.args) 87 | if node.args.vararg: 88 | self.funcargs.add(node.args.vararg.arg) 89 | if node.args.kwarg: 90 | self.funcargs.add(node.args.kwarg.arg) 91 | if node.args.kwonlyargs: 92 | self.funcargs.update(x.arg for x in node.args.kwonlyargs) 93 | for arg in node.body: 94 | self.visit(arg) 95 | 96 | 97 | def _get_tree(code): 98 | return ast.parse(code) if isinstance(code, basestring) else code 99 | 100 | 101 | def get_symbols(code, ctx=(ast.Load, ast.Store)): 102 | """Given an AST or code string return the symbols used therein. 103 | 104 | Parameters 105 | ---------- 106 | 107 | code: A code string or the result of an ast.parse. 108 | 109 | ctx: The context of the names, can be one of ast.Load, ast.Store, ast.Del. 110 | """ 111 | tree = _get_tree(code) 112 | n = NameLister(ctx=ctx) 113 | n.visit(tree) 114 | return n.names 115 | 116 | 117 | def get_assigned(code): 118 | """Given an AST or code string return the symbols that are augmented 119 | assigned or assigned. 120 | 121 | Parameters 122 | ---------- 123 | 124 | code: A code string or the result of an ast.parse. 
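
    For example::

        get_assigned('x = 1\ny += sin(z)')  # returns {'x', 'y'}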
125 | 126 | """ 127 | tree = _get_tree(code) 128 | p = SymbolParser() 129 | p.visit(tree) 130 | return p.assign 131 | 132 | 133 | def get_unknown_names_and_calls(code): 134 | """Given an AST or code string return the unknown variables and calls in 135 | the code. The function returns two sets, ``names, calls``. 136 | 137 | Parameters 138 | ---------- 139 | 140 | code: A code string or the result of an ast.parse. 141 | 142 | """ 143 | tree = ast.parse(code) if isinstance(code, basestring) else code 144 | p = SymbolParser() 145 | p.visit(tree) 146 | funcargs = p.funcargs 147 | if len(p.func_name) > 0: 148 | funcargs.add(p.func_name) 149 | names = p.names - funcargs - p.calls - p.assign 150 | calls = p.calls 151 | return names, calls 152 | 153 | 154 | def has_node(code, node): 155 | """Given an AST or code string, returns True if the code contains 156 | any of the given node types. 157 | 158 | Parameters 159 | ---------- 160 | 161 | code: A code string or the result of an ast.parse. 162 | 163 | node: A node type or tuple of node types to check for. If a tuple 164 | is passed it returns True if any one of them is in the code. 165 | """ 166 | tree = _get_tree(code) 167 | for n in ast.walk(tree): 168 | if isinstance(n, node): 169 | return True 170 | return False 171 | 172 | 173 | def has_return(code): 174 | """Returns True if the code has a return statement. 175 | """ 176 | return has_node(code, ast.Return) 177 | -------------------------------------------------------------------------------- /compyle/capture_stream.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import sys 4 | from tempfile import mktemp 5 | 6 | 7 | def get_ipython_capture(): 8 | try: 9 | # This will work inside IPython but not outside it. 10 | name = get_ipython().__class__.__name__ 11 | if name.startswith('ZMQ'): 12 | from IPython.utils.capture import capture_output 13 | return capture_output 14 | else: 15 | return None 16 | except NameError: 17 | return None 18 | 19 | 20 | class CaptureStream(object): 21 | """A context manager which captures the output on a given stream (like 22 | sys.stderr). The captured output can then be inspected and used. 23 | 24 | We treat sys.stderr and stdout specially as very often these are 25 | overridden by nose or IPython. We always wrap the underlying file 26 | descriptors in this case as this is the intent of this context manager. 27 | 28 | This is somewhat based on this question: 29 | http://stackoverflow.com/questions/7018879/disabling-output-when-compiling-with-distutils 30 | 31 | Examples 32 | -------- 33 | 34 | See the tests in tests/test_capture_stream.py for example usage.
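
    A minimal sketch of typical use, where ``run_noisy`` stands in for any
    code that writes to the stream::

        with CaptureStream(sys.stderr) as stream:
            run_noisy()
        print(stream.get_output())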
35 | """ 36 | 37 | def __init__(self, stream=sys.stderr): 38 | self.stream = stream 39 | if stream is sys.stderr: 40 | self.fileno = 2 41 | elif stream is sys.stdout: 42 | self.fileno = 1 43 | else: 44 | self.fileno = stream.fileno() 45 | self.orig_stream = None 46 | self.tmp_stream = None 47 | self.tmp_path = '' 48 | self._cached_output = None 49 | 50 | def __enter__(self): 51 | if sys.platform.startswith('win32') and sys.version_info[:2] > (3, 5): 52 | return self 53 | self.orig_stream = os.dup(self.fileno) 54 | self.tmp_path = mktemp() 55 | self.tmp_stream = io.open(self.tmp_path, 'w+', encoding='utf-8') 56 | os.dup2(self.tmp_stream.fileno(), self.fileno) 57 | return self 58 | 59 | def __exit__(self, type, value, tb): 60 | if sys.platform.startswith('win32') and sys.version_info[:2] > (3, 5): 61 | return 62 | if self.orig_stream is not None: 63 | os.dup2(self.orig_stream, self.fileno) 64 | if self.tmp_stream is not None: 65 | self._cache_output() 66 | self.tmp_stream.close() 67 | os.remove(self.tmp_path) 68 | 69 | def _cache_output(self): 70 | if self._cached_output is not None: 71 | return 72 | tmp_stream = self.tmp_stream 73 | result = '' 74 | if tmp_stream is not None: 75 | tmp_stream.flush() 76 | tmp_stream.seek(0) 77 | result = tmp_stream.read() 78 | self._cached_output = result 79 | 80 | def get_output(self): 81 | """Return the captured output. 82 | """ 83 | if self._cached_output is None: 84 | self._cache_output() 85 | return self._cached_output 86 | 87 | 88 | class CaptureMultipleStreams(object): 89 | """This lets one capture multiple streams together. 90 | """ 91 | def __init__(self, streams=None): 92 | streams = (sys.stdout, sys.stderr) if streams is None else streams 93 | self.streams = streams 94 | self.captures = [CaptureStream(x) for x in streams] 95 | cap = get_ipython_capture() 96 | if cap: 97 | self.jcap = cap(stdout=True, stderr=True, display=True) 98 | else: 99 | self.jcap = None 100 | self.joutput = None 101 | 102 | def __enter__(self): 103 | for capture in self.captures: 104 | capture.__enter__() 105 | if self.jcap: 106 | self.joutput = self.jcap.__enter__() 107 | return self 108 | 109 | def __exit__(self, type, value, tb): 110 | for capture in self.captures: 111 | capture.__exit__(type, value, tb) 112 | if self.jcap: 113 | self.jcap.__exit__(type, value, tb) 114 | 115 | def get_output(self): 116 | out = list(x.get_output() for x in self.captures) 117 | if self.joutput: 118 | out[0] += self.joutput.stdout 119 | out[1] += self.joutput.stderr 120 | return out 121 | -------------------------------------------------------------------------------- /compyle/config.py: -------------------------------------------------------------------------------- 1 | """Simple configuration options for PySPH. 2 | 3 | Do not import any PySPH specific extensions here, if you must, do the import 4 | inside the function/method. 
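
Typical usage, as in the README::

    from compyle.config import get_config
    get_config().use_openmp = True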
5 | """ 6 | 7 | from contextlib import contextmanager 8 | 9 | 10 | class Config(object): 11 | def __init__(self): 12 | self._use_openmp = None 13 | self._use_opencl = None 14 | self._use_cuda = None 15 | self._use_double = None 16 | self._omp_schedule = None 17 | self._profile = None 18 | self._use_local_memory = None 19 | self._wgs = None 20 | self._suppress_warnings = None 21 | 22 | @property 23 | def suppress_warnings(self): 24 | if self._suppress_warnings is None: 25 | self._suppress_warnings = self._suppress_warnings_default() 26 | return self._suppress_warnings 27 | 28 | @suppress_warnings.setter 29 | def suppress_warnings(self, value): 30 | self._suppress_warnings = value 31 | 32 | def _suppress_warnings_default(self): 33 | return False 34 | 35 | @property 36 | def use_openmp(self): 37 | if self._use_openmp is None: 38 | self._use_openmp = self._use_openmp_default() 39 | return self._use_openmp 40 | 41 | @use_openmp.setter 42 | def use_openmp(self, value): 43 | self._use_openmp = value 44 | 45 | def _use_openmp_default(self): 46 | return False 47 | 48 | @property 49 | def omp_schedule(self): 50 | if self._omp_schedule is None: 51 | self._omp_schedule = self._omp_schedule_default() 52 | return self._omp_schedule 53 | 54 | @omp_schedule.setter 55 | def omp_schedule(self, value): 56 | if len(value) != 2 or \ 57 | value[0].lower() not in ("static", "dynamic", "guided"): 58 | raise ValueError("Invalid OpenMP Schedule: {}".format(value)) 59 | 60 | self._omp_schedule = value 61 | 62 | def set_omp_schedule(self, omp_schedule): 63 | """ 64 | Expects input to be in the format used by OMP_SCHEDULE 65 | i.e. "schedule_type, chunk_size" 66 | """ 67 | temp = omp_schedule.split(",") 68 | if len(temp) == 2: 69 | self.omp_schedule = (temp[0], int(temp[1])) 70 | else: 71 | self.omp_schedule = (temp[0], None) 72 | 73 | def _omp_schedule_default(self): 74 | return ("dynamic", 64) 75 | 76 | @property 77 | def use_opencl(self): 78 | if self._use_opencl is None: 79 | self._use_opencl = self._use_opencl_default() 80 | return self._use_opencl 81 | 82 | @use_opencl.setter 83 | def use_opencl(self, value): 84 | self._use_opencl = value 85 | 86 | def _use_opencl_default(self): 87 | return False 88 | 89 | @property 90 | def use_cuda(self): 91 | if self._use_cuda is None: 92 | self._use_cuda = self._use_cuda_default() 93 | return self._use_cuda 94 | 95 | @use_cuda.setter 96 | def use_cuda(self, value): 97 | self._use_cuda = value 98 | 99 | def _use_cuda_default(self): 100 | return False 101 | 102 | @property 103 | def use_double(self): 104 | """This is only used by OpenCL code. 105 | """ 106 | if self._use_double is None: 107 | self._use_double = self._use_double_default() 108 | return self._use_double 109 | 110 | @use_double.setter 111 | def use_double(self, value): 112 | """This is only used by OpenCL code. 
113 | """ 114 | self._use_double = value 115 | 116 | def _use_double_default(self): 117 | return False 118 | 119 | @property 120 | def profile(self): 121 | if self._profile is None: 122 | self._profile = self._profile_default() 123 | return self._profile 124 | 125 | @profile.setter 126 | def profile(self, value): 127 | self._profile = value 128 | 129 | def _profile_default(self): 130 | return False 131 | 132 | @property 133 | def use_local_memory(self): 134 | if self._use_local_memory is None: 135 | self._use_local_memory = self._use_local_memory_default() 136 | return self._use_local_memory 137 | 138 | @use_local_memory.setter 139 | def use_local_memory(self, value): 140 | self._use_local_memory = value 141 | 142 | def _use_local_memory_default(self): 143 | return False 144 | 145 | @property 146 | def wgs(self): 147 | if self._wgs is None: 148 | self._wgs = self._wgs_default() 149 | return self._wgs 150 | 151 | @wgs.setter 152 | def wgs(self, value): 153 | self._wgs = value 154 | 155 | def _wgs_default(self): 156 | return 32 157 | 158 | 159 | _config = None 160 | 161 | 162 | def get_config(): 163 | global _config 164 | if _config is None: 165 | _config = Config() 166 | return _config 167 | 168 | 169 | def set_config(config): 170 | global _config 171 | _config = config 172 | 173 | 174 | @contextmanager 175 | def use_config(**kw): 176 | """A context manager for the configuration. 177 | 178 | One can do the following:: 179 | 180 | with use_config(use_openmp=True) as cfg: 181 | do_something() 182 | cfg.use_opencl = True 183 | do_something_else() 184 | 185 | The configuration will be restored to the original when one exits the 186 | context. Inside the scope of the with statement the configuration ``cfg`` 187 | is the one operational and so can be changed. 188 | """ 189 | orig_cfg = get_config() 190 | cfg = Config() 191 | for k, v in kw.items(): 192 | setattr(cfg, k, v) 193 | 194 | set_config(cfg) 195 | 196 | try: 197 | yield cfg 198 | finally: 199 | set_config(orig_cfg) 200 | -------------------------------------------------------------------------------- /compyle/extern.py: -------------------------------------------------------------------------------- 1 | class Extern(object): 2 | """A simple way to support external functions and symbols. 3 | """ 4 | def link(self, backend): 5 | """Return a list of extra link args.""" 6 | return [] 7 | 8 | def code(self, backend): 9 | """Return suitable code as a string. 10 | 11 | This code is injected at the top of the generated code. 12 | """ 13 | raise NotImplementedError() 14 | 15 | def __call__(self, *args, **kw): 16 | """Implement for a pure Python implementation if needed. 17 | """ 18 | raise NotImplementedError() 19 | 20 | 21 | class _printf(Extern): 22 | def code(self, backend): 23 | # printf is always available in C/OpenCL so nothing is needed, but 24 | # for Cython we explicitly cimport it as an example. 25 | 26 | if backend == 'cython': 27 | return 'from libc.stdio cimport printf' 28 | return '' 29 | 30 | def __call__(self, *args): 31 | print(args[0] % args[1:]) 32 | 33 | 34 | # Now make it available publicly. 35 | printf = _printf() 36 | 37 | # More examples are available in the low_level.py module.
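#
# For instance, a sketch (not part of the module) of an extern wrapping the
# C99 ``cbrt`` function, analogous to ``_printf`` above; OpenCL/CUDA already
# provide cbrt natively, so only the Cython backend needs the cimport:
#
#     class _cbrt(Extern):
#         def code(self, backend):
#             if backend == 'cython':
#                 return 'from libc.math cimport cbrt'
#             return ''
#
#         def __call__(self, x):
#             # Pure Python fallback (for x >= 0).
#             return x ** (1.0 / 3.0)
#
#     cbrt = _cbrt()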
38 | 39 | 40 | def get_extern_code(externs, backend): 41 | links = [] 42 | code = [] 43 | for ex in externs: 44 | link = ex.link(backend) 45 | if link: 46 | links.extend(link) 47 | c = ex.code(backend) 48 | if c: 49 | code.append(c) 50 | 51 | return links, code 52 | -------------------------------------------------------------------------------- /compyle/opencl.py: -------------------------------------------------------------------------------- 1 | """Common OpenCL related functionality. 2 | """ 3 | from __future__ import print_function 4 | import pyopencl as cl 5 | 6 | from .config import get_config 7 | from .profile import profile_kernel, named_profile 8 | 9 | _ctx = None 10 | _queue = None 11 | 12 | 13 | class DeviceWGSException(Exception): 14 | pass 15 | 16 | 17 | def get_context(): 18 | global _ctx 19 | if _ctx is None: 20 | _ctx = cl.create_some_context() 21 | return _ctx 22 | 23 | 24 | def set_context(ctx): 25 | global _ctx 26 | _ctx = ctx 27 | 28 | 29 | def get_queue(): 30 | global _queue 31 | if _queue is None: 32 | kwargs = dict() 33 | if get_config().profile: 34 | kwargs['properties'] = cl.command_queue_properties.PROFILING_ENABLE 35 | _queue = cl.CommandQueue(get_context(), **kwargs) 36 | return _queue 37 | 38 | 39 | def set_queue(q): 40 | global _queue 41 | _queue = q 42 | 43 | 44 | class SimpleKernel(object): 45 | """ElementwiseKernel substitute that supports a custom work group size. 46 | """ 47 | 48 | def __init__(self, ctx, args, operation, wgs, 49 | name="", preamble="", options=[]): 50 | self.args = args 51 | self.operation = operation 52 | self.name = name 53 | self.preamble = preamble 54 | self.options = options 55 | 56 | self.prg = cl.Program(ctx, self._generate()).build(options) 57 | self.knl = getattr(self.prg, name) 58 | 59 | if self.get_max_wgs() < wgs: 60 | raise DeviceWGSException("") 61 | 62 | def _massage_arg(self, arg): 63 | if '*' in arg: 64 | return "__global " + arg 65 | return arg 66 | 67 | def _generate(self): 68 | args = [self._massage_arg(arg) for arg in self.args.split(",")] 69 | 70 | source = r""" 71 | %(preamble)s 72 | 73 | __kernel void %(name)s(%(args)s) 74 | { 75 | int lid = get_local_id(0); 76 | int gsize = get_global_size(0); 77 | int work_group_start = get_local_size(0)*get_group_id(0); 78 | long i = get_global_id(0); 79 | 80 | %(body)s 81 | } 82 | """ % { 83 | "args": ",".join(args), 84 | "name": self.name, 85 | "preamble": self.preamble, 86 | "body": self.operation 87 | } 88 | 89 | return source 90 | 91 | def get_max_wgs(self): 92 | return self.knl.get_work_group_info( 93 | cl.kernel_work_group_info.WORK_GROUP_SIZE, 94 | get_queue().device 95 | ) 96 | 97 | def __call__(self, *args, **kwargs): 98 | wait_for = kwargs.pop("wait_for", None) 99 | queue = kwargs.pop("queue", None) 100 | gs = kwargs.pop("gs", None) 101 | ls = kwargs.pop("ls", None) 102 | 103 | if queue is None or gs is None or ls is None: 104 | raise ValueError("queue, gs and ls can not be empty") 105 | 106 | if kwargs: 107 | raise TypeError("unknown keyword arguments: '%s'" 108 | % ", ".join(kwargs)) 109 | 110 | def unwrap(arg): 111 | return arg.data if isinstance(arg, cl.array.Array) else arg 112 | 113 | self.knl.set_args(*[unwrap(arg) for arg in args]) 114 | return cl.enqueue_nd_range_kernel(queue, self.knl, gs, ls, 115 | wait_for=wait_for) 116 | -------------------------------------------------------------------------------- /compyle/profile.py: -------------------------------------------------------------------------------- 1 | """ Utils for profiling kernels 2 | """ 3 | 4 | 
from contextlib import contextmanager 5 | from collections import defaultdict 6 | import time 7 | from .config import get_config 8 | 9 | 10 | def _make_default(): 11 | return dict(calls=0, time=0.0) 12 | 13 | 14 | _current_level = 0 15 | _profile_info = defaultdict( 16 | lambda: defaultdict(_make_default) 17 | ) 18 | 19 | 20 | def _record_profile(name, time): 21 | global _profile_info, _current_level 22 | li = _profile_info[_current_level] 23 | li[name]['time'] += time 24 | li[name]['calls'] += 1 25 | 26 | 27 | @contextmanager 28 | def profile_ctx(name): 29 | """ Context manager for profiling 30 | 31 | For profiling a function f, it can be used as follows:: 32 | 33 | with profile_ctx('f'): 34 | f() 35 | """ 36 | global _current_level 37 | _current_level += 1 38 | start = time.time() 39 | try: 40 | yield start 41 | finally: 42 | end = time.time() 43 | _current_level -= 1 44 | _record_profile(name, end - start) 45 | 46 | 47 | def profile(method=None, name=None): 48 | """Decorator for profiling a function. Can be used as follows:: 49 | 50 | @profile 51 | def f(): 52 | pass 53 | 54 | 55 | If explicitly passed a name, with @profile(name='some name'), it will use 56 | the given name. Otherwise, if the function is a class method, and the class 57 | has a `self.name` attribute, it will use that. Otherwise, it will use the 58 | method's qualified name to record the profile. 59 | 60 | """ 61 | def make_wrapper(method): 62 | def wrapper(*args, **kwargs): 63 | self = args[0] if len(args) else None 64 | if name is None: 65 | if hasattr(self, method.__name__) and hasattr(self, 'name'): 66 | p_name = self.name 67 | else: 68 | p_name = getattr(method, '__qualname__', method.__name__) 69 | else: 70 | p_name = name 71 | with profile_ctx(p_name): 72 | return method(*args, **kwargs) 73 | wrapper.__doc__ = method.__doc__ 74 | return wrapper 75 | if method is None: 76 | return make_wrapper 77 | else: 78 | return make_wrapper(method) 79 | 80 | 81 | class ProfileContext: 82 | """Used for a low-level profiling context. 83 | 84 | This is typically useful in Cython code where decorators are not usable and 85 | using a context manager makes the code hard to read.
86 | 87 | Example 88 | ------- 89 | 90 | p = ProfileContext('some_func') 91 | do_something() 92 | p.stop() 93 | 94 | """ 95 | def __init__(self, name): 96 | self.name = name 97 | global _current_level 98 | _current_level += 1 99 | self.start = time.time() 100 | 101 | def stop(self): 102 | global _current_level 103 | _current_level -= 1 104 | _record_profile(self.name, time.time() - self.start) 105 | 106 | 107 | def get_profile_info(): 108 | global _profile_info 109 | return _profile_info 110 | 111 | 112 | def print_profile(): 113 | global _profile_info 114 | hr = '-'*70 115 | print(hr) 116 | if len(_profile_info) == 0: 117 | print("No profiling information available") 118 | print(hr) 119 | return 120 | print("Profiling info:") 121 | print( 122 | "{:<6} {:<40} {:<10} {:<10}".format( 123 | 'Level', 'Function', 'N calls', 'Time') 124 | ) 125 | tot_time = 0 126 | for level in range(0, min(len(_profile_info), 2)): 127 | profile_data = sorted( 128 | _profile_info[level].items(), key=lambda x: x[1]['time'], 129 | reverse=True 130 | ) 131 | for kernel, data in profile_data: 132 | print("{:<6} {:<40} {:<10} {:<10.3g}".format( 133 | level, kernel, data['calls'], data['time']) 134 | ) 135 | if level == 0: 136 | tot_time += data['time'] 137 | print("Total profiled time: %g secs" % tot_time) 138 | print(hr) 139 | 140 | 141 | def profile2csv(fname, info=None): 142 | '''Write profile info to a CSV file. 143 | 144 | If the optional info argument is passed, it is used as the profile info. 145 | The `info` argument is a list, potentially one for each rank (for a 146 | parallel simulation). 147 | ''' 148 | if info is None: 149 | info = [get_profile_info()] 150 | with open(fname, 'w') as f: 151 | f.write("{0},{1},{2},{3},{4}\n".format( 152 | 'rank', 'level', 'function', 'calls', 'time') 153 | ) 154 | for rank in range(len(info)): 155 | pdata = info[rank] 156 | for level in sorted(pdata.keys()): 157 | profile_data = sorted( 158 | pdata[level].items(), key=lambda x: x[1]['time'], 159 | reverse=True 160 | ) 161 | for name, data in profile_data: 162 | f.write("{0},{1},{2},{3},{4}\n".format( 163 | rank, level, name, data['calls'], data['time'] 164 | )) 165 | 166 | 167 | def profile_kernel(kernel, name, backend=None): 168 | """For profiling raw PyCUDA/PyOpenCL kernels or cython functions 169 | """ 170 | from compyle.array import get_backend 171 | backend = get_backend(backend) 172 | 173 | def _profile_knl(*args, **kwargs): 174 | if backend == 'opencl': 175 | start = time.time() 176 | event = kernel(*args, **kwargs) 177 | event.wait() 178 | end = time.time() 179 | _record_profile(name, end - start) 180 | return event 181 | elif backend == 'cuda': 182 | exec_time = kernel(*args, **kwargs, time_kernel=True) 183 | _record_profile(name, exec_time) 184 | return exec_time 185 | else: 186 | start = time.time() 187 | kernel(*args, **kwargs) 188 | end = time.time() 189 | _record_profile(name, end - start) 190 | 191 | if get_config().profile: 192 | wgi = getattr(kernel, 'get_work_group_info', None) 193 | if wgi is not None: 194 | _profile_knl.get_work_group_info = wgi 195 | return _profile_knl 196 | else: 197 | return kernel 198 | 199 | 200 | def named_profile(name, backend=None): 201 | """Decorator for profiling raw PyOpenCL/PyCUDA kernels or cython functions. 
202 | This can be used on a function that returns a raw PyCUDA/PyOpenCL kernel 203 | 204 | For example:: 205 | 206 | @named_profile('prefix_sum') 207 | def _get_prefix_sum(ctx): 208 | return GenericScanKernel(ctx, np.int32, 209 | arguments="__global int *ary", 210 | input_expr="ary[i]", 211 | scan_expr="a+b", neutral="0", 212 | output_statement="ary[i] = prev_item") 213 | """ 214 | from compyle.array import get_backend 215 | backend = get_backend(backend) 216 | 217 | def _decorator(f): 218 | if name is None: 219 | n = f.__name__ 220 | else: 221 | n = name 222 | 223 | def _profiled_kernel_generator(*args, **kwargs): 224 | kernel = f(*args, **kwargs) 225 | return profile_kernel(kernel, n, backend=backend) 226 | 227 | return _profiled_kernel_generator 228 | 229 | return _decorator 230 | -------------------------------------------------------------------------------- /compyle/sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .config import get_config 4 | from .cython_generator import get_parallel_range, CythonGenerator 5 | from .transpiler import Transpiler, convert_to_float_if_needed 6 | from .types import dtype_to_ctype, annotate 7 | from .parallel import Scan 8 | from .template import Template 9 | 10 | from . import array 11 | 12 | 13 | class OutputSortBit(Template): 14 | def __init__(self, name, num_arys): 15 | super(OutputSortBit, self).__init__(name=name) 16 | self.num_arys = num_arys 17 | 18 | def extra_args(self): 19 | args = ['inp_%s' % num for num in range(self.num_arys)] 20 | args += ['out_%s' % num for num in range(self.num_arys)] 21 | return args, {} 22 | 23 | def template(self, i, item, prev_item, last_item, bit_number, indices, 24 | sorted_indices): 25 | ''' 26 | key_bit = (inp_0[i] >> bit_number) & 1 27 | t = last_item + i - prev_item 28 | idx = t if key_bit else prev_item 29 | 30 | sorted_indices[idx] = indices[i] 31 | % for num in range(obj.num_arys): 32 | out_${num}[idx] = inp_${num}[i] 33 | % endfor 34 | ''' 35 | 36 | 37 | @annotate 38 | def input_sort_bit(i, inp_0, bit_number): 39 | return 1 if (inp_0[i] >> bit_number) & 1 == 0 else 0 40 | 41 | 42 | def radix_sort(ary_list, out_list=None, max_key_bits=None, backend=None): 43 | keys = ary_list[0] 44 | backend = array.get_backend(backend) 45 | if not np.issubdtype(keys.dtype, np.integer): 46 | raise ValueError("RadixSort can only sort integer types") 47 | if max_key_bits is None: 48 | max_key_bits = 8 * keys.dtype.itemsize 49 | 50 | # temp arrays 51 | sorted_indices = array.zeros(keys.length, np.int32, backend=backend) 52 | temp_indices = array.zeros_like(sorted_indices) 53 | 54 | indices = array.arange(0, keys.length, 1, backend=backend) 55 | 56 | # allocate temp arrays 57 | if out_list: 58 | temp_ary_list = out_list 59 | else: 60 | temp_ary_list = [array.zeros_like(ary) for ary in ary_list] 61 | sorted_ary_list = [array.zeros_like(ary) for ary in ary_list] 62 | 63 | # kernel 64 | output_sort_bit = OutputSortBit('output_sort_bit', len(ary_list)) 65 | 66 | sort_bit_knl = Scan(input_sort_bit, output_sort_bit.function, 67 | 'a+b', dtype=keys.dtype, backend=backend) 68 | 69 | for bit_number in range(max_key_bits): 70 | if bit_number == 0: 71 | inp_indices = indices 72 | inp_ary_list = ary_list 73 | else: 74 | inp_indices = temp_indices 75 | inp_ary_list = temp_ary_list 76 | 77 | args = {'bit_number': bit_number, 'indices': indices, 78 | 'sorted_indices': sorted_indices} 79 | args.update({'inp_%i' % i: ary for i, ary in enumerate(inp_ary_list)}) 80 | 
args.update({'out_%i' % 81 | i: ary for i, ary in enumerate(sorted_ary_list)}) 82 | 83 | sort_bit_knl(**args) 84 | 85 | temp_indices, sorted_indices = sorted_indices, temp_indices 86 | temp_ary_list, sorted_ary_list = sorted_ary_list, temp_ary_list 87 | 88 | return temp_ary_list, temp_indices 89 | -------------------------------------------------------------------------------- /compyle/template.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import inspect 3 | from textwrap import dedent 4 | 5 | from .types import kwtype_to_annotation 6 | import mako.template 7 | 8 | 9 | getfullargspec = inspect.getfullargspec 10 | 11 | 12 | class Template(object): 13 | def __init__(self, name): 14 | self.name = name 15 | self._function = None 16 | 17 | @property 18 | def function(self): 19 | if self._function is None: 20 | self._function = self._make_function() 21 | return self._function 22 | 23 | def _make_function(self): 24 | src, annotations = self._get_code() 25 | self._source = src 26 | namespace = {} 27 | exec(src, namespace) 28 | f = namespace[self.name] 29 | f.__module__ = self.__module__ 30 | f.is_jit = len(annotations) == 0 31 | try: 32 | f.__annotations__ = annotations 33 | except AttributeError: 34 | f.im_func.__annotations__ = annotations 35 | f.source = src 36 | return f 37 | 38 | def _get_code(self): 39 | m = ast.parse(dedent(inspect.getsource(self.template))) 40 | argspec = getfullargspec(self.template) 41 | args = argspec.args 42 | if args[0] == 'self': 43 | args = args[1:] 44 | extra_args, extra_annotations = self.extra_args() 45 | args += extra_args 46 | arg_string = ', '.join(args) 47 | body = m.body[0].body 48 | template = body[-1].value.s 49 | docstring = body[0].value.s if len(body) == 2 else '' 50 | name = self.name 51 | sig = 'def {name}({args}):\n """{docs}\n """'.format( 52 | name=name, args=arg_string, docs=docstring 53 | ) 54 | src = sig + self.render(template) 55 | annotations = getattr(self.template, '__annotations__', {}) 56 | data = kwtype_to_annotation(extra_annotations) 57 | annotations.update(data) 58 | return src, annotations 59 | 60 | def inject(self, func, indent=1): 61 | '''Returns the source code of the body of `func`. 62 | 63 | The optional `indent` parameter is the indentation to be used for the 64 | code. When indent is 1, 4 spaces are added to each line. 65 | 66 | This is meant to be used from the mako template. The idea is that one 67 | can define the code to be injected as a method and have the body be 68 | directly injected. 69 | ''' 70 | lines = inspect.getsourcelines(func)[0] 71 | src = dedent(''.join(lines)) 72 | m = ast.parse(src) 73 | # We do this so as to not inject any docstrings. 74 | body_start_index = 1 if isinstance(m.body[0].body[0], ast.Expr) else 0 75 | body_start = m.body[0].body[body_start_index].lineno - 1 76 | body_lines = lines[body_start:] 77 | first = body_lines[0] 78 | leading = first.index(first.lstrip()) 79 | diff = indent*4 - leading 80 | if diff < 0: 81 | indented_body = [x[-diff:] for x in body_lines] 82 | else: 83 | indented_body = [' '*diff + x for x in body_lines] 84 | return ''.join(indented_body) 85 | 86 | def render(self, src): 87 | t = mako.template.Template(text=src) 88 | return t.render(obj=self) 89 | 90 | def extra_args(self): 91 | '''Override this to provide configurable arguments. 92 | 93 | Return a list of strings which are the arguments and a dictionary with 94 | the type annotations. 
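
        A sketch, assuming a single extra integer argument called ``nx``::

            return ['nx'], {'nx': 'int'}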
95 | 96 | ''' 97 | return [], {} 98 | 99 | def template(self): 100 | '''Override this to write your mako template. 101 | 102 | `obj` is mapped to self. 103 | ''' 104 | ''' 105 | ## Mako code here. 106 | ''' 107 | -------------------------------------------------------------------------------- /compyle/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pypr/compyle/bc858bab005f2cc9990267448c5b873d4b5f8635/compyle/tests/__init__.py -------------------------------------------------------------------------------- /compyle/tests/py3_code.py: -------------------------------------------------------------------------------- 1 | # Python3 specific code for some tests. 2 | 3 | from ..types import int_, declare 4 | 5 | 6 | def py3_f(x: int_) -> int_: 7 | y = declare('int') 8 | y = x + 1 9 | return x*y 10 | -------------------------------------------------------------------------------- /compyle/tests/test_ast_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import ast 3 | import sys 4 | from textwrap import dedent 5 | import unittest 6 | 7 | from ..ast_utils import ( 8 | get_assigned, get_symbols, get_unknown_names_and_calls, 9 | has_node, has_return 10 | ) 11 | 12 | 13 | class TestASTUtils(unittest.TestCase): 14 | def test_get_symbols(self): 15 | code = ''' 16 | x = 1 17 | d_x[d_idx] += s_x[s_idx] 18 | ''' 19 | tree = ast.parse(dedent(code)) 20 | result = list(get_symbols(tree)) 21 | result.sort() 22 | expect = ['d_idx', 'd_x', 's_idx', 's_x', 'x'] 23 | self.assertEqual(result, expect) 24 | 25 | # Test if it parses with the code itself instead of a tree. 26 | result = list(get_symbols(dedent(code))) 27 | result.sort() 28 | self.assertEqual(result, expect) 29 | 30 | result = list(get_symbols(tree, ctx=ast.Store)) 31 | result.sort() 32 | self.assertEqual(result, ['x']) 33 | 34 | def test_has_return(self): 35 | code = dedent(''' 36 | x = 1 37 | ''') 38 | self.assertFalse(has_return(code)) 39 | code = dedent(''' 40 | def f(): 41 | pass 42 | ''') 43 | self.assertFalse(has_return(code)) 44 | code = dedent(''' 45 | def f(x): 46 | return x+1 47 | ''') 48 | self.assertTrue(has_return(code)) 49 | 50 | def test_has_node(self): 51 | code = dedent(''' 52 | x = 1 53 | ''') 54 | self.assertFalse(has_node(code, (ast.Return, ast.AugAssign))) 55 | code = dedent(''' 56 | def f(): 57 | pass 58 | ''') 59 | self.assertTrue(has_node(code, (ast.AugAssign, ast.FunctionDef))) 60 | 61 | def test_assigned_values(self): 62 | code = dedent(''' 63 | u[0] = 0.0 64 | x = 1 65 | y = sin(x)*theta 66 | z += 1 67 | ''') 68 | assigned = list(sorted(get_assigned(code))) 69 | # sin or theta should not be detected. 70 | expect = ['u', 'x', 'y', 'z'] 71 | self.assertEqual(assigned, expect) 72 | 73 | def test_assigned_tuple_expansion(self): 74 | code = dedent(''' 75 | u, v = 0.0, 1.0 76 | [x, y] = 0.0, 1.0 77 | ''') 78 | assigned = list(sorted(get_assigned(code))) 79 | expect = ['u', 'v', 'x', 'y'] 80 | self.assertEqual(assigned, expect) 81 | 82 | def test_get_unknown_names_and_calls(self): 83 | code = dedent(''' 84 | def f(x): 85 | g(h(x)) 86 | y = x + SIZE 87 | for i in range(y): 88 | x += func(JUNK) 89 | sin(x) 90 | ''') 91 | 92 | # When 93 | names, calls = get_unknown_names_and_calls(code) 94 | 95 | # Then. 
96 | e_names = {'SIZE', 'i', 'JUNK'} 97 | e_calls = {'g', 'h', 'range', 'func', 'sin'} 98 | self.assertSetEqual(names, e_names) 99 | self.assertSetEqual(calls, e_calls) 100 | 101 | @unittest.skipIf(sys.version_info < (3, 4), 102 | reason='Test requires Python 3.') 103 | def test_get_unknown_names_and_calls_with_py3_annotation(self): 104 | code = dedent(''' 105 | from compyle import types as T 106 | 107 | def f(x: T.doublep, n: T.int_)-> T.double: 108 | s = declare('double') 109 | for i in range(n): 110 | s += func(x) 111 | return s 112 | ''') 113 | 114 | # When 115 | names, calls = get_unknown_names_and_calls(code) 116 | 117 | # Then. 118 | e_names = {'i'} 119 | e_calls = {'declare', 'func', 'range'} 120 | self.assertSetEqual(names, e_names) 121 | self.assertSetEqual(calls, e_calls) 122 | 123 | 124 | if __name__ == '__main__': 125 | unittest.main() 126 | -------------------------------------------------------------------------------- /compyle/tests/test_capture_stream.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import unittest 4 | 5 | import pytest 6 | 7 | from ..capture_stream import CaptureMultipleStreams, CaptureStream 8 | 9 | if sys.platform.startswith("win32") and sys.version_info[:2] > (3, 5): 10 | pytest.skip("skipping capture tests on windows", allow_module_level=True) 11 | 12 | 13 | def write_stderr(): 14 | subprocess.call( 15 | [sys.executable, "-S", "-s", "-c", 16 | "import sys;sys.stderr.write('stderr')"] 17 | ) 18 | 19 | 20 | def write_stdout(): 21 | subprocess.call( 22 | [sys.executable, "-S", "-s", "-c", 23 | "import sys;sys.stdout.write('stdout')"] 24 | ) 25 | 26 | 27 | class TestCaptureStream(unittest.TestCase): 28 | def test_that_stderr_is_captured_by_default(self): 29 | # Given 30 | # When 31 | with CaptureStream() as stream: 32 | write_stderr() 33 | # Then 34 | self.assertEqual(stream.get_output(), "stderr") 35 | 36 | def test_that_stdout_can_be_captured(self): 37 | # Given 38 | # When 39 | with CaptureStream(sys.stdout) as stream: 40 | write_stdout() 41 | # Then 42 | self.assertEqual(stream.get_output(), "stdout") 43 | 44 | def test_that_output_is_available_in_context_and_outside(self): 45 | # Given 46 | # When 47 | with CaptureStream(sys.stderr) as stream: 48 | write_stderr() 49 | # Then 50 | self.assertEqual(stream.get_output(), "stderr") 51 | 52 | # Then 53 | self.assertEqual(stream.get_output(), "stderr") 54 | 55 | 56 | class TestCaptureMultipleStreams(unittest.TestCase): 57 | def test_that_stdout_stderr_are_captured_by_default(self): 58 | # Given 59 | # When 60 | with CaptureMultipleStreams() as stream: 61 | write_stderr() 62 | write_stdout() 63 | # Then 64 | outputs = stream.get_output() 65 | self.assertEqual(outputs[0], "stdout") 66 | self.assertEqual(outputs[1], "stderr") 67 | 68 | def test_that_order_is_preserved(self): 69 | # Given 70 | # When 71 | with CaptureMultipleStreams((sys.stderr, sys.stdout)) as stream: 72 | write_stderr() 73 | write_stdout() 74 | # Then 75 | outputs = stream.get_output() 76 | self.assertEqual(outputs[0], "stderr") 77 | self.assertEqual(outputs[1], "stdout") 78 | 79 | 80 | if __name__ == '__main__': 81 | unittest.main() 82 | -------------------------------------------------------------------------------- /compyle/tests/test_change_backend.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from pytest import importorskip 5 | 6 | from ..config import use_config 7 | from 
..array import wrap 8 | from ..types import annotate 9 | from ..parallel import elementwise, Reduction, Scan 10 | 11 | 12 | class TestChangeBackend(unittest.TestCase): 13 | def test_elementwise_late_binding(self): 14 | # Given/When 15 | @elementwise 16 | @annotate 17 | def axpb(i, y, x, a, b): 18 | y[i] = a*x[i] + b 19 | 20 | # Then 21 | self.assertIsNone(axpb.elementwise) 22 | 23 | def test_reduction_late_binding(self): 24 | # Given/When 25 | r = Reduction('a+b') 26 | 27 | # Then 28 | self.assertIsNone(r.reduction) 29 | 30 | def test_scan_late_binding(self): 31 | # Given/When 32 | @annotate 33 | def output_f(i, last_item, item, ary): 34 | ary[i] = item + last_item 35 | 36 | scan = Scan(output=output_f, scan_expr='a+b', 37 | dtype=np.int32) 38 | 39 | # Then 40 | self.assertIsNone(scan.scan) 41 | 42 | def test_elementwise_supports_changing_backend(self): 43 | importorskip("pyopencl") 44 | 45 | # Given/When 46 | @elementwise 47 | @annotate 48 | def axpb(i, y, x, a, b): 49 | y[i] = a*x[i] + b 50 | 51 | # When 52 | a, b = 2.0, 1.5 53 | x = np.linspace(0, 2*np.pi, 100) 54 | y = np.zeros_like(x) 55 | y0 = a*x + b 56 | with use_config(use_opencl=True): 57 | x, y = wrap(x, y) 58 | axpb(y, x, a, b) 59 | y.pull() 60 | 61 | # Then 62 | np.testing.assert_array_almost_equal(y.data, y0) 63 | self.assertEqual(axpb.elementwise.backend, 'opencl') 64 | 65 | # When 66 | x, y = wrap(x.data, y.data) 67 | axpb.set_backend('cython') 68 | axpb(y, x, a, b) 69 | # Then 70 | np.testing.assert_array_almost_equal(y.data, y0) 71 | self.assertEqual(axpb.elementwise.backend, 'cython') 72 | 73 | def test_reduction_supports_changing_backend(self): 74 | importorskip("pyopencl") 75 | 76 | # Given 77 | r = Reduction('a+b') 78 | 79 | # When 80 | x = np.linspace(0, 1, 1000) / 1000 81 | x_orig = x.copy() 82 | expect = 0.5 83 | 84 | with use_config(use_opencl=True): 85 | x = wrap(x) 86 | result = r(x) 87 | 88 | # Then 89 | self.assertAlmostEqual(result, expect, 6) 90 | 91 | # When 92 | x = wrap(x_orig) 93 | r.set_backend('cython') 94 | result = r(x) 95 | 96 | # Then 97 | self.assertAlmostEqual(result, expect, 6) 98 | 99 | def test_scan_supports_changing_backend(self): 100 | importorskip("pyopencl") 101 | 102 | # Given/When 103 | @annotate 104 | def input_f(i, ary): 105 | return ary[i] 106 | 107 | @annotate 108 | def output_f(i, item, ary): 109 | ary[i] = item 110 | 111 | scan = Scan(input_f, output_f, 'a+b', dtype=np.int32) 112 | 113 | # When 114 | a = np.arange(10000, dtype=np.int32) 115 | data = a.copy() 116 | expect = np.cumsum(a) 117 | 118 | with use_config(use_opencl=True): 119 | a = wrap(a) 120 | scan(input=a, ary=a) 121 | a.pull() 122 | 123 | # Then 124 | np.testing.assert_array_almost_equal(a.data, expect) 125 | 126 | # When 127 | a = wrap(data) 128 | scan.set_backend('cython') 129 | scan(input=a, ary=a) 130 | a.pull() 131 | 132 | # Then 133 | np.testing.assert_array_almost_equal(a.data, expect) 134 | 135 | def test_wrap_is_identity_on_arrays_with_same_backend(self): 136 | # Given 137 | x = np.linspace(0, 1, 100) 138 | 139 | # When 140 | xw = wrap(x) 141 | 142 | res = wrap(xw) 143 | 144 | # Then 145 | self.assertIs(res, xw) 146 | 147 | def test_wrap_can_wrap_array_to_different_backend(self): 148 | importorskip("pyopencl") 149 | # Given 150 | x = np.linspace(0, 1, 100) 151 | 152 | # When 153 | xc = wrap(x) 154 | with use_config(use_opencl=True): 155 | xocl = wrap(xc) 156 | 157 | # Then 158 | self.assertEqual(xc.backend, 'cython') 159 | self.assertEqual(xocl.backend, 'opencl') 160 | 
np.testing.assert_array_almost_equal(xocl.data, xc.data) 161 | -------------------------------------------------------------------------------- /compyle/tests/test_config.py: -------------------------------------------------------------------------------- 1 | """Tests for the configuration. 2 | """ 3 | from unittest import TestCase, main 4 | 5 | from ..config import Config, get_config, set_config, use_config 6 | 7 | 8 | class ConfigTestCase(TestCase): 9 | 10 | def setUp(self): 11 | # Unset any default configuration. 12 | set_config(None) 13 | self.config = Config() 14 | 15 | def tearDown(self): 16 | # Unset any default configuration. 17 | set_config(None) 18 | 19 | def test_use_openmp_config_default(self): 20 | # Given 21 | config = self.config 22 | # When 23 | # Then 24 | self.assertFalse(config.use_openmp) 25 | 26 | def test_set_get_use_openmp_config(self): 27 | # Given 28 | config = self.config 29 | # When 30 | config.use_openmp = 10 31 | # Then 32 | self.assertEqual(config.use_openmp, 10) 33 | 34 | def test_set_get_omp_schedule_config(self): 35 | # Given 36 | config = self.config 37 | # When 38 | config.omp_schedule = ("static", 10) 39 | # Then 40 | self.assertEqual(config.omp_schedule, ("static", 10)) 41 | 42 | def test_set_string_omp_schedule(self): 43 | # Given 44 | config = self.config 45 | # When 46 | config.set_omp_schedule("dynamic,20") 47 | # Then 48 | self.assertEqual(config.omp_schedule, ("dynamic", 20)) 49 | 50 | def test_set_omp_schedule_config_exception(self): 51 | # Given 52 | config = self.config 53 | # When 54 | # Then 55 | with self.assertRaises(ValueError): 56 | config.omp_schedule = ("random", 20) 57 | 58 | def test_use_opencl_config_default(self): 59 | # Given 60 | config = self.config 61 | # When 62 | # Then 63 | self.assertFalse(config.use_opencl) 64 | 65 | def test_set_get_use_opencl_config(self): 66 | # Given 67 | config = self.config 68 | # When 69 | config.use_opencl = 10 70 | # Then 71 | self.assertEqual(config.use_opencl, 10) 72 | 73 | def test_use_double_config_default(self): 74 | # Given 75 | config = self.config 76 | # When 77 | # Then 78 | self.assertFalse(config.use_double) 79 | 80 | def test_set_get_use_double_config(self): 81 | # Given 82 | config = self.config 83 | # When 84 | config.use_double = 10 85 | # Then 86 | self.assertEqual(config.use_double, 10) 87 | 88 | def test_default_global_config_is_really_global(self): 89 | # Given. 90 | config = get_config() 91 | self.assertTrue(isinstance(config, Config)) 92 | 93 | # When 94 | config.use_openmp = 100 95 | 96 | # Then. 97 | config1 = get_config() 98 | self.assertEqual(config1.use_openmp, 100) 99 | 100 | def test_set_global(self): 101 | # Given. 102 | self.config.use_openmp = 200 103 | set_config(self.config) 104 | 105 | # When 106 | config = get_config() 107 | 108 | # Then. 
109 | self.assertEqual(config.use_openmp, 200) 110 | 111 | def test_use_config(self): 112 | # Given 113 | self.config.use_openmp = 200 114 | set_config(self.config) 115 | 116 | # When/Then 117 | with use_config(use_openmp=300) as cfg: 118 | config = get_config() 119 | self.assertEqual(config.use_openmp, 300) 120 | self.assertEqual(cfg.use_openmp, 300) 121 | cfg.use_openmp = 100 122 | cfg.use_double = False 123 | self.assertEqual(config.use_openmp, 100) 124 | self.assertEqual(config.use_double, False) 125 | 126 | # Then 127 | self.assertEqual(get_config().use_openmp, 200) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /compyle/tests/test_cuda.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pytest.importorskip('pycuda') 4 | 5 | from compyle.array import wrap 6 | from compyle.thrust.sort import argsort 7 | import numpy as np 8 | 9 | 10 | def test_sort(): 11 | length = 100 12 | a = np.array(np.random.rand(length), dtype=np.float32) 13 | b = wrap(a, backend='cuda') 14 | res_gpu = argsort(b).get() 15 | res_cpu = np.argsort(a) 16 | assert np.all(res_gpu == res_cpu) 17 | -------------------------------------------------------------------------------- /compyle/tests/test_ext_module.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from io import open as io_open 3 | import os 4 | from os.path import join, exists 5 | import shutil 6 | import sys 7 | import tempfile 8 | from textwrap import dedent 9 | from multiprocessing import Pool 10 | import pytest 11 | from unittest import TestCase, main, SkipTest 12 | 13 | try: 14 | from unittest import mock 15 | except ImportError: 16 | import mock 17 | 18 | import compyle.ext_module 19 | 20 | from ..ext_module import (get_md5, ExtModule, get_ext_extension, 21 | get_config_file_opts, get_openmp_flags) 22 | 23 | 24 | def _check_write_source(root): 25 | """Used to create an ExtModule and test if a file was opened. 26 | 27 | It returns the number of times "open" was called. 28 | """ 29 | m = mock.mock_open() 30 | orig_side_effect = m.side_effect 31 | 32 | def _side_effect(*args, **kw): 33 | with io_open(*args, **kw) as fp: 34 | fp.write("junk") 35 | return orig_side_effect(*args, **kw) 36 | m.side_effect = _side_effect 37 | 38 | with mock.patch('compyle.ext_module.io.open', m, create=True): 39 | s = ExtModule("print('hello')", root=root) 40 | s.write_source() 41 | return m.call_count 42 | 43 | 44 | def _check_compile(root): 45 | with mock.patch('shutil.copy') as m: 46 | s = ExtModule("print('hello')", root=root) 47 | s.write_and_build() 48 | if m.called: 49 | # If it was called, do the copy to mimic the action. 
50 | shutil.copy(*m.call_args[0]) 51 | return m.call_count 52 | 53 | 54 | def test_get_config_file_opts(): 55 | # Given 56 | cfg = dedent(''' 57 | OMP_CFLAGS = ['-fxxx'] 58 | OMP_LINK = ['-fyyy'] 59 | ''') 60 | m = mock.mock_open(read_data=cfg) 61 | with mock.patch('compyle.ext_module.open', m), \ 62 | mock.patch('compyle.ext_module.exists') as mock_exists: 63 | # When 64 | mock_exists.return_value = False 65 | opts = get_config_file_opts() 66 | print(opts) 67 | 68 | # Then 69 | assert 'OMP_CFLAGS' not in opts 70 | assert 'OMP_LINK' not in opts 71 | 72 | # When 73 | mock_exists.return_value = True 74 | opts = get_config_file_opts() 75 | 76 | # Then 77 | assert opts['OMP_CFLAGS'] == ['-fxxx'] 78 | assert opts['OMP_LINK'] == ['-fyyy'] 79 | 80 | 81 | def test_get_openmp_flags(): 82 | # Given/When 83 | f = get_openmp_flags() 84 | 85 | # Then 86 | assert f[0] != ['-fxxx'] 87 | assert f[1] != ['-fyyy'] 88 | assert len(f[0]) > 0 89 | 90 | # Given 91 | m = dict(OMP_CFLAGS=['-fxxx'], OMP_LINK=['-fyyy']) 92 | 93 | with mock.patch.object(compyle.ext_module, 'CONFIG_OPTS', m): 94 | # When 95 | f = get_openmp_flags() 96 | 97 | # Then 98 | assert f[0] == ['-fxxx'] 99 | assert f[1] == ['-fyyy'] 100 | 101 | 102 | class TestMiscExtMod(TestCase): 103 | def test_md5(self): 104 | data = "hello world" 105 | # Two calls with same data produce same result 106 | self.assertEqual(get_md5(data), get_md5(data)) 107 | # Two calls with different data produce different md5sums. 108 | self.assertNotEqual(get_md5(data), get_md5(data + ' ')) 109 | 110 | 111 | @pytest.fixture(scope="function") 112 | def use_capsys(request, capsys): 113 | request.instance.capsys = capsys 114 | 115 | 116 | class TestExtModule(TestCase): 117 | def setUp(self): 118 | self.root = tempfile.mkdtemp() 119 | self.data = dedent('''\ 120 | # cython: language_level=3 121 | def f(): 122 | return "hello world" 123 | ''') 124 | 125 | def tearDown(self): 126 | if sys.platform.startswith('win'): 127 | try: 128 | shutil.rmtree(self.root) 129 | except WindowsError: 130 | pass 131 | else: 132 | shutil.rmtree(self.root) 133 | 134 | def test_constructor(self): 135 | data = self.data 136 | s = ExtModule(data, root=self.root) 137 | self.assertTrue(exists(join(self.root, 'build'))) 138 | 139 | self.assertEqual(s.hash, get_md5(data)) 140 | self.assertEqual(s.code, data) 141 | expect_name = 'm_%s' % (s.hash) 142 | self.assertEqual(s.name, expect_name) 143 | self.assertEqual(s.src_path, join(self.root, expect_name + '.pyx')) 144 | self.assertEqual(s.ext_path, 145 | join(self.root, expect_name + get_ext_extension())) 146 | 147 | s.write_source() 148 | self.assertTrue(exists(s.src_path)) 149 | self.assertEqual(data, open(s.src_path).read()) 150 | 151 | def test_default_root(self): 152 | try: 153 | data = self.data 154 | s = ExtModule(data) 155 | s.write_source() 156 | self.assertTrue(exists(join(s.root, 'build'))) 157 | self.assertEqual(s.hash, get_md5(data)) 158 | self.assertEqual(s.code, data) 159 | self.assertTrue(exists(s.src_path)) 160 | self.assertEqual(data, open(s.src_path).read()) 161 | finally: 162 | os.unlink(s.src_path) 163 | 164 | def test_load_module(self): 165 | data = self.data 166 | s = ExtModule(data, root=self.root) 167 | mod = s.load() 168 | self.assertEqual(mod.f(), "hello world") 169 | self.assertTrue(exists(s.ext_path)) 170 | 171 | @pytest.mark.usefixtures("use_capsys") 172 | def test_compiler_errors_are_captured(self): 173 | # Given 174 | src = dedent('''\ 175 | # cython: language_level=3 176 | def f(): 177 | print(bug) 178 | ''') 179 | s = 
ExtModule(src, root=self.root) 180 | 181 | # When 182 | self.assertRaises(SystemExit, s.write_and_build) 183 | 184 | # Then 185 | captured = self.capsys.readouterr() 186 | err = captured.out + captured.err 187 | print(err) 188 | self.assertTrue('Error compiling Cython file' in err) 189 | self.assertTrue('def f()' in err) 190 | 191 | def _create_dummy_module(self): 192 | code = "# cython: language_level=3\ndef hello(): return 'hello'" 193 | modname = 'test_rebuild.py' 194 | f = join(self.root, modname) 195 | with open(f, 'w') as fp: 196 | fp.write(code) 197 | return f 198 | 199 | @contextmanager 200 | def _add_root_to_sys_path(self): 201 | import sys 202 | if self.root not in sys.path: 203 | sys.path.insert(0, self.root) 204 | try: 205 | yield 206 | finally: 207 | sys.path.remove(self.root) 208 | 209 | def test_rebuild_when_dependencies_change(self): 210 | # Given. 211 | data = self.data 212 | depends = ["test_rebuild"] 213 | s = ExtModule(data, root=self.root, depends=depends) 214 | fname = self._create_dummy_module() 215 | f_stat = os.stat(fname) 216 | 217 | with self._add_root_to_sys_path(): 218 | # When 219 | self.assertTrue(s.should_recompile()) 220 | s.write_and_build() 221 | 222 | # Then. 223 | self.assertFalse(s.should_recompile()) 224 | 225 | # Now lets re-create the module and try again. 226 | 227 | # When. 228 | fname = self._create_dummy_module() 229 | # Update the timestamp to make it newer, otherwise we need to 230 | # sleep. 231 | os.utime(fname, (f_stat.st_atime, f_stat.st_mtime + 10)) 232 | 233 | # Then. 234 | self.assertTrue(s.should_recompile()) 235 | 236 | def test_that_multiple_writes_do_not_occur_for_same_source(self): 237 | if (sys.platform.startswith("win32") and 238 | sys.version_info[:2] == (3, 11)): 239 | raise SkipTest('Fails on Python 3.11') 240 | 241 | # Given 242 | n_proc = 5 243 | p = Pool(n_proc) 244 | 245 | # When 246 | 247 | # Note that _create_extension cannot be defined here or even in the 248 | # class as a nested function or instance method cannot be pickled. 249 | 250 | result = p.map(_check_write_source, [self.root]*n_proc) 251 | p.close() 252 | 253 | # Then 254 | # The file should have been opened only once. 255 | self.assertEqual(sum(result), 1) 256 | 257 | def test_that_multiple_compiles_do_not_occur_for_same_source(self): 258 | # Given 259 | n_proc = 5 260 | p = Pool(n_proc) 261 | 262 | # When 263 | 264 | # Note that _check_compile cannot be defined here or even in the 265 | # class as a nested function or instance method cannot be pickled. 266 | 267 | result = p.map(_check_compile, [self.root]*n_proc) 268 | p.close() 269 | 270 | # Then 271 | # The shutil.copy should have been run only once. 
272 | self.assertEqual(sum(result), 1) 273 | 274 | 275 | if __name__ == '__main__': 276 | main() 277 | -------------------------------------------------------------------------------- /compyle/tests/test_gpu_struct.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pytest 3 | 4 | import numpy as np 5 | 6 | 7 | class TestStructMapping(unittest.TestCase): 8 | 9 | @classmethod 10 | def setUpClass(cls): 11 | print("SetupClass") 12 | pytest.importorskip("pycuda") 13 | from compyle.cuda import set_context 14 | set_context() 15 | 16 | def test_cuda_struct_mapping(self): 17 | from compyle.cuda import match_dtype_to_c_struct 18 | from pycuda import gpuarray 19 | # Given 20 | dtype = np.dtype([('l', np.int64), 21 | ('i', np.uint8), 22 | ('x', np.float32)]) 23 | a = np.empty(1, dtype) 24 | a['l'] = 1.0 25 | a['i'] = 2 26 | a['x'] = 1.23 27 | 28 | # When 29 | gs1, code1 = match_dtype_to_c_struct(None, "junk", a.dtype) 30 | a_ga = a.astype(gs1) 31 | ga = gpuarray.to_gpu(a_ga) 32 | 33 | # Then 34 | result = ga.get() 35 | np.testing.assert_almost_equal(result.tolist(), a.tolist()) 36 | self.assertFalse(a.dtype.fields == gs1.fields) 37 | -------------------------------------------------------------------------------- /compyle/tests/test_low_level.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from pytest import importorskip 5 | 6 | from ..config import use_config 7 | from ..array import wrap 8 | from ..types import annotate, declare 9 | from ..low_level import ( 10 | Cython, Kernel, LocalMem, local_barrier, GID_0, LDIM_0, LID_0, 11 | nogil, prange, parallel, cast 12 | ) 13 | 14 | 15 | class TestKernel(unittest.TestCase): 16 | def test_simple_kernel_opencl(self): 17 | importorskip('pyopencl') 18 | 19 | # Given 20 | @annotate(gdoublep='x, y', a='float', size='int') 21 | def knl(x, y, a, size): 22 | i = declare('int') 23 | i = GID_0*LDIM_0 + LID_0 24 | if i < size: 25 | y[i] = x[i]*a 26 | 27 | x = np.linspace(0, 1, 1000) 28 | y = np.zeros_like(x) 29 | x, y = wrap(x, y, backend='opencl') 30 | 31 | # When 32 | k = Kernel(knl, backend='opencl') 33 | a = 21.0 34 | k(x, y, a, 1000) 35 | 36 | # Then 37 | y.pull() 38 | self.assertTrue(np.allclose(y.data, x.data * a)) 39 | 40 | def test_simple_kernel_cuda(self): 41 | importorskip('pycuda') 42 | 43 | # Given 44 | @annotate(gdoublep='x, y', a='float', size='int') 45 | def knl(x, y, a, size): 46 | i = declare('int') 47 | i = GID_0*LDIM_0 + LID_0 48 | if i < size: 49 | y[i] = x[i]*a 50 | 51 | x = np.linspace(0, 1, 1000) 52 | y = np.zeros_like(x) 53 | x, y = wrap(x, y, backend='cuda') 54 | 55 | # When 56 | k = Kernel(knl, backend='cuda') 57 | a = 21.0 58 | k(x, y, a, 1000) 59 | 60 | # Then 61 | y.pull() 62 | self.assertTrue(np.allclose(y.data, x.data * a)) 63 | 64 | def test_kernel_with_local_memory_opencl(self): 65 | importorskip('pyopencl') 66 | 67 | # Given 68 | @annotate(gdoublep='x, y', xc='ldoublep', a='float') 69 | def knl(x, y, xc, a): 70 | i, lid = declare('int', 2) 71 | lid = LID_0 72 | i = GID_0 * LDIM_0 + lid 73 | 74 | xc[lid] = x[i] 75 | 76 | local_barrier() 77 | 78 | y[i] = xc[lid] * a 79 | 80 | x = np.linspace(0, 1, 1024) 81 | y = np.zeros_like(x) 82 | xc = LocalMem(1, backend='opencl') 83 | 84 | x, y = wrap(x, y, backend='opencl') 85 | 86 | # When 87 | k = Kernel(knl, backend='opencl') 88 | a = 21.0 89 | k(x, y, xc, a) 90 | 91 | # Then 92 | y.pull() 93 | self.assertTrue(np.allclose(y.data, x.data * a)) 
94 | 95 | def test_kernel_with_local_memory_cuda(self): 96 | importorskip('pycuda') 97 | 98 | # Given 99 | @annotate(gdoublep='x, y', xc='ldoublep', a='float') 100 | def knl(x, y, xc, a): 101 | i, lid = declare('int', 2) 102 | lid = LID_0 103 | i = GID_0 * LDIM_0 + lid 104 | 105 | xc[lid] = x[i] 106 | 107 | local_barrier() 108 | 109 | y[i] = xc[lid] * a 110 | 111 | x = np.linspace(0, 1, 1024) 112 | y = np.zeros_like(x) 113 | xc = LocalMem(1, backend='cuda') 114 | 115 | x, y = wrap(x, y, backend='cuda') 116 | 117 | # When 118 | k = Kernel(knl, backend='cuda') 119 | a = 21.0 120 | k(x, y, xc, a) 121 | 122 | # Then 123 | y.pull() 124 | self.assertTrue(np.allclose(y.data, x.data * a)) 125 | 126 | 127 | @annotate(double='x, y, a', return_='double') 128 | def func(x, y, a): 129 | return x * y * a 130 | 131 | 132 | @annotate(doublep='x, y', a='double', n='int', return_='double') 133 | def knl(x, y, a, n): 134 | i = declare('int') 135 | s = declare('double') 136 | s = 0.0 137 | for i in range(n): 138 | s += func(x[i], y[i], a) 139 | return s 140 | 141 | 142 | @annotate(n='int', doublep='x, y', a='double') 143 | def cy_extern(x, y, a, n): 144 | i = declare('int') 145 | with nogil, parallel(): 146 | for i in prange(n): 147 | y[i] = x[i] * a 148 | 149 | 150 | @annotate(int='num, return_') 151 | def _factorial(num): 152 | if num == 0: 153 | return 1 154 | else: 155 | return num*_factorial(num - 1) 156 | 157 | 158 | class TestCython(unittest.TestCase): 159 | def test_cython_code_with_return_and_nested_call(self): 160 | # Given 161 | n = 1000 162 | x = np.linspace(0, 1, n) 163 | y = x.copy() 164 | a = 2.0 165 | 166 | # When 167 | cy = Cython(knl) 168 | result = cy(x, y, a, n) 169 | 170 | # Then 171 | self.assertAlmostEqual(result, np.sum(x * y * a)) 172 | 173 | def test_cython_with_externs(self): 174 | # Given 175 | n = 1000 176 | x = np.linspace(0, 1, n) 177 | y = np.zeros_like(x) 178 | a = 2.0 179 | 180 | # When 181 | with use_config(use_openmp=True): 182 | cy = Cython(cy_extern) 183 | 184 | cy(x, y, a, n) 185 | 186 | # Then 187 | self.assertTrue(np.allclose(y, x * a)) 188 | 189 | def test_recursive_function(self): 190 | # Given/when 191 | fac = Cython(_factorial) 192 | 193 | # Then 194 | self.assertEqual(fac(0), 1) 195 | self.assertEqual(fac(1), 1) 196 | self.assertEqual(fac(3), 6) 197 | 198 | 199 | def test_cast_works_in_pure_python(): 200 | x = cast(1.23, "int") 201 | assert x == 1 202 | 203 | y = cast(2, "float") 204 | assert y == 2.0 205 | -------------------------------------------------------------------------------- /compyle/tests/test_profile.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from pytest import importorskip 5 | 6 | from ..config import get_config, use_config 7 | from ..array import wrap, zeros, ones 8 | from ..profile import ( 9 | get_profile_info, named_profile, profile, profile_ctx, ProfileContext 10 | ) 11 | 12 | 13 | def axpb(): 14 | a, b = 7, 13 15 | x = np.random.rand(1000) 16 | return a * x + b 17 | 18 | 19 | class A: 20 | @profile 21 | def f(self): 22 | pass 23 | 24 | 25 | class B: 26 | def __init__(self): 27 | self.name = 'my_name' 28 | 29 | @profile 30 | def f(self): 31 | pass 32 | 33 | @profile(name='explicit_name') 34 | def named(self): 35 | pass 36 | 37 | 38 | @profile 39 | def profiled_axpb(): 40 | axpb() 41 | 42 | 43 | @profile 44 | def nested(): 45 | profiled_axpb() 46 | 47 | 48 | @named_profile('prefix_sum', backend='opencl') 49 | def get_prefix_sum_knl(): 50 | from ..opencl 
import get_queue, get_context 51 | from pyopencl.scan import GenericScanKernel 52 | ctx = get_context() 53 | queue = get_queue() 54 | return GenericScanKernel(ctx, np.int32, 55 | arguments="__global int *ary", 56 | input_expr="ary[i]", 57 | scan_expr="a+b", neutral="0", 58 | output_statement="ary[i] = prev_item") 59 | 60 | 61 | def test_profile_ctx(): 62 | with profile_ctx('axpb'): 63 | axpb() 64 | 65 | profile_info = get_profile_info() 66 | assert profile_info[0]['axpb']['calls'] == 1 67 | 68 | 69 | def test_profile(): 70 | for i in range(100): 71 | profiled_axpb() 72 | 73 | profile_info = get_profile_info() 74 | assert profile_info[0]['profiled_axpb']['calls'] == 100 75 | 76 | 77 | def test_profile_method(): 78 | # Given 79 | a = A() 80 | b = B() 81 | 82 | # When 83 | for i in range(5): 84 | a.f() 85 | b.f() 86 | b.named() 87 | 88 | # Then 89 | profile_info = get_profile_info() 90 | assert profile_info[0]['A.f']['calls'] == 5 91 | 92 | # For b.f(), b.name is my_name. 93 | assert profile_info[0]['my_name']['calls'] == 5 94 | 95 | # profile was given an explicit name for b.named() 96 | assert profile_info[0]['explicit_name']['calls'] == 5 97 | 98 | 99 | def test_named_profile(): 100 | importorskip('pyopencl') 101 | get_config().profile = True 102 | knl = get_prefix_sum_knl() 103 | x = ones(100, np.int32, backend='opencl') 104 | knl(x.dev) 105 | 106 | profile_info = get_profile_info() 107 | assert profile_info[0]['prefix_sum']['calls'] == 1 108 | 109 | 110 | def test_nesting_and_context(): 111 | # When 112 | p = ProfileContext('main') 113 | nested() 114 | p.stop() 115 | 116 | # Then 117 | prof = get_profile_info() 118 | assert len(prof) == 3 119 | assert prof[0]['main']['calls'] == 1 120 | assert prof[1]['nested']['calls'] == 1 121 | assert prof[2]['profiled_axpb']['calls'] == 1 122 | -------------------------------------------------------------------------------- /compyle/tests/test_template.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | import numpy as np 4 | 5 | from ..array import wrap 6 | from ..types import annotate, KnownType 7 | from ..template import Template 8 | from ..parallel import Elementwise 9 | 10 | 11 | class SimpleTemplate(Template): 12 | def __init__(self, name, cond=False): 13 | super(SimpleTemplate, self).__init__(name=name) 14 | self.cond = cond 15 | 16 | def template(self, x, y): 17 | '''Docstring text''' 18 | ''' 19 | % for i in range(5): 20 | print(${i}) 21 | % endfor 22 | % if obj.cond: 23 | return 'hello' 24 | % else: 25 | return 'bye' 26 | % endif 27 | ''' 28 | 29 | 30 | class Dummy(Template): 31 | def template(self): 32 | '''Docs''' 33 | ''' 34 | print(123) 35 | ''' 36 | 37 | 38 | class ParallelExample(Template): 39 | @annotate(i='int', x='doublep', y='doublep') 40 | def template(self, i, x, y): 41 | ''' 42 | y[i] = x[i]*2.0 43 | ''' 44 | 45 | 46 | class ExtraArgs(Template): 47 | def extra_args(self): 48 | return ['x'], {'x': 'int'} 49 | 50 | def template(self): 51 | ''' 52 | return x + 1 53 | ''' 54 | 55 | 56 | def test_simple_template(): 57 | # Given 58 | t = SimpleTemplate(name='simple') 59 | 60 | # When 61 | simple = t.function 62 | x = simple(1, 2) 63 | 64 | # Then 65 | assert x == 'bye' 66 | 67 | # Given 68 | t = SimpleTemplate(name='simple', cond=True) 69 | 70 | # When 71 | simple = t.function 72 | x = simple(1, 2) 73 | 74 | # Then 75 | assert x == 'hello' 76 | 77 | 78 | def test_that_source_code_is_available(): 79 | # Given/When 80 | dummy = Dummy('dummy').function 81 | 82 | # 
Then 83 | expect = dedent('''\ 84 | def dummy(): 85 | """Docs 86 | """ 87 | print(123) 88 | ''') 89 | assert dummy.source.strip() == expect.strip() 90 | assert dummy.is_jit is True 91 | 92 | 93 | def test_template_usable_in_code_generation(): 94 | # Given 95 | twice = ParallelExample('twice').function 96 | 97 | x = np.linspace(0, 1, 10) 98 | y = np.zeros_like(x) 99 | x, y = wrap(x, y) 100 | 101 | # When 102 | e = Elementwise(twice) 103 | e(x, y) 104 | 105 | # Then 106 | y.pull() 107 | np.testing.assert_almost_equal(y, 2.0*x.data) 108 | assert twice.is_jit is False 109 | 110 | 111 | def test_template_with_extra_args(): 112 | # Given 113 | extra = ExtraArgs('extra').function 114 | 115 | # When 116 | result = extra(1) 117 | 118 | # Then 119 | assert result == 2 120 | assert extra.__annotations__ == {'x': KnownType('int')} 121 | 122 | 123 | def test_template_inject_works(): 124 | # Given 125 | def f(x): 126 | '''Docs 127 | ''' 128 | for i in range(5): 129 | x += i 130 | return x + 1 131 | 132 | # When 133 | t = Template('t') 134 | result = t.inject(f, indent=1) 135 | 136 | # Then 137 | lines = ['for i in range(5):\n', ' x += i\n', 'return x + 1\n'] 138 | expect = ''.join([' '*4 + x for x in lines]) 139 | assert result == expect 140 | 141 | # When 142 | result = t.inject(f, indent=2) 143 | 144 | # Then 145 | lines = ['for i in range(5):\n', ' x += i\n', 'return x + 1\n'] 146 | expect = ''.join([' '*8 + x for x in lines]) 147 | assert result == expect 148 | 149 | # When 150 | result = t.inject(f, indent=0) 151 | 152 | # Then 153 | lines = ['for i in range(5):\n', ' x += i\n', 'return x + 1\n'] 154 | expect = ''.join(lines) 155 | assert result == expect 156 | -------------------------------------------------------------------------------- /compyle/tests/test_transpiler.py: -------------------------------------------------------------------------------- 1 | from math import sin 2 | import unittest 3 | 4 | from ..transpiler import get_external_symbols_and_calls, Transpiler 5 | from ..extern import printf 6 | 7 | SIZE = 10 8 | 9 | my_printf = printf 10 | 11 | 12 | def h(x=0.0): 13 | return sin(x) + 1 14 | 15 | 16 | def f(x=0.0): 17 | return h(x*2+1) 18 | 19 | 20 | def g(x=0.0): 21 | return f(x*2) 22 | 23 | 24 | def implicit_f(x, y): 25 | # These should be ignored. 26 | j = LID_0 + GID_0 + LDIM_0 + GDIM_0 27 | s = y[SIZE-1] 28 | for i in range(SIZE): 29 | s += sin(x[i]) 30 | 31 | my_printf("%f", s) 32 | return s 33 | 34 | 35 | def undefined_call(x): 36 | # An intentional error that should be caught. 
37 |     foo(x)
38 | 
39 | 
40 | def _factorial(num):
41 |     if num == 0:
42 |         return 1
43 |     else:
44 |         return num*_factorial(num - 1)
45 | 
46 | 
47 | class TestTranspiler(unittest.TestCase):
48 |     def test_get_external_symbols_and_calls(self):
49 |         # Given/When
50 |         syms, implicit, calls, ext = get_external_symbols_and_calls(
51 |             g, 'cython'
52 |         )
53 | 
54 |         # Then
55 |         expect = [f]
56 |         self.assertEqual(syms, {})
57 |         self.assertEqual(expect, calls)
58 |         self.assertEqual(ext, [])
59 | 
60 |         # Given/When
61 |         syms, implicit, calls, ext = get_external_symbols_and_calls(
62 |             implicit_f, 'cython'
63 |         )
64 | 
65 |         # Then
66 |         self.assertEqual(syms, {'SIZE': 10})
67 |         self.assertEqual(implicit, {'i'})
68 |         self.assertEqual(calls, [])
69 |         self.assertEqual(ext, [my_printf])
70 | 
71 |         # Given/When
72 |         self.assertRaises(NameError, get_external_symbols_and_calls,
73 |                           undefined_call, 'cython')
74 | 
75 |     def test_get_external_symbols_and_calls_handles_recursion(self):
76 |         # Given/When
77 |         syms, implicit, calls, ext = get_external_symbols_and_calls(
78 |             _factorial, 'cython'
79 |         )
80 | 
81 |         # Then
82 |         self.assertEqual(syms, {})
83 |         self.assertEqual(calls, [])
84 |         self.assertEqual(implicit, set())
85 |         self.assertEqual(ext, [])
86 | 
87 |     def test_transpiler(self):
88 |         # Given
89 |         t = Transpiler(backend='cython')
90 | 
91 |         # When
92 |         t.add(g)
93 | 
94 |         # Then
95 |         for func in (g, f, h):
96 |             self.assertTrue(func in t.blocks)
97 | 
98 |         expect = [h, f, g]
99 |         self.assertListEqual([x.obj for x in t.blocks], expect)
100 | 
-------------------------------------------------------------------------------- /compyle/tests/test_types.py: --------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import numpy as np
4 | 
5 | from ..types import KnownType, declare, annotate
6 | 
7 | 
8 | class TestDeclare(unittest.TestCase):
9 |     def test_declare(self):
10 |         self.assertEqual(declare('int'), 0)
11 |         self.assertEqual(declare('long'), 0)
12 |         self.assertEqual(declare('double'), 0.0)
13 |         self.assertEqual(declare('float'), 0.0)
14 | 
15 |         self.assertEqual(declare('int', 2), (0, 0))
16 |         self.assertEqual(declare('long', 3), (0, 0, 0))
17 |         self.assertEqual(declare('double', 2), (0.0, 0.0))
18 |         self.assertEqual(declare('float', 3), (0.0, 0.0, 0.0))
19 | 
20 |         res = declare('matrix(3)')
21 |         self.assertTrue(np.all(res == np.zeros(3)))
22 |         res = declare('matrix(3)', 3)
23 |         for i in range(3):
24 |             self.assertTrue(np.all(res[i] == np.zeros(3)))
25 |         res = declare('matrix((3,))')
26 |         self.assertTrue(np.all(res == np.zeros(3)))
27 |         res = declare('matrix((3, 3))')
28 |         self.assertTrue(np.all(res == np.zeros((3, 3))))
29 | 
30 |     def test_declare_with_type(self):
31 |         res = declare('matrix(3, "int")')
32 |         self.assertTrue(np.all(res == np.zeros(3)))
33 |         self.assertEqual(res.dtype, np.int32)
34 | 
35 |         res = declare('matrix((2, 2), "unsigned int")')
36 |         self.assertTrue(np.all(res == np.zeros((2, 2))))
37 |         self.assertEqual(res.dtype, np.uint32)
38 | 
39 |         res = declare('matrix((3,), "float")')
40 |         self.assertTrue(np.all(res == np.zeros((3,))))
41 |         self.assertEqual(res.dtype, np.float32)
42 | 
43 |     def test_declare_with_address_space(self):
44 |         self.assertEqual(declare('LOCAL_MEM int', 2), (0, 0))
45 |         self.assertEqual(declare('GLOBAL_MEM float', 2), (0.0, 0.0))
46 | 
47 |         res = declare('LOCAL_MEM matrix(3)')
48 |         self.assertTrue(np.all(res == np.zeros(3)))
49 | 
50 |         res = declare('GLOBAL_MEM matrix(3)')
51 |         self.assertTrue(np.all(res == np.zeros(3)))
52 | 
53 | 
54 | class
TestAnnotate(unittest.TestCase):
55 |     def test_simple_annotation(self):
56 |         # Given/When
57 |         @annotate(i='int', x='floatp', return_='float')
58 |         def f(i, x):
59 |             return x[i]*2.0
60 | 
61 |         # Then
62 |         result = f.__annotations__
63 |         self.assertEqual(result['return'], KnownType('float'))
64 |         self.assertEqual(result['i'], KnownType('int'))
65 |         self.assertEqual(result['x'], KnownType('float*', 'float'))
66 | 
67 |     def test_reversed_annotation(self):
68 |         # Given/When
69 |         @annotate(i='int', floatp='x, y', return_='float')
70 |         def f(i, x, y):
71 |             return x[i]*y[i]
72 | 
73 |         # Then
74 |         result = f.__annotations__
75 |         self.assertEqual(result['return'], KnownType('float'))
76 |         self.assertEqual(result['i'], KnownType('int'))
77 |         self.assertEqual(result['x'], KnownType('float*', 'float'))
78 |         self.assertEqual(result['y'], KnownType('float*', 'float'))
79 | 
80 |     def test_decorator_accepts_known_type_instance(self):
81 |         # Given/When
82 |         @annotate(x=KnownType('Thing'))
83 |         def f(x):
84 |             x.f()
85 | 
86 |         # Then
87 |         result = f.__annotations__
88 |         self.assertEqual(result['x'], KnownType('Thing'))
89 | 
90 |     def test_decorator_raises_error_for_unknown_error(self):
91 |         def f(x):
92 |             pass
93 | 
94 |         self.assertRaises(TypeError, annotate, f, x='alpha')
95 | 
-------------------------------------------------------------------------------- /compyle/tests/test_utils.py: --------------------------------------------------------------------------------
1 | import inspect
2 | from textwrap import dedent
3 | from unittest import TestCase
4 | 
5 | from .. import utils
6 | 
7 | 
8 | def func(x):
9 |     return x
10 | 
11 | 
12 | class TestUtils(TestCase):
13 |     def test_getsource_works_with_normal_function(self):
14 |         # Given/When
15 |         src = utils.getsource(func)
16 | 
17 |         # Then
18 |         self.assertEqual(src, inspect.getsource(func))
19 | 
20 |     def test_getsource_works_with_generated_function(self):
21 |         # Given
22 |         src = dedent('''
23 |         def gfunc(x):
24 |             return x
25 |         ''')
26 |         ns = {}
27 |         exec(src, ns)
28 |         gfunc = ns['gfunc']
29 |         gfunc.source = src
30 | 
31 |         # When
32 |         result = utils.getsource(gfunc)
33 | 
34 |         # Then
35 |         self.assertEqual(result, src)
36 | 
37 |     def test_getsourcelines_works_with_normal_function(self):
38 |         # Given/When
39 |         result = utils.getsourcelines(func)
40 | 
41 |         # Then
42 |         self.assertEqual(result, inspect.getsourcelines(func))
43 | 
44 |     def test_getsourcelines_works_with_generated_function(self):
45 |         # Given
46 |         src = dedent('''
47 |         def gfunc(x):
48 |             return x
49 |         ''')
50 |         ns = {}
51 |         exec(src, ns)
52 |         gfunc = ns['gfunc']
53 |         gfunc.source = src
54 | 
55 |         # When
56 |         result = utils.getsourcelines(gfunc)
57 | 
58 |         # Then
59 |         self.assertEqual(result, (src.splitlines(True), 0))
60 | 
-------------------------------------------------------------------------------- /compyle/thrust/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/pypr/compyle/bc858bab005f2cc9990267448c5b873d4b5f8635/compyle/thrust/__init__.py
-------------------------------------------------------------------------------- /compyle/thrust/sort.pyx: --------------------------------------------------------------------------------
1 | import cupy.cuda.thrust as thrust
2 | from libcpp.vector cimport vector
3 | import compyle.array as carr
4 | import numpy as np
5 | 
6 | 
7 | cpdef argsort(array, keys=None):
8 |     idx_array = carr.empty(array.length, np.intp, backend='cuda')
9 | 
10 |     cdef vector[int] shape
11 |     shape.push_back(<int> array.length)
12 | 
13 |     cdef size_t keys_ptr
14 |     if keys:
15 |         keys_ptr = keys.dev.ptr
16 |     else:
17 |         keys_ptr = 0
18 | 
19 |     thrust.argsort(array.dtype, idx_array.dev.ptr, array.dev.ptr, keys_ptr, shape)
20 | 
21 |     return idx_array
22 | 
-------------------------------------------------------------------------------- /compyle/types.py: --------------------------------------------------------------------------------
1 | import ast
2 | import platform
3 | import sys
4 | import numpy as np
5 | 
6 | 
7 | BITS = platform.architecture()[0]
8 | 
9 | 
10 | def declare(type, num=1):
11 |     """Declare the variable to be of the given type.
12 | 
13 |     The additional optional argument num is the number of items to return.
14 | 
15 |     Normally, the declare function only defines a variable when compiled,
16 |     however, this function here is a pure Python implementation so that the
17 |     same code can be executed in Python.
18 | 
19 |     Parameters
20 |     ----------
21 | 
22 |     type: str: String representing the type.
23 |     num: int: the number of values to return
24 | 
25 |     Examples
26 |     --------
27 | 
28 |     >>> declare('int')
29 |     0
30 |     >>> declare('int', 3)
31 |     (0, 0, 0)
32 |     """
33 |     if num == 1:
34 |         return _declare(type)
35 |     else:
36 |         return tuple(_declare(type) for i in range(num))
37 | 
38 | 
39 | def get_declare_info(arg):
40 |     """Given the first argument to the declare function, return the
41 |     (kind, address_space, type, shape) information.
42 | 
43 |     kind: is a string, 'primitive' or 'matrix'
44 |     address_space: is the address space string.
45 |     type: is the c data type to use.
46 |     shape: is a tuple with the shape of the matrix. It is None for primitives.
47 |     """
48 |     address_space = ''
49 |     shape = None
50 |     if arg.startswith(('LOCAL_MEM', 'GLOBAL_MEM')):
51 |         idx = arg.index(' ')
52 |         address_space = arg[:idx]
53 |         arg = arg[idx + 1:]
54 |     if arg.startswith('matrix'):
55 |         kind = 'matrix'
56 |         m_arg = ast.literal_eval(arg[7:-1])
57 |         if isinstance(m_arg, tuple) and \
58 |            len(m_arg) > 1 and \
59 |            isinstance(m_arg[1], str):
60 |             shape = m_arg[0]
61 |             type = m_arg[1]
62 |         else:
63 |             shape = m_arg
64 |             type = 'double'
65 |     else:
66 |         kind = 'primitive'
67 |         type = arg
68 | 
69 |     return kind, address_space, type, shape
70 | 
71 | 
72 | def _declare(arg):
73 |     kind, address_space, ctype, shape = get_declare_info(arg)
74 |     if kind == 'matrix':
75 |         dtype = C_NP_TYPE_MAP[ctype]
76 |         return np.zeros(shape, dtype=dtype)
77 |     else:
78 |         if ctype in ['double', 'float']:
79 |             return 0.0
80 |         else:
81 |             return 0
82 | 
83 | 
84 | class Undefined(object):
85 |     pass
86 | 
87 | 
88 | class KnownType(object):
89 |     """Simple object to specify a known type as a string.
90 | 
91 |     Smells but is convenient as the type may be one available only inside
92 |     Cython without a corresponding Python type.
93 |     """
94 | 
95 |     def __init__(self, type_str, base_type=''):
96 |         """Constructor
97 | 
98 |         The ``base_type`` argument is optional and used to represent the base
99 |         type, i.e. the type_str may be 'Foo*' but the base type will be 'Foo'
100 |         if specified.
101 | 
102 |         Parameters
103 |         ----------
104 |         type_str: str: A string representation of how the type is declared.
105 |         base_type: str: The base type of this entity.
(optional) 106 | 107 | """ 108 | self.type = type_str 109 | self.base_type = base_type 110 | 111 | def __repr__(self): 112 | if self.base_type: 113 | return 'KnownType("%s", "%s")' % (self.type, self.base_type) 114 | else: 115 | return 'KnownType("%s")' % self.type 116 | 117 | def __eq__(self, other): 118 | return self.type == other.type and self.base_type == other.base_type 119 | 120 | 121 | TYPES = dict( 122 | float=KnownType('float'), 123 | double=KnownType('double'), 124 | int=KnownType('int'), 125 | long=KnownType('long'), 126 | uint=KnownType('unsigned int'), 127 | ulong=KnownType('unsigned long'), 128 | 129 | floatp=KnownType('float*', 'float'), 130 | doublep=KnownType('double*', 'double'), 131 | intp=KnownType('int*', 'int'), 132 | longp=KnownType('long*', 'long'), 133 | uintp=KnownType('unsigned int*', 'unsigned int'), 134 | ulongp=KnownType('unsigned long*', 'unsigned long'), 135 | 136 | gfloatp=KnownType('GLOBAL_MEM float*', 'float'), 137 | gdoublep=KnownType('GLOBAL_MEM double*', 'double'), 138 | gintp=KnownType('GLOBAL_MEM int*', 'int'), 139 | glongp=KnownType('GLOBAL_MEM long*', 'long'), 140 | guintp=KnownType('GLOBAL_MEM unsigned int*', 'unsigned int'), 141 | gulongp=KnownType('GLOBAL_MEM unsigned long*', 'unsigned long'), 142 | 143 | lfloatp=KnownType('LOCAL_MEM float*', 'float'), 144 | ldoublep=KnownType('LOCAL_MEM double*', 'double'), 145 | lintp=KnownType('LOCAL_MEM int*', 'int'), 146 | llongp=KnownType('LOCAL_MEM long*', 'long'), 147 | luintp=KnownType('LOCAL_MEM unsigned int*', 'unsigned int'), 148 | lulongp=KnownType('LOCAL_MEM unsigned long*', 'unsigned long'), 149 | ) 150 | 151 | 152 | def _inject_types_in_module(): 153 | g = globals() 154 | for name, type in TYPES.items(): 155 | if name in ['int', 'long', 'float']: 156 | name = name + '_' 157 | g[name] = type 158 | 159 | 160 | # A convenience so users can import types directly from the module. 
161 | _inject_types_in_module()
162 | 
163 | NP_C_TYPE_MAP = {
164 |     np.dtype(bool): 'char',
165 |     np.dtype(np.float32): 'float', np.dtype(np.float64): 'double',
166 |     np.dtype(np.int8): 'char', np.dtype(np.uint8): 'unsigned char',
167 |     np.dtype(np.int16): 'short', np.dtype(np.uint16): 'unsigned short',
168 |     np.dtype(np.int32): 'int', np.dtype(np.uint32): 'unsigned int',
169 |     np.dtype(np.int64): 'long', np.dtype(np.uint64): 'unsigned long'
170 | }
171 | 
172 | C_NP_TYPE_MAP = {
173 |     'bool': bool,
174 |     'char': np.int8,
175 |     'double': np.float64,
176 |     'float': np.float32,
177 |     'int': np.int32,
178 |     'long': np.int64,
179 |     'short': np.int16,
180 |     'unsigned char': np.uint8,
181 |     'unsigned int': np.uint32,
182 |     'unsigned long': np.uint64,
183 |     'unsigned short': np.uint16
184 | }
185 | 
186 | if sys.platform.startswith('win') or BITS.startswith('32bit'):
187 |     NP_C_TYPE_MAP[np.dtype(np.int64)] = 'long long'
188 |     NP_C_TYPE_MAP[np.dtype(np.uint64)] = 'unsigned long long'
189 |     C_NP_TYPE_MAP['long long'] = np.int64
190 |     C_NP_TYPE_MAP['unsigned long long'] = np.uint64
191 |     TYPES['long long'] = KnownType('long long')
192 |     TYPES['glonglongp'] = KnownType('GLOBAL_MEM long long*', 'long long')
193 |     TYPES['gulonglongp'] = KnownType('GLOBAL_MEM unsigned long long*',
194 |                                      'unsigned long long')
195 |     TYPES['llonglongp'] = KnownType('LOCAL_MEM long long*', 'long long')
196 |     TYPES['lulonglongp'] = KnownType('LOCAL_MEM unsigned long long*',
197 |                                      'unsigned long long')
198 | 
199 | 
200 | NP_TYPE_LIST = list(C_NP_TYPE_MAP.values())
201 | 
202 | 
203 | def dtype_to_ctype(dtype, backend=None):
204 |     if backend in ('opencl', 'cuda'):
205 |         try:
206 |             from pyopencl.compyte.dtypes import \
207 |                 dtype_to_ctype as d2c_opencl
208 |             return d2c_opencl(dtype)
209 |         except (ValueError, ImportError):
210 |             pass
211 |     dtype = np.dtype(dtype)
212 |     return NP_C_TYPE_MAP[dtype]
213 | 
214 | 
215 | def ctype_to_dtype(ctype):
216 |     return np.dtype(C_NP_TYPE_MAP[ctype])
217 | 
218 | 
219 | def knowntype_to_ctype(knowntype):
220 |     knowntype_obj = TYPES.get(knowntype, None)
221 |     if knowntype_obj:
222 |         return knowntype_obj.type
223 |     else:
224 |         raise ValueError("Not a valid known type")
225 | 
226 | 
227 | def dtype_to_knowntype(dtype, address='scalar', backend=None):
228 |     ctype = dtype_to_ctype(dtype, backend=backend)
229 |     if 'unsigned' in ctype:
230 |         ctype = 'u%s' % ctype.replace('unsigned ', '')
231 |     knowntype = ctype.replace(' ', '')
232 |     if address == 'ptr':
233 |         knowntype = '%sp' % knowntype
234 |     elif address == 'global':
235 |         knowntype = 'g%sp' % knowntype
236 |     elif address == 'local':
237 |         knowntype = 'l%sp' % knowntype
238 |     elif address != 'scalar':
239 |         raise ValueError("address can only be scalar,"
240 |                          " ptr, global or local")
241 | 
242 |     # Validate the final known type before returning it.
243 |     if knowntype in TYPES:
244 |         return knowntype
245 |     else:
246 |         raise TypeError("Not a valid KnownType")
247 | 
248 | 
249 | def annotate(func=None, **kw):
250 |     """A decorator to specify the types of a function. These types are injected
251 |     into the function's `__annotations__` attribute.
252 | 253 | An example describes this best: 254 | 255 | @annotate(i='int', x='floatp', return_='float') 256 | def f(i, x): 257 | return x[i]*2.0 258 | 259 | One could also do: 260 | 261 | @annotate(i='int', floatp='x, y', return_='float') 262 | def f(i, x, y): 263 | return x[i]*y[i] 264 | 265 | """ 266 | data = {} 267 | 268 | if not kw: 269 | def wrapper(func): 270 | func.is_jit = True 271 | return func 272 | else: 273 | data = kwtype_to_annotation(kw) 274 | 275 | def wrapper(func): 276 | # For jitted functions, we should retain 277 | # the is_jit attribute when we annotate the function. 278 | func.is_jit = getattr(func, 'is_jit', False) 279 | try: 280 | func.__annotations__ = data 281 | except AttributeError: 282 | func.im_func.__annotations__ = data 283 | return func 284 | 285 | if func is None: 286 | return wrapper 287 | else: 288 | return wrapper(func) 289 | 290 | 291 | def _clean_name(name): 292 | return 'return' if name == 'return_' else name 293 | 294 | 295 | def _get_type(type): 296 | if isinstance(type, KnownType): 297 | return type 298 | elif type in TYPES: 299 | return TYPES[type] 300 | else: 301 | msg = ('Unknown type {type}, not a KnownType and not one of ' 302 | 'the pre-declared types.'.format(type=str(type))) 303 | raise TypeError(msg) 304 | 305 | 306 | def kwtype_to_annotation(kw): 307 | """Convert type to a KnownType""" 308 | data = {} 309 | 310 | for name, type in kw.items(): 311 | if isinstance(type, str) and ',' in type: 312 | for x in type.split(','): 313 | data[_clean_name(x.strip())] = _get_type(name) 314 | else: 315 | data[_clean_name(name)] = _get_type(type) 316 | 317 | return data 318 | -------------------------------------------------------------------------------- /compyle/utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import argparse 3 | import atexit 4 | from compyle.config import get_config 5 | from compyle.profile import print_profile 6 | 7 | 8 | def getsourcelines(obj): 9 | '''Given an object return the source code that defines it as a list of 10 | lines along with the starting line. 11 | ''' 12 | try: 13 | return inspect.getsourcelines(obj) 14 | except Exception: 15 | if hasattr(obj, 'source'): 16 | return obj.source.splitlines(True), 0 17 | else: 18 | raise 19 | 20 | 21 | def getsource(obj): 22 | '''Given an object return the source that defines it. 23 | ''' 24 | try: 25 | return inspect.getsource(obj) 26 | except Exception: 27 | if hasattr(obj, 'source'): 28 | return obj.source 29 | else: 30 | raise 31 | 32 | 33 | class ArgumentParser(argparse.ArgumentParser): 34 | '''Standard argument parser for compyle applications. 35 | Includes arguments for backend, openmp and use_double 36 | ''' 37 | 38 | def __init__(self, *args, **kwargs): 39 | super().__init__(*args, **kwargs) 40 | # setup standard arguments 41 | self.add_argument( 42 | '-b', '--backend', action='store', dest='backend', default='cython', 43 | choices = ['cython', 'opencl', 'cuda'], 44 | help='Choose the backend.' 45 | ) 46 | self.add_argument( 47 | '--openmp', action='store_true', dest='openmp', default=False, 48 | help='Use OpenMP.' 49 | ) 50 | self.add_argument( 51 | '--use-double', action='store_true', dest='use_double', 52 | default=False, help='Use double precision on the GPU.' 
53 | ) 54 | self.add_argument( 55 | '--suppress-warnings', action='store_true', 56 | dest='suppress_warnings', 57 | default=False, help='Suppress warnings' 58 | ) 59 | self.add_argument( 60 | '--profile', action='store_true', 61 | dest='profile', 62 | default=False, help='Print profiling info' 63 | ) 64 | self.profile_registered = False 65 | 66 | def _set_config_options(self, options): 67 | get_config().use_openmp = options.openmp 68 | get_config().use_double = options.use_double 69 | get_config().suppress_warnings = options.suppress_warnings 70 | if options.backend == 'opencl': 71 | get_config().use_opencl = True 72 | if options.backend == 'cuda': 73 | get_config().use_cuda = True 74 | if options.profile and not self.profile_registered: 75 | get_config().profile = True 76 | atexit.register(print_profile) 77 | self.profile_registered = True 78 | 79 | def parse_args(self, *args, **kwargs): 80 | options = super().parse_args(*args, **kwargs) 81 | self._set_config_options(options) 82 | return options 83 | 84 | def parse_known_args(self, *args, **kwargs): 85 | options, unknown = super().parse_known_args(*args, **kwargs) 86 | self._set_config_options(options) 87 | return options, unknown 88 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = ComPyle 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx-rtd-theme -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Compyle documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Dec 2 14:26:18 2018. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | from os.path import join 22 | # import sys 23 | # sys.path.insert(0, os.path.abspath('.')) 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 
29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = ['sphinx.ext.autodoc', 36 | 'sphinx.ext.mathjax', 37 | 'sphinx.ext.viewcode'] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix(es) of source filenames. 43 | # You can specify multiple suffix as a list of string: 44 | # 45 | # source_suffix = ['.rst', '.md'] 46 | source_suffix = '.rst' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = 'Compyle' 53 | copyright = '2018-2021, PySPH Developers' 54 | author = 'PySPH Developers' 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The version info for the project you're documenting, acts as replacement for 61 | # |version| and |release|, also used in various other places throughout the 62 | # built documents. 63 | # 64 | _d = {} 65 | fname = join(os.pardir, os.pardir, 'compyle', '__init__.py') 66 | exec(compile(open(fname).read(), fname, 'exec'), _d) 67 | version = release = _d['__version__'] 68 | 69 | # The language for content autogenerated by Sphinx. Refer to documentation 70 | # for a list of supported languages. 71 | # 72 | # This is also used if you do content translation via gettext catalogs. 73 | # Usually you set "language" from the command line for these cases. 74 | language = 'en' 75 | 76 | # List of patterns, relative to source directory, that match files and 77 | # directories to ignore when looking for source files. 78 | # This patterns also effect to html_static_path and html_extra_path 79 | exclude_patterns = [] 80 | 81 | # The name of the Pygments (syntax highlighting) style to use. 82 | pygments_style = 'sphinx' 83 | 84 | # If true, `todo` and `todoList` produce output, else they produce nothing. 85 | todo_include_todos = False 86 | 87 | 88 | # -- Options for HTML output ---------------------------------------------- 89 | 90 | # The theme to use for HTML and HTML Help pages. See the documentation for 91 | # a list of builtin themes. 92 | # 93 | html_theme = 'sphinx_rtd_theme' 94 | 95 | # Theme options are theme-specific and customize the look and feel of a theme 96 | # further. For a list of options available for each theme, see the 97 | # documentation. 98 | # 99 | # html_theme_options = {} 100 | 101 | # Add any paths that contain custom static files (such as style sheets) here, 102 | # relative to this directory. They are copied after the builtin static files, 103 | # so a file named "default.css" will overwrite the builtin "default.css". 104 | html_static_path = ['_static'] 105 | 106 | 107 | # -- Options for HTMLHelp output ------------------------------------------ 108 | 109 | # Output file base name for HTML help builder. 110 | htmlhelp_basename = 'Compyledoc' 111 | 112 | 113 | # -- Options for LaTeX output --------------------------------------------- 114 | 115 | latex_elements = { 116 | # The paper size ('letterpaper' or 'a4paper'). 117 | # 118 | # 'papersize': 'letterpaper', 119 | 120 | # The font size ('10pt', '11pt' or '12pt'). 121 | # 122 | # 'pointsize': '10pt', 123 | 124 | # Additional stuff for the LaTeX preamble. 
125 | # 126 | # 'preamble': '', 127 | 128 | # Latex figure (float) alignment 129 | # 130 | # 'figure_align': 'htbp', 131 | } 132 | 133 | # Grouping the document tree into LaTeX files. List of tuples 134 | # (source start file, target name, title, 135 | # author, documentclass [howto, manual, or own class]). 136 | latex_documents = [ 137 | (master_doc, 'Compyle.tex', 'Compyle Documentation', 138 | 'PySPH Developers', 'manual'), 139 | ] 140 | 141 | 142 | # -- Options for manual page output --------------------------------------- 143 | 144 | # One entry per manual page. List of tuples 145 | # (source start file, name, description, authors, manual section). 146 | man_pages = [ 147 | (master_doc, 'compyle', 'Compyle Documentation', 148 | [author], 1) 149 | ] 150 | 151 | 152 | # -- Options for Texinfo output ------------------------------------------- 153 | 154 | # Grouping the document tree into Texinfo files. List of tuples 155 | # (source start file, target name, title, author, 156 | # dir menu entry, description, category) 157 | texinfo_documents = [ 158 | (master_doc, 'Compyle', 'Compyle Documentation', 159 | author, 'Compyle', 'One line description of project.', 160 | 'Miscellaneous'), 161 | ] 162 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Compyle documentation master file, created by 2 | sphinx-quickstart on Sun Dec 2 14:26:18 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Compyle's documentation! 7 | =================================== 8 | 9 | Compyle allows users to execute a restricted subset of Python (almost similar 10 | to C) on a variety of HPC platforms. Currently we support multi-core CPU 11 | execution using Cython, and support GPU devices using OpenCL and CUDA. 12 | 13 | You can try Compyle online on a `Google Colab notebook `_. 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | :caption: Contents: 18 | 19 | overview.rst 20 | installation.rst 21 | details.rst 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | * :ref:`modindex` 29 | * :ref:`search` 30 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============== 3 | 4 | ComPyle is itself pure Python but depends on numpy_ and requires either Cython_ 5 | or PyOpenCL_ or PyCUDA_ along with the respective backends of a C/C++ compiler, 6 | OpenCL and CUDA. If you are only going to execute code on a CPU then all you 7 | need is Cython_. The full list of requirements is shown in the 8 | ``requirements.txt`` file on the repository. 9 | 10 | You should be able to install ComPyle by doing:: 11 | 12 | $ pip install compyle 13 | 14 | 15 | Note that when executing code on a CPU, you will need to have a C/C++ compiler 16 | that is compatible with your Python installation. In addition, if you need to 17 | use OpenMP you will need to make sure your compiler is compatible with that. 18 | Some additional information on this is included below. 
19 | 
20 | Installing the bleeding edge
21 | ----------------------------
22 | 
23 | Note that if you want the latest bleeding edge of compyle, clone the
24 | repository and install compyle like so::
25 | 
26 |     $ git clone https://github.com/pypr/compyle
27 |     $ cd compyle
28 |     $ python setup.py develop
29 |     # Or
30 |     $ pip install -e .
31 | 
32 | If you just want the latest version and do not want to clone the repository,
33 | you can also do::
34 | 
35 |     $ pip install https://github.com/pypr/compyle/zipball/main
36 | 
37 | 
38 | .. _PyOpenCL: https://documen.tician.de/pyopencl/
39 | .. _OpenCL: https://www.khronos.org/opencl/
40 | .. _Cython: http://www.cython.org
41 | .. _numpy: http://www.numpy.org
42 | .. _PyCUDA: https://documen.tician.de/pycuda
43 | .. _OpenMP: http://openmp.org/
44 | .. _CuPy: https://cupy.chainer.org/
45 | 
46 | 
47 | Setting up on GNU/Linux
48 | -------------------------
49 | 
50 | This is usually very simple: just installing the standard gcc/g++ packages ought
51 | to work. OpenMP_ is typically available, but if it is not, it can be installed
52 | with (on apt-compatible systems)::
53 | 
54 |     $ sudo apt-get install libgomp1
55 | 
56 | 
57 | Installation with conda on MacOS
58 | ---------------------------------
59 | 
60 | Recent conda_ packages make the process of setup very easy on MacOS assuming
61 | that you have the `XCode command line utilities`_ installed. Please make sure
62 | you install this.
63 | 
64 | For example with conda-forge_ the following creates a new Python 3.8
65 | environment with compyle installed and working with both OpenMP and OpenCL::
66 | 
67 |     $ conda create -c conda-forge -n py38 python=3.8 numpy pyopencl
68 |     $ conda activate py38 # or a suitable such invocation
69 |     $ pip install compyle
70 | 
71 | Note that the above implicitly installs the ``llvm-openmp`` package in the
72 | environment which works out of the box with clang and provides OpenMP support.
73 | 
74 | .. _conda: https://docs.conda.io/
75 | .. _conda-forge: https://conda-forge.org/
76 | .. _XCode command line utilities: http://stackoverflow.com/questions/12228382/after-install-xcode-where-is-clang
77 | 
78 | 
79 | Possible issues on MacOS
80 | --------------------------
81 | 
82 | Ensure that you have gcc or clang installed by installing XCode. See
83 | installing `XCode command line utilities`_ if you installed XCode but can't
84 | find clang or gcc.
85 | 
86 | If you are getting strange errors of the form::
87 | 
88 |     clang: warning: libstdc++ is deprecated; move to libc++ with a minimum deployment target of OS X 10.9 [-Wdeprecated]
89 |     ld: library not found for -lstdc++
90 |     clang: error: linker command failed with exit code 1 (use -v to see invocation)
91 | 
92 | Then try this (on a bash shell)::
93 | 
94 |     $ export MACOSX_DEPLOYMENT_TARGET=10.9
95 | 
96 | And run your command again (replace the above with a suitable line on other
97 | shells). This is necessary because your Python was compiled with an older
98 | deployment target and the current version of XCode that you have installed is
99 | not compatible with that. By setting the environment variable you allow
100 | compyle to use a newer version. If this works, it is a good idea to set this
101 | in your default environment (``.bashrc`` for bash shells) so you do not have
102 | to do this every time. You may also do this in the compyle configuration file,
103 | see :ref:`config`.
104 |
105 |
106 |
107 | OpenMP on MacOS
108 | ~~~~~~~~~~~~~~~~
109 |
110 | These instructions are a bit old and are needed only if you are not using
111 | conda as discussed above.
112 |
113 | The default clang compiler available on MacOS uses an LLVM backend and does
114 | not support OpenMP_ out of the box. There are two ways to support OpenMP.
115 | The first involves installing the OpenMP support for clang. This can be done
116 | with brew_ using::
117 |
118 |     $ brew install libomp
119 |
120 | Similarly, `LLVM `_ can also be installed using
121 | brew_. Once this is done, you
122 | can use the following config_::
123 |
124 |     import os
125 |     os.environ['CC'] = '/opt/homebrew/opt/llvm@15/bin/clang'
126 |     os.environ['CXX'] = '/opt/homebrew/opt/llvm@15/bin/clang++'
127 |     OMP_CFLAGS = ['-I/opt/homebrew/opt/libomp/include', '-I/opt/homebrew/opt/llvm@15/include', '-Xclang', '-fopenmp']
128 |     OMP_LINK = ['-L/opt/homebrew/opt/libomp/lib', '-L/opt/homebrew/opt/llvm@15/lib', '-lomp']
129 |
130 | The above config assumes that you have installed ``llvm@15``. Change the
131 | config to match the version you actually installed. Once that is done,
132 | it should "just work". If you get strange errors, you can also try
133 | setting the ``MACOSX_DEPLOYMENT_TARGET`` as shown in the previous section.
134 |
135 | Another option is to install GCC for MacOS, available on brew_, using::
136 |
137 |     $ brew install gcc
138 |
139 | Once this is done, you need to use this as your default compiler. The ``gcc``
140 | formula on brew currently ships with gcc version 9. Therefore, you can
141 | tell Python to use the GCC installed by brew by setting::
142 |
143 |     $ export CC=gcc-9
144 |     $ export CXX=g++-9
145 |
146 | Note that you still do need to have the command line tools for XCode
147 | installed, otherwise the important header files will not be available. See
148 | `how-to-install-xcode-command-line-tools
149 | `_
150 | for more details. You may also want to set these environment variables in
151 | your ``.bashrc`` so you don't have to do this every time. You may also do
152 | this in the compyle configuration file; see :ref:`config`.
153 |
154 | Once you do this, compyle will automatically use this version of GCC and
155 | will also work with OpenMP. Note that in some preliminary benchmarks, GCC's
156 | OpenMP implementation seems about 10% or so faster than the LLVM version.
157 | Your mileage may vary.
158 |
159 | .. _brew: http://brew.sh/
160 |
161 |
162 | Setting up on Windows
163 | ----------------------
164 |
165 | Windows will work, but you need to make sure you have the right compiler
166 | installed. See this page for the details of what you need:
167 |
168 | https://wiki.python.org/moin/WindowsCompilers
169 |
170 | OpenMP will work if you have this installed. For recent Python versions
171 | (>=3.5), install the `Microsoft Build Tools for Visual Studio 2019
172 | `_.
173 |
174 |
175 | Setting up OpenCL/CUDA
176 | -----------------------
177 |
178 | This is too involved a topic to discuss here; instead, look at the
179 | appropriate documentation for PyOpenCL_ and PyCUDA_. Once those packages
180 | work correctly, you should be all set. Note that if you are only using
181 | OpenCL/CUDA, you do not need to have Cython or a C/C++ compiler. Some
182 | features on CUDA require the use of the CuPy_ library.
183 |
184 | If you want to use OpenCL support, you will need to install the ``pyopencl``
185 | package (``conda install -c conda-forge pyopencl`` or ``pip install
186 | pyopencl``).
For CUDA support, you will need to install ``pycuda`` and
187 | ``cupy``. Of course, this assumes you have the required hardware.
188 |
189 |
190 | .. _config:
191 |
192 | Using the configuration file
193 | -----------------------------
194 |
195 | Instead of setting environment variables and build options on the shell, you
196 | can have them set up using a simple configuration file.
197 |
198 | The file is located in ``~/.compyle/config.py``. Here ``~`` is your home
199 | directory, which on Linux is ``/home/username``, on MacOS
200 | ``/Users/username``, and on Windows likely ``\Users\username``. This file is
201 | executed and certain options may be set there.
202 |
203 | For example, if you wish to set the environment variables ``CC`` and
204 | ``CXX``, you could do this in the ``config.py``::
205 |
206 |     import os
207 |
208 |     os.environ['CC'] = 'gcc-9'
209 |     os.environ['CXX'] = 'g++-9'
210 |
211 | If you are using an atypical compiler like icc, Cray, or PGI, you can set
212 | these up here too. You may also set up custom OpenMP-related flags. For
213 | example, on a Cray system you may do the following::
214 |
215 |     OMP_CFLAGS = ['-homp']
216 |     OMP_LINK = ['-homp']
217 |
218 | The ``OMP_CFLAGS`` and ``OMP_LINK`` parameters should be lists. Other
219 | packages like pyzoltan or pysph may also use this file for customizations.
--------------------------------------------------------------------------------
/docs/source/overview.rst:
--------------------------------------------------------------------------------
 1 | An overview
 2 | ==============
 3 |
 4 | Compyle allows users to execute a restricted subset of Python (very similar
 5 | to C) on a variety of HPC platforms. Currently we support multi-core
 6 | execution using Cython, and OpenCL and CUDA for GPU devices.
 7 |
 8 | An introduction to compyle in the context of writing a molecular dynamics
 9 | simulator is available in our `SciPy 2020 paper`_. You may also
10 | `try Compyle`_ online on a Google Colab notebook if you wish.
11 |
12 | Users start with code implemented in a very restricted Python syntax; this
13 | code is then automatically transpiled, compiled, and executed to run on
14 | either one CPU core, multiple CPU cores, or a GPU. Compyle offers
15 | source-to-source transpilation, making it a very convenient tool for writing
16 | HPC libraries.
17 |
18 | Compyle is not a magic bullet:
19 |
20 | - Do not expect that you will always get a tremendous speedup.
21 | - Performance optimization can be hard and is platform specific. What works
22 |   on the CPU may not work on the GPU and vice-versa. Compyle does not do
23 |   anything to make this aspect easier. All the issues with memory bandwidth,
24 |   cache, false sharing etc. still remain. Differences between the memory
25 |   architectures of CPUs and GPUs are not hidden at all -- you still have to
26 |   deal with them. But you can do so from the comfort of one simple
27 |   programming language, Python.
28 | - Compyle makes it easy to write everything in pure Python and generate the
29 |   platform-specific code from Python. It provides a low-level tool that
30 |   makes it easy for you to generate whatever code is appropriate.
31 | - The restrictions Compyle imposes make it easy for you to think about your
32 |   algorithms in that context and thereby allow you to build functionality
33 |   that exploits the hardware as you see fit.
34 | - Compyle hides the details of the backend to the extent possible.
  You can write your code in Python, reuse your functions, and decompose your
  problem to maximize reuse. Traditionally you would end up implementing some
  code in C, some in Python, some in OpenCL/CUDA, and some in string
  fragments that you put together. Then you'd have to manage each of the
  runtimes yourself, worry about compilation, etc. Compyle minimizes that
  pain.
- By being written in Python, we make it easy to assemble these building
  blocks together to do fairly sophisticated things relatively easily from
  the same language.
- Compyle is fairly simple and does source translation, making it generally
  easier to understand and debug. The core code-base is less than 7k lines
  of code.
- Compyle has relatively simple dependencies: for CPU support it requires
  Cython_ and a C-compiler which supports OpenMP_. On the GPU you need
  either PyOpenCL_ or PyCUDA_. In addition it depends on NumPy_ and Mako_.


.. _Cython: http://www.cython.org
.. _OpenMP: http://openmp.org/
.. _PyOpenCL: https://documen.tician.de/pyopencl/
.. _PyCUDA: https://documen.tician.de/pycuda/
.. _OpenCL: https://www.khronos.org/opencl/
.. _NumPy: http://numpy.scipy.org
.. _Mako: https://pypi.python.org/pypi/Mako
.. _SciPy 2020 paper: http://conference.scipy.org/proceedings/scipy2020/compyle_pr_ab.html
.. _try Compyle: https://colab.research.google.com/drive/1SGRiArYXV1LEkZtUeg9j0qQ21MDqQR2U?usp=sharing

While Compyle is simple and modest, it is quite powerful and convenient. In
fact, Compyle has its origins in PySPH_, which is a powerful Python package
supporting SPH, molecular dynamics, and other particle-based algorithms. The
basic elements of Compyle are used in PySPH_ to automatically generate HPC
code from code written in pure Python and execute it on multiple cores, and
on GPUs, without the user having to change any of their code. Compyle
generalizes this code generation to make it available as a general tool.

.. _PySPH: http://pysph.readthedocs.io


These are the restrictions Compyle imposes on the Python language:

- Functions must be written with a C-like syntax.
- Function arguments must be declared using either type annotations, a
  decorator, or default arguments.
- No Python data structures, i.e. no lists, tuples, or dictionaries.
- Contiguous Numpy arrays are supported but must be one dimensional.
- No memory allocation is allowed inside these functions.
- On OpenCL no recursion is supported.
- Function calls must not use dotted names, i.e. don't use ``math.sin``;
  instead just use ``sin``. This is because we do not perform any kind of
  name mangling of the generated code, to keep it easier to read.

Basically, think of it as good old FORTRAN.

Technically we do support structs internally (we use them heavily in
PySPH_), but this is not yet exposed at the high level and is very likely to
be supported in the future.


Simple example
--------------

Enough talk; let's look at some code.
Here is a very simple example::

    from compyle.api import Elementwise, annotate, wrap, get_config
    import numpy as np

    @annotate(i='int', x='doublep', y='doublep', double='a,b')
    def axpb(i, x, y, a, b):
        y[i] = a*sin(x[i]) + b

    x = np.linspace(0, 1, 10000)
    y = np.zeros_like(x)
    a = 2.0
    b = 3.0

    backend = 'cython'
    get_config().use_openmp = True
    x, y = wrap(x, y, backend=backend)
    e = Elementwise(axpb, backend=backend)
    e(x, y, a, b)

This will execute the elementwise operation in parallel using OpenMP with
Cython. The code is auto-generated, compiled, and called for you
transparently. The first time this runs, it will take a bit of time to
compile everything, but the next time it is cached and will run much faster.

If you just change ``backend = 'opencl'``, the same exact code will be
executed using PyOpenCL_, and if you change the backend to ``'cuda'``, it
will execute via CUDA without any other changes to your code. This is
obviously a very trivial example; there are more complex examples available
as well.

To see the source code that is automatically generated for the above
elementwise operation example, use::

    e.source

This will contain the sources that are generated based on the user code
alone. To see all the sources created, use::

    e.all_source

A word of warning though: this can be fairly long, especially on a GPU, and
for other kinds of operations it may actually include multiple GPU kernels.
This is largely for reference and debugging.


More examples
--------------

More complex examples (but still fairly simple) are available in the
`examples `_ directory.

- `axpb.py `_: the
  above example but for OpenMP and OpenCL compared with serial, showing that
  in some cases serial is actually faster than parallel!

- `vm_elementwise.py
  `_:
  shows a simple N-body code with two-dimensional point vortices. The code
  uses a simple elementwise operation and works with OpenMP and OpenCL.

- `vm_numba.py
  `_: shows
  the same code written in numba for comparison. In our benchmarks, Compyle
  is actually faster even in serial, and in parallel it can be much faster
  when you use all cores.

- `vm_kernel.py
  `_: shows
  how one can write a low-level OpenCL kernel in pure Python and use that.
  This also shows how you can allocate and use local (or shared) memory,
  which is often very important for performance on GPGPUs. This code will
  only run via PyOpenCL.

- `bench_vm.py
  `_:
  benchmarks the various vortex method results above for a comparison with
  numba.


Read on for more details about Compyle.


Citing Compyle
---------------

If you find Compyle useful or just want to read a paper on it, please see:

- Aditya Bhosale and Prabhu Ramachandran, "Compyle: Python once, parallel
  computing anywhere", Proceedings of the 19th Python in Science Conference
  (SciPy 2020), July 2020, Austin, Texas, USA.
  `doi:10.25080/Majora-342d178e-005
  `_ (**won best poster**); see also the `SciPy
  2020 Paper`_.
Accompanying the paper are:

- the `Compyle poster presentation `_
- and the `Compyle poster video `_
--------------------------------------------------------------------------------
/examples/axpb.py:
--------------------------------------------------------------------------------
 1 | from compyle.api import Elementwise, annotate, wrap, get_config
 2 | import numpy as np
 3 | from numpy import sin
 4 | import time
 5 |
 6 |
 7 | @annotate(i='int', doublep='x, y, a, b')
 8 | def axpb(i, x, y, a, b):
 9 |     y[i] = a[i]*sin(x[i]) + b[i]
10 |
11 |
12 | def setup(backend, openmp=False):
13 |     get_config().use_openmp = openmp
14 |     e = Elementwise(axpb, backend=backend)
15 |     return e
16 |
17 |
18 | def data(n, backend):
19 |     x = np.linspace(0, 1, n)
20 |     y = np.zeros_like(x)
21 |     a = x*x
22 |     b = np.sqrt(x + 1)
23 |     return wrap(x, y, a, b, backend=backend)
24 |
25 |
26 | def compare(m=20):
27 |     N = 2**np.arange(1, 25)
28 |     backends = [['cython', False], ['cython', True]]
29 |     try:
30 |         import pyopencl
31 |         backends.append(['opencl', False])
32 |     except ImportError:
33 |         pass
34 |
35 |     try:
36 |         import pycuda
37 |         backends.append(['cuda', False])
38 |     except ImportError:
39 |         pass
40 |
41 |     timing = []
42 |     for backend in backends:
43 |         e = setup(*backend)
44 |         times = []
45 |         for n in N:
46 |             args = data(n, backend[0])
47 |             t = []
48 |             for j in range(m):
49 |                 start = time.time()
50 |                 e(*args)
51 |                 secs = time.time() - start
52 |                 t.append(secs)
53 |             times.append(np.average(t))
54 |         timing.append(times)
55 |
56 |     return N, backends, np.array(timing)
57 |
58 |
59 | def plot_timing(n, timing, backends):
60 |     from matplotlib import pyplot as plt
61 |     backends[1][0] = 'openmp'
62 |     for t, backend in zip(timing[1:], backends[1:]):
63 |         plt.semilogx(n, timing[0]/t, label='serial/' + backend[0], marker='+')
64 |     plt.grid()
65 |     plt.xlabel('N')
66 |     plt.ylabel('Speedup')
67 |     plt.legend()
68 |     plt.show()
69 |
70 |
71 | if __name__ == '__main__':
72 |     n, backends, times = compare()
73 |     plot_timing(n, times, backends)
74 |
--------------------------------------------------------------------------------
/examples/axpb_jit.py:
--------------------------------------------------------------------------------
 1 | """Shows the use of annotate without any type information.
 2 | The type information is extracted from the arguments passed
 3 | and the function is annotated and compiled at runtime.
4 | """ 5 | 6 | from compyle.api import annotate, Elementwise, wrap, get_config, declare 7 | import numpy as np 8 | from numpy import sin 9 | 10 | 11 | @annotate 12 | def axpb(i, x, y, a, b): 13 | xi = x[i] 14 | y[i] = a * sin(xi) + b 15 | 16 | 17 | x = np.linspace(0, 1, 10000) 18 | y = np.zeros_like(x) 19 | a = 2.0 20 | b = 3.0 21 | 22 | backend = 'opencl' 23 | get_config().use_openmp = True 24 | x, y = wrap(x, y, backend=backend) 25 | e = Elementwise(axpb, backend=backend) 26 | e(x, y, a, b) 27 | -------------------------------------------------------------------------------- /examples/bench_vm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | 4 | from compyle.config import get_config 5 | import vm_numba as VN 6 | import vm_elementwise as VE 7 | import vm_kernel as VK 8 | 9 | 10 | def setup(mod, backend, openmp): 11 | get_config().use_openmp = openmp 12 | if mod == VE: 13 | e = VE.Elementwise(VE.velocity, backend) 14 | elif mod == VN: 15 | e = VN.velocity 16 | elif mod == VK: 17 | e = VK.Kernel(VK.velocity, backend) 18 | 19 | return e 20 | 21 | 22 | def data(n, mod, backend): 23 | if mod == VN: 24 | args = mod.make_vortices(n) 25 | else: 26 | args = mod.make_vortices(n, backend) 27 | return args 28 | 29 | 30 | def compare(m=5): 31 | # Warm up the jit to prevent the timing from going off for the first point. 32 | VN.velocity(*VN.make_vortices(100)) 33 | N = np.array([10, 50, 100, 200, 500, 1000, 2000, 4000, 6000, 34 | 8000, 10000, 15000, 20000]) 35 | backends = [(VN, '', False), (VE, 'cython', False), (VE, 'cython', True), 36 | (VE, 'opencl', False), (VK, 'opencl', False)] 37 | timing = [] 38 | for backend in backends: 39 | e = setup(*backend) 40 | times = [] 41 | for n in N: 42 | args = data(n, backend[0], backend[1]) 43 | t = [] 44 | for j in range(m): 45 | start = time.time() 46 | e(*args) 47 | t.append(time.time() - start) 48 | times.append(np.min(t)) 49 | timing.append(times) 50 | 51 | return N, np.array(timing) 52 | 53 | 54 | def plot_timing(n, timing): 55 | from matplotlib import pyplot as plt 56 | plt.plot(n, timing[0]/timing[1], label='numba/cython', marker='+') 57 | plt.plot(n, timing[0]/timing[2], label='numba/openmp', marker='+') 58 | plt.plot(n, timing[0]/timing[3], label='numba/opencl', marker='+') 59 | plt.plot(n, timing[0]/timing[4], label='numba/opencl local', marker='+') 60 | plt.grid() 61 | plt.xlabel('N') 62 | plt.ylabel('Speedup') 63 | plt.legend() 64 | plt.figure() 65 | gflop = 12*n*n/1e9 66 | plt.plot(n, gflop/timing[0], label='numba', marker='+') 67 | plt.plot(n, gflop/timing[1], label='Cython', marker='+') 68 | plt.plot(n, gflop/timing[2], label='OpenMP', marker='+') 69 | plt.plot(n, gflop/timing[3], label='OpenCL', marker='+') 70 | plt.plot(n, gflop/timing[4], label='OpenCL Local', marker='+') 71 | plt.grid() 72 | plt.xlabel('N') 73 | plt.ylabel('GFLOPS') 74 | plt.legend() 75 | plt.show() 76 | best = timing[:, -1].min() 77 | print("Fastest time for n=", n[-1], best, "secs") 78 | 79 | 80 | if __name__ == '__main__': 81 | n, t = compare() 82 | plot_timing(n, t) 83 | -------------------------------------------------------------------------------- /examples/julia_set.py: -------------------------------------------------------------------------------- 1 | import time 2 | from math import cos, sin 3 | import numpy as np 4 | from compyle.api import annotate, elementwise, get_config, wrap 5 | 6 | 7 | @annotate 8 | def julia(i, z, xa, ya, t): 9 | c0 = 0.7885*cos(t) 10 | c1 = 0.7885*sin(t) 11 | x 
= xa[i] 12 | y = ya[i] 13 | iters = 0 14 | while (x*x + y*y) < 400 and iters < 50: 15 | xn = x*x - y*y + c0 16 | y = x*y*2.0 + c1 17 | x = xn 18 | iters += 1 19 | z[i] = 1.0 - iters*0.02 20 | 21 | 22 | def timer(x, y, z): 23 | s = time.perf_counter() 24 | n = 2000 25 | dt = 4*np.pi/n 26 | for i in range(n): 27 | julia(z, x, y, -dt*i) 28 | print("Took", time.perf_counter() - s, "seconds") 29 | 30 | 31 | def plot(x, y, z, nx, ny): 32 | from mayavi import mlab 33 | mlab.figure(size=(600, 600)) 34 | xmin, xmax = np.min(x.data), np.max(x.data) 35 | ymin, ymax = np.min(y.data), np.max(y.data) 36 | s = mlab.imshow(z.data.reshape((nx, ny)), 37 | extent=[xmin, xmax, ymin, ymax, 0, 0], 38 | colormap='jet') 39 | s.scene.z_plus_view() 40 | n = 2000 41 | dt = 4*np.pi/n 42 | for i in range(n): 43 | julia(z, x, y, -dt*i) 44 | z.pull() 45 | s.mlab_source.scalars = z.data.reshape((nx, ny)) 46 | if i % 3 == 0: 47 | mlab.process_ui_events() 48 | mlab.show() 49 | 50 | 51 | def save(x, y, z, gif_path='julia_set.gif'): 52 | import imageio as iio 53 | n = 250 54 | dt = 2*np.pi/n 55 | print(f"Writing {gif_path}") 56 | with iio.get_writer(gif_path, mode='I') as writer: 57 | for i in range(n): 58 | julia(z, x, y, -dt*i) 59 | z.pull() 60 | writer.append_data( 61 | (z.data.reshape((nx, ny))*255).astype(np.uint8) 62 | ) 63 | print(f"{i}/{n}", end='\r') 64 | print("Done. ") 65 | try: 66 | from pygifsicle import optimize 67 | optimize(gif_path) 68 | except ImportError: 69 | print("Install pygifsicle for an optimized GIF") 70 | 71 | 72 | if __name__ == '__main__': 73 | from compyle.utils import ArgumentParser 74 | p = ArgumentParser() 75 | p.add_argument('-n', action='store', type=int, dest='n', 76 | default=512, help='Number of grid points in y.') 77 | p.add_argument( 78 | '--show', action='store_true', dest='show', 79 | default=False, help='Show animation (requires mayavi)' 80 | ) 81 | p.add_argument( 82 | '--gif', action='store_true', 83 | default=False, help='Make a gif animation (requires imageio)' 84 | ) 85 | cfg = get_config() 86 | cfg.suppress_warnings = True 87 | o = p.parse_args() 88 | julia = elementwise(julia) 89 | ny = o.n 90 | nx = int(4*ny//3) 91 | x, y = np.mgrid[-2:2:nx*1j, -1.5:1.5:ny*1j] 92 | x, y = x.ravel(), y.ravel() 93 | z = np.zeros_like(x) 94 | x, y, z = wrap(x, y, z) 95 | 96 | timer(x, y, z) 97 | 98 | if o.show: 99 | plot(x, y, z, nx, ny) 100 | if o.gif: 101 | save(x, y, z) 102 | -------------------------------------------------------------------------------- /examples/laplace.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from compyle.config import get_config 6 | from compyle.api import declare, annotate 7 | from compyle.parallel import Elementwise 8 | from compyle.array import get_backend, wrap 9 | from compyle.low_level import cast 10 | 11 | import compyle.array as carr 12 | 13 | 14 | def bc(x, y): 15 | return np.sin(np.pi * (x + y)) 16 | 17 | 18 | @annotate 19 | def laplace_step(i, u, res, err, nx, ny, dx2, dy2, dnr_inv): 20 | xid = cast(i % nx, "int") 21 | yid = cast(i / nx, "int") 22 | 23 | if xid == 0 or xid == nx - 1 or yid == 0 or yid == ny - 1: 24 | return 25 | 26 | res[i] = ((u[i - 1] + u[i + 1]) * dx2 + 27 | (u[i - nx] + u[i + nx]) * dy2) * dnr_inv 28 | 29 | diff = res[i] - u[i] 30 | 31 | err[i] = diff * diff 32 | 33 | 34 | class Grid(object): 35 | def __init__(self, nx=10, ny=10, xmin=0., xmax=1., 36 | ymin=0., ymax=1., bc=lambda x: 0, backend=None): 37 | self.backend = 
get_backend(backend) 38 | self.xmin, self.xmax, self.ymin, self.ymax = xmin, xmax, ymin, ymax 39 | self.nx, self.ny = nx, ny 40 | self.dx = (xmax - xmin) / (nx - 1) 41 | self.dy = (ymax - ymin) / (ny - 1) 42 | self.x = np.arange(self.xmin, self.xmax + self.dx * 0.5, self.dx) 43 | self.y = np.arange(self.ymin, self.ymax + self.dy * 0.5, self.dy) 44 | self.bc = bc 45 | self.setup() 46 | 47 | def setup(self): 48 | u_host = np.zeros((self.nx, self.ny)).astype(np.float32) 49 | 50 | u_host[0, :] = self.bc(self.xmin, self.y) 51 | u_host[-1, :] = self.bc(self.xmax, self.y) 52 | u_host[:, 0] = self.bc(self.x, self.ymin) 53 | u_host[:, -1] = self.bc(self.x, self.ymax) 54 | 55 | self.u = wrap(u_host.flatten(), backend=self.backend) 56 | self.err = carr.zeros_like(self.u) 57 | 58 | def get(self): 59 | u_host = self.u.get() 60 | return np.resize(u_host, (self.nx, self.ny)) 61 | 62 | def compute_err(self): 63 | return np.sqrt(carr.dot(self.err, self.err)) 64 | 65 | def plot(self): 66 | import matplotlib.pyplot as plt 67 | plt.imshow(self.get()) 68 | plt.show() 69 | 70 | 71 | class LaplaceSolver(object): 72 | def __init__(self, grid, backend=None): 73 | self.grid = grid 74 | self.backend = get_backend(backend) 75 | self.step_method = Elementwise(laplace_step, backend=self.backend) 76 | self.res = self.grid.u.copy() 77 | 78 | def solve(self, max_iter=None, eps=1.0e-8): 79 | err = np.inf 80 | 81 | g = self.grid 82 | 83 | dx2 = g.dx ** 2 84 | dy2 = g.dy ** 2 85 | dnr_inv = 0.5 / (dx2 + dy2) 86 | 87 | count = 0 88 | 89 | while err > eps: 90 | if max_iter and count >= max_iter: 91 | return err, count 92 | self.step_method(g.u, self.res, g.err, g.nx, g.ny, 93 | dx2, dy2, dnr_inv) 94 | err = g.compute_err() 95 | 96 | tmp = g.u 97 | g.u = self.res 98 | self.res = tmp 99 | 100 | count += 1 101 | 102 | return err, count 103 | 104 | 105 | if __name__ == '__main__': 106 | from compyle.utils import ArgumentParser 107 | p = ArgumentParser() 108 | p.add_argument('--nx', action='store', type=int, dest='nx', 109 | default=100, help='Number of grid points in x.') 110 | p.add_argument('--ny', action='store', type=int, dest='ny', 111 | default=100, help='Number of grid points in y.') 112 | p.add_argument( 113 | '--show', action='store_true', dest='show', 114 | default=False, help='Show plot at the end of simulation' 115 | ) 116 | o = p.parse_args() 117 | 118 | grid = Grid(nx=o.nx, ny=o.ny, bc=bc, backend=o.backend) 119 | 120 | solver = LaplaceSolver(grid, backend=o.backend) 121 | 122 | start = time.time() 123 | err, count = solver.solve(eps=1e-6) 124 | end = time.time() 125 | 126 | print("Number of iterations = %s" % count) 127 | print("Time taken = %g secs" % (end - start)) 128 | 129 | if o.show: 130 | solver.grid.plot() 131 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/3D/compare_results.py: -------------------------------------------------------------------------------- 1 | from hoomd_periodic import simulate 2 | from md_nnps_periodic import MDNNPSSolverPeriodic 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | def run_simulations(num_particles, tf, dt): 7 | # run hoomd simulation 8 | simulate(num_particles, dt, tf, log=True) 9 | 10 | # run compyle simulation 11 | solver = MDNNPSSolverPeriodic(num_particles) 12 | solver.solve(tf, dt, log_output=True) 13 | solver.write_log('compyle-output.log') 14 | 15 | 16 | def plot_props(hoomd_fname, comp_fname): 17 | data_hoomd = np.genfromtxt(fname=hoomd_fname, skip_header=True) 18 | data_compyle = 
np.genfromtxt(fname=comp_fname) 19 | 20 | 21 | plt.plot(data_hoomd[:,0], data_hoomd[:,1], label="HooMD") 22 | plt.plot(data_hoomd[:,0], data_compyle[:,1], label="Compyle") 23 | plt.xlabel("Timestep") 24 | plt.ylabel("Potential Energy") 25 | plt.legend() 26 | plt.savefig("hoomd_pe.png", dpi=300) 27 | 28 | plt.clf() 29 | 30 | plt.plot(data_hoomd[:,0], data_hoomd[:,2], label="HooMD") 31 | plt.plot(data_hoomd[:,0], data_compyle[:,2], label="Compyle") 32 | plt.xlabel("Timestep") 33 | plt.ylabel("Kinetic Energy") 34 | plt.legend() 35 | plt.savefig("hoomd_ke.png", dpi=300) 36 | 37 | 38 | if __name__ == '__main__': 39 | run_simulations(2000, 200, 0.02) 40 | plot_props('hoomd-output.log', 'compyle-output.log') 41 | 42 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/3D/hoomd_periodic.py: -------------------------------------------------------------------------------- 1 | import hoomd 2 | import hoomd.md 3 | import numpy as np 4 | import time 5 | 6 | 7 | def setup_positions(num_particles, dx): 8 | ndim = np.ceil(num_particles ** (1 / 3.)) 9 | dim_length = ndim * dx 10 | 11 | xmax = 3 * (1 + round(dim_length * 1.5 / 3.)) 12 | ymax = 3 * (1 + round(dim_length * 1.5 / 3.)) 13 | zmax = 3 * (1 + round(dim_length * 1.5 / 3.)) 14 | 15 | print(dim_length, xmax) 16 | 17 | xmin_eff = (xmax - dim_length) / 2. 18 | xmax_eff = (xmax + dim_length) / 2. 19 | 20 | x, y, z = np.mgrid[xmin_eff:xmax_eff:dx, xmin_eff:xmax_eff:dx, 21 | xmin_eff:xmax_eff:dx] 22 | x = x.ravel().astype(np.float32)[:num_particles] 23 | y = y.ravel().astype(np.float32)[:num_particles] 24 | z = z.ravel().astype(np.float32)[:num_particles] 25 | return x, y, z, xmax 26 | 27 | 28 | def simulate(num_particles, dt, tf, profile=False, log=False): 29 | x, y, z, L = setup_positions(num_particles, 2.) 30 | positions = np.array((x, y, z)).T 31 | hoomd.context.initialize("") 32 | 33 | snapshot = hoomd.data.make_snapshot(N=len(positions), 34 | box=hoomd.data.boxdim( 35 | Lx=L, Ly=L, Lz=L), 36 | particle_types=['A'], 37 | ) 38 | # need to get automated positions... 
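    # HOOMD boxes are centered at the origin, so shift the lattice
    # positions from [0, L] into [-L/2, L/2] before assigning them.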
39 | snapshot.particles.position[:] = positions - 0.5 * L 40 | 41 | snapshot.particles.typeid[:] = 0 42 | 43 | hoomd.init.read_snapshot(snapshot) 44 | 45 | nl = hoomd.md.nlist.cell(r_buff=0) 46 | lj = hoomd.md.pair.lj(r_cut=3.0, nlist=nl) 47 | lj.pair_coeff.set('A', 'A', epsilon=1.0, sigma=1.0) 48 | 49 | if log: 50 | hoomd.analyze.log(filename="hoomd-output.log", 51 | quantities=['potential_energy', 'kinetic_energy'], 52 | period=100, 53 | overwrite=True) 54 | 55 | # Create integrator and forces 56 | hoomd.md.integrate.mode_standard(dt=dt) 57 | hoomd.md.integrate.nve(group=hoomd.group.all()) 58 | 59 | nsteps = int(tf // dt) 60 | start = time.time() 61 | hoomd.run(nsteps, profile=profile) 62 | end = time.time() 63 | return end - start 64 | 65 | 66 | if __name__ == '__main__': 67 | import sys 68 | print(simulate(int(sys.argv[1]), 0.02, 200., profile=True, log=True)) 69 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/3D/md_nnps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from compyle.config import get_config 6 | from compyle.api import declare, annotate 7 | from compyle.parallel import Elementwise, Reduction 8 | from compyle.array import get_backend, wrap 9 | import compyle.array as carr 10 | 11 | from nnps import NNPSCountingSort, NNPSRadixSort 12 | from md_simple import integrate_step1, integrate_step2, \ 13 | boundary_condition, MDSolverBase 14 | 15 | 16 | @annotate 17 | def calculate_force(i, x, y, z, fx, fy, fz, pe, nbr_starts, nbr_lengths, nbrs): 18 | start_idx = nbr_starts[i] 19 | length = nbr_lengths[i] 20 | for k in range(start_idx, start_idx + length): 21 | j = nbrs[k] 22 | if i == j: 23 | continue 24 | xij = x[i] - x[j] 25 | yij = y[i] - y[j] 26 | zij = z[i] - z[j] 27 | rij2 = xij * xij + yij * yij + zij * zij 28 | irij2 = 1.0 / rij2 29 | irij6 = irij2 * irij2 * irij2 30 | irij12 = irij6 * irij6 31 | pe[i] += (2 * (irij12 - irij6)) 32 | f_base = 24 * irij2 * (2 * irij12 - irij6) 33 | 34 | fx[i] += f_base * xij 35 | fy[i] += f_base * yij 36 | fz[i] += f_base * zij 37 | 38 | 39 | @annotate 40 | def step_method1(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax, 41 | ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths, 42 | nbrs): 43 | integrate_step1(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz) 44 | boundary_condition(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax, 45 | ymin, ymax, zmin, zmax) 46 | 47 | 48 | @annotate 49 | def step_method2(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax, 50 | ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths, 51 | nbrs): 52 | calculate_force(i, x, y, z, fx, fy, fz, pe, nbr_starts, nbr_lengths, nbrs) 53 | integrate_step2(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz) 54 | 55 | 56 | class MDNNPSSolver(MDSolverBase): 57 | def __init__(self, num_particles, x=None, y=None, z=None, 58 | vx=None, vy=None, vz=None, 59 | xmax=100., ymax=100., zmax=100., dx=2., init_T=0., 60 | backend=None, use_count_sort=False): 61 | super().__init__(num_particles, x=x, y=y, z=z, vx=vx, vy=vy, vz=vz, 62 | xmax=xmax, ymax=ymax, zmax=zmax, dx=dx, init_T=init_T, 63 | backend=backend) 64 | self.nnps_algorithm = NNPSCountingSort \ 65 | if use_count_sort else NNPSRadixSort 66 | self.nnps = self.nnps_algorithm(self.x, self.y, self.z, 3., 0.01, 67 | self.xmax, self.ymax, self.zmax, 68 | backend=self.backend) 69 | self.init_forces = Elementwise(calculate_force, backend=self.backend) 70 | self.step1 = 
Elementwise(step_method1, backend=self.backend) 71 | self.step2 = Elementwise(step_method2, backend=self.backend) 72 | 73 | def solve(self, t, dt, log_output=False): 74 | num_steps = int(t // dt) 75 | self.nnps.build() 76 | self.nnps.get_neighbors() 77 | self.init_forces(self.x, self.y, self.z, self.fx, self.fy, self.fz, 78 | self.pe, self.nnps.nbr_starts, 79 | self.nnps.nbr_lengths, self.nnps.nbrs) 80 | for i in range(num_steps): 81 | self.step1(self.x, self.y, self.z, self.vx, self.vy, self.vz, 82 | self.fx, self.fy, self.fz, 83 | self.pe, self.xmin, self.xmax, self.ymin, self.ymax, 84 | self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts, 85 | self.nnps.nbr_lengths, self.nnps.nbrs) 86 | self.nnps.build() 87 | self.nnps.get_neighbors() 88 | self.step2(self.x, self.y, self.z, self.vx, self.vy, self.vz, 89 | self.fx, self.fy, self.fz, 90 | self.pe, self.xmin, self.xmax, self.ymin, self.ymax, 91 | self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts, 92 | self.nnps.nbr_lengths, self.nnps.nbrs) 93 | 94 | if i % 100 == 0: 95 | self.post_step(i, log_output=log_output) 96 | 97 | 98 | if __name__ == '__main__': 99 | from compyle.utils import ArgumentParser 100 | p = ArgumentParser() 101 | p.add_argument( 102 | '--use-count-sort', action='store_true', dest='use_count_sort', 103 | default=False, help='Use count sort instead of radix sort' 104 | ) 105 | p.add_argument( 106 | '--show', action='store_true', dest='show', 107 | default=False, help='Show plot' 108 | ) 109 | p.add_argument( 110 | '--log-output', action='store_true', dest='log_output', 111 | default=False, help='Log output' 112 | ) 113 | 114 | 115 | p.add_argument('-n', action='store', type=int, dest='n', 116 | default=100, help='Number of particles') 117 | 118 | p.add_argument('--tf', action='store', type=float, dest='t', 119 | default=40., help='Final time') 120 | 121 | p.add_argument('--dt', action='store', type=float, dest='dt', 122 | default=0.02, help='Time step') 123 | 124 | o = p.parse_args() 125 | 126 | solver = MDNNPSSolver( 127 | o.n, 128 | backend=o.backend, 129 | use_count_sort=o.use_count_sort) 130 | 131 | start = time.time() 132 | solver.solve(o.t, o.dt, log_output=o.log_output) 133 | end = time.time() 134 | print("Time taken for N = %i is %g secs" % (o.n, (end - start))) 135 | if o.log_output: 136 | solver.write_log('nnps_log.log') 137 | if o.show: 138 | solver.pull() 139 | solver.plot() 140 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/3D/md_nnps_periodic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from compyle.config import get_config 6 | from compyle.api import declare, annotate 7 | from compyle.parallel import Elementwise, Reduction 8 | from compyle.array import get_backend, wrap 9 | from compyle.low_level import cast 10 | import compyle.array as carr 11 | 12 | from nnps import NNPSCountingSortPeriodic, NNPSRadixSortPeriodic 13 | from md_simple import integrate_step1, integrate_step2, MDSolverBase 14 | 15 | 16 | @annotate 17 | def calculate_force(i, x, y, z, xmax, ymax, zmax, fx, fy, fz, pe, 18 | nbr_starts, nbr_lengths, nbrs): 19 | start_idx = nbr_starts[i] 20 | length = nbr_lengths[i] 21 | halfx = 0.5 * xmax 22 | halfy = 0.5 * ymax 23 | halfz = 0.5 * zmax 24 | for k in range(start_idx, start_idx + length): 25 | j = nbrs[k] 26 | if i == j: 27 | continue 28 | xij = x[i] - x[j] 29 | yij = y[i] - y[j] 30 | zij = z[i] - z[j] 31 | signx = 1 if 
xij > 0 else -1 32 | signy = 1 if yij > 0 else -1 33 | signz = 1 if zij > 0 else -1 34 | xij = xij if abs(xij) < halfx else xij - signx * xmax 35 | yij = yij if abs(yij) < halfy else yij - signy * ymax 36 | zij = zij if abs(zij) < halfz else zij - signz * zmax 37 | rij2 = xij * xij + yij * yij + zij * zij 38 | irij2 = 1.0 / rij2 39 | irij6 = irij2 * irij2 * irij2 40 | irij12 = irij6 * irij6 41 | pe[i] += (2 * (irij12 - irij6)) 42 | f_base = 24 * irij2 * (2 * irij12 - irij6) 43 | 44 | fx[i] += f_base * xij 45 | fy[i] += f_base * yij 46 | fz[i] += f_base * zij 47 | 48 | 49 | @annotate 50 | def step_method1(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax, 51 | ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths, 52 | nbrs): 53 | integrate_step1(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz) 54 | boundary_condition(i, x, y, z, fx, fy, fz, pe, xmin, xmax, 55 | ymin, ymax, zmin, zmax) 56 | 57 | 58 | @annotate 59 | def step_method2(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax, 60 | ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths, 61 | nbrs): 62 | calculate_force(i, x, y, z, xmax, ymax, zmax, fx, fy, fz, pe, 63 | nbr_starts, nbr_lengths, nbrs) 64 | integrate_step2(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz) 65 | 66 | 67 | @annotate 68 | def boundary_condition(i, x, y, z, fx, fy, fz, pe, xmin, xmax, ymin, ymax, 69 | zmin, zmax): 70 | fx[i] = 0. 71 | fy[i] = 0. 72 | fz[i] = 0. 73 | pe[i] = 0. 74 | 75 | xwidth = xmax - xmin 76 | ywidth = ymax - ymin 77 | zwidth = zmax - zmin 78 | 79 | xoffset = cast(floor(x[i] / xmax), "int") 80 | yoffset = cast(floor(y[i] / ymax), "int") 81 | zoffset = cast(floor(z[i] / zmax), "int") 82 | 83 | x[i] -= xoffset * xwidth 84 | y[i] -= yoffset * ywidth 85 | z[i] -= zoffset * zwidth 86 | 87 | 88 | class MDNNPSSolverPeriodic(MDSolverBase): 89 | def __init__(self, num_particles, x=None, y=None, z=None, 90 | vx=None, vy=None, vz=None, 91 | xmax=100., ymax=100., zmax=100., dx=2., init_T=0., 92 | backend=None, use_count_sort=False): 93 | super().__init__(num_particles, x=x, y=y, z=z, vx=vx, vy=vy, vz=vz, 94 | xmax=xmax, ymax=ymax, zmax=zmax, dx=dx, init_T=init_T, 95 | backend=backend) 96 | self.nnps_algorithm = NNPSCountingSortPeriodic \ 97 | if use_count_sort else NNPSRadixSortPeriodic 98 | self.nnps = self.nnps_algorithm(self.x, self.y, self.z, 3., 0.01, 99 | self.xmax, self.ymax, self.zmax, 100 | backend=self.backend) 101 | self.init_forces = Elementwise(calculate_force, backend=self.backend) 102 | self.step1 = Elementwise(step_method1, backend=self.backend) 103 | self.step2 = Elementwise(step_method2, backend=self.backend) 104 | 105 | def solve(self, t, dt, log_output=False): 106 | num_steps = int(t // dt) 107 | self.nnps.build() 108 | self.nnps.get_neighbors() 109 | self.init_forces(self.x, self.y, self.z, self.xmax, self.ymax, 110 | self.zmax, self.fx, self.fy, self.fz, 111 | self.pe, self.nnps.nbr_starts, 112 | self.nnps.nbr_lengths, self.nnps.nbrs) 113 | for i in range(num_steps): 114 | self.step1(self.x, self.y, self.z, self.vx, self.vy, self.vz, 115 | self.fx, self.fy, self.fz, 116 | self.pe, self.xmin, self.xmax, self.ymin, self.ymax, 117 | self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts, 118 | self.nnps.nbr_lengths, self.nnps.nbrs) 119 | self.nnps.build() 120 | self.nnps.get_neighbors() 121 | self.step2(self.x, self.y, self.z, self.vx, self.vy, self.vz, 122 | self.fx, self.fy, self.fz, 123 | self.pe, self.xmin, self.xmax, self.ymin, self.ymax, 124 | self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts, 125 | self.nnps.nbr_lengths, 
self.nnps.nbrs) 126 | 127 | if i % 100 == 0: 128 | self.post_step(i, log_output=log_output) 129 | 130 | 131 | if __name__ == '__main__': 132 | from compyle.utils import ArgumentParser 133 | p = ArgumentParser() 134 | p.add_argument( 135 | '--use-count-sort', action='store_true', dest='use_count_sort', 136 | default=False, help='Use count sort instead of radix sort' 137 | ) 138 | p.add_argument( 139 | '--show', action='store_true', dest='show', 140 | default=False, help='Show plot' 141 | ) 142 | p.add_argument( 143 | '--log-output', action='store_true', dest='log_output', 144 | default=False, help='Log output' 145 | ) 146 | 147 | p.add_argument('-n', action='store', type=int, dest='n', 148 | default=100, help='Number of particles') 149 | 150 | p.add_argument('--tf', action='store', type=float, dest='t', 151 | default=40., help='Final time') 152 | 153 | p.add_argument('--dt', action='store', type=float, dest='dt', 154 | default=0.02, help='Time step') 155 | 156 | o = p.parse_args() 157 | 158 | solver = MDNNPSSolverPeriodic( 159 | o.n, 160 | backend=o.backend, 161 | use_count_sort=o.use_count_sort) 162 | 163 | start = time.time() 164 | solver.solve(o.t, o.dt, o.log_output) 165 | end = time.time() 166 | print("Time taken for N = %i is %g secs" % (o.n, (end - start))) 167 | if o.log_output: 168 | solver.write_log('nnps_periodic.log') 169 | if o.show: 170 | solver.pull() 171 | solver.plot() 172 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/3D/nnps_kernels.py: -------------------------------------------------------------------------------- 1 | from compyle.api import declare, annotate 2 | from compyle.parallel import serial 3 | from compyle.low_level import atomic_inc, cast 4 | from math import floor 5 | import numpy as np 6 | 7 | 8 | @annotate 9 | def find_cell_id(x, y, z, h, eps, c): 10 | c[0] = cast(floor((x + eps) / h), "int") 11 | c[1] = cast(floor((y + eps) / h), "int") 12 | c[2] = cast(floor((z + eps) / h), "int") 13 | 14 | 15 | @annotate 16 | def flatten(p, q, r, qmax, rmax): 17 | return (p * qmax + q) * rmax + r 18 | 19 | 20 | @serial 21 | @annotate 22 | def count_bins(i, x, y, z, h, eps, qmax, rmax, keys, bin_counts, 23 | sort_offsets): 24 | c = declare('matrix(3, "int")') 25 | find_cell_id(x[i], y[i], z[i], h, eps, c) 26 | key = flatten(c[0], c[1], c[2], qmax, rmax) 27 | keys[i] = key 28 | idx = atomic_inc(bin_counts[key]) 29 | sort_offsets[i] = idx 30 | 31 | 32 | @annotate 33 | def sort_indices(i, keys, sort_offsets, start_indices, sorted_indices): 34 | key = keys[i] 35 | offset = sort_offsets[i] 36 | start_idx = start_indices[key] 37 | sorted_indices[start_idx + offset] = i 38 | 39 | 40 | @annotate 41 | def input_start_indices(i, counts): 42 | return 0 if i == 0 else counts[i - 1] 43 | 44 | 45 | @annotate 46 | def output_start_indices(i, item, indices): 47 | indices[i] = item 48 | 49 | 50 | @annotate 51 | def fill_keys(i, x, y, z, h, eps, qmax, rmax, indices, keys): 52 | c = declare('matrix(3, "int")') 53 | find_cell_id(x[i], y[i], z[i], h, eps, c) 54 | key = flatten(c[0], c[1], c[2], qmax, rmax) 55 | keys[i] = key 56 | indices[i] = i 57 | 58 | 59 | @annotate 60 | def input_scan_keys(i, keys): 61 | return 1 if i == 0 or keys[i] != keys[i - 1] else 0 62 | 63 | 64 | @annotate 65 | def output_scan_keys(i, item, prev_item, keys, start_indices): 66 | key = keys[i] 67 | if item != prev_item: 68 | start_indices[key] = i 69 | 70 | 71 | @annotate 72 | def fill_bin_counts(i, keys, start_indices, bin_counts, num_particles): 73 | 
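    # The keys are sorted, so each bin's count is the gap between successive
    # start indices; the thread for the last particle also closes out the
    # final bin explicitly below.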
if i == num_particles - 1: 74 | last_key = keys[num_particles - 1] 75 | bin_counts[last_key] = num_particles - start_indices[last_key] 76 | if i == 0 or keys[i] == keys[i - 1]: 77 | return 78 | key = keys[i] 79 | prev_key = keys[i - 1] 80 | bin_counts[prev_key] = start_indices[key] - start_indices[prev_key] 81 | 82 | 83 | @annotate 84 | def find_neighbor_lengths_knl(i, x, y, z, h, eps, qmax, rmax, start_indices, 85 | sorted_indices, bin_counts, nbr_lengths, 86 | max_key): 87 | d = h * h 88 | q_c = declare('matrix(3, "int")') 89 | find_cell_id(x[i], y[i], z[i], h, eps, q_c) 90 | 91 | for p in range(-1, 2): 92 | for q in range(-1, 2): 93 | for r in range(-1, 2): 94 | cx = q_c[0] + p 95 | cy = q_c[1] + q 96 | cz = q_c[2] + r 97 | 98 | key = flatten(cx, cy, cz, qmax, rmax) 99 | 100 | if key >= max_key or key < 0: 101 | continue 102 | 103 | start_idx = start_indices[key] 104 | np = bin_counts[key] 105 | 106 | for k in range(np): 107 | j = sorted_indices[start_idx + k] 108 | xij = x[i] - x[j] 109 | yij = y[i] - y[j] 110 | zij = z[i] - z[j] 111 | rij2 = xij * xij + yij * yij + zij * zij 112 | 113 | if rij2 < d: 114 | nbr_lengths[i] += 1 115 | 116 | 117 | @annotate 118 | def find_neighbors_knl(i, x, y, z, h, eps, qmax, rmax, start_indices, sorted_indices, 119 | bin_counts, nbr_starts, nbrs, max_key): 120 | d = h * h 121 | q_c = declare('matrix(3, "int")') 122 | find_cell_id(x[i], y[i], z[i], h, eps, q_c) 123 | length = 0 124 | nbr_start_idx = nbr_starts[i] 125 | 126 | for p in range(-1, 2): 127 | for q in range(-1, 2): 128 | for r in range(-1, 2): 129 | cx = q_c[0] + p 130 | cy = q_c[1] + q 131 | cz = q_c[2] + r 132 | 133 | key = flatten(cx, cy, cz, qmax, rmax) 134 | 135 | if key >= max_key or key < 0: 136 | continue 137 | 138 | start_idx = start_indices[key] 139 | np = bin_counts[key] 140 | 141 | for k in range(np): 142 | j = sorted_indices[start_idx + k] 143 | xij = x[i] - x[j] 144 | yij = y[i] - y[j] 145 | zij = z[i] - z[j] 146 | rij2 = xij * xij + yij * yij + zij * zij 147 | 148 | if rij2 < d: 149 | nbrs[nbr_start_idx + length] = j 150 | length += 1 151 | 152 | 153 | @annotate 154 | def find_neighbor_lengths_periodic_knl(i, x, y, z, h, eps, xmax, ymax, zmax, 155 | pmax, qmax, rmax, start_indices, 156 | sorted_indices, bin_counts, nbr_lengths, 157 | max_key): 158 | d = h * h 159 | q_c = declare('matrix(3, "int")') 160 | xij, yij, zij = declare('double', 3) 161 | find_cell_id(x[i], y[i], z[i], h, eps, q_c) 162 | 163 | for p in range(-1, 2): 164 | for q in range(-1, 2): 165 | for r in range(-1, 2): 166 | cx = q_c[0] + p 167 | cy = q_c[1] + q 168 | cz = q_c[2] + r 169 | 170 | cx_f = cast(cx, "float") 171 | cy_f = cast(cy, "float") 172 | cz_f = cast(cz, "float") 173 | 174 | xoffset = cast(floor(cx_f / pmax), "int") 175 | yoffset = cast(floor(cy_f / qmax), "int") 176 | zoffset = cast(floor(cz_f / rmax), "int") 177 | 178 | cx -= xoffset * pmax 179 | cy -= yoffset * qmax 180 | cz -= zoffset * rmax 181 | 182 | key = flatten(cx, cy, cz, qmax, rmax) 183 | 184 | if key >= max_key or key < 0: 185 | continue 186 | 187 | start_idx = start_indices[key] 188 | np = bin_counts[key] 189 | 190 | for k in range(np): 191 | j = sorted_indices[start_idx + k] 192 | xij = abs(x[i] - x[j]) 193 | yij = abs(y[i] - y[j]) 194 | zij = abs(z[i] - z[j]) 195 | xij = min(xij, xmax - xij) 196 | yij = min(yij, ymax - yij) 197 | zij = min(zij, zmax - zij) 198 | rij2 = xij * xij + yij * yij + zij * zij 199 | 200 | if rij2 < d: 201 | nbr_lengths[i] += 1 202 | 203 | 204 | @annotate 205 | def find_neighbors_periodic_knl(i, x, y, z, h, 
eps, xmax, ymax, zmax, 206 | pmax, qmax, rmax, start_indices, sorted_indices, 207 | bin_counts, nbr_starts, nbrs, max_key): 208 | d = h * h 209 | q_c = declare('matrix(3, "int")') 210 | xij, yij, zij = declare('double', 3) 211 | find_cell_id(x[i], y[i], z[i], h, eps, q_c) 212 | length = 0 213 | nbr_start_idx = nbr_starts[i] 214 | 215 | for p in range(-1, 2): 216 | for q in range(-1, 2): 217 | for r in range(-1, 2): 218 | cx = q_c[0] + p 219 | cy = q_c[1] + q 220 | cz = q_c[2] + r 221 | 222 | cx_f = cast(cx, "float") 223 | cy_f = cast(cy, "float") 224 | cz_f = cast(cz, "float") 225 | 226 | xoffset = cast(floor(cx_f / pmax), "int") 227 | yoffset = cast(floor(cy_f / qmax), "int") 228 | zoffset = cast(floor(cz_f / rmax), "int") 229 | 230 | cx -= xoffset * pmax 231 | cy -= yoffset * qmax 232 | cz -= zoffset * rmax 233 | 234 | key = flatten(cx, cy, cz, qmax, rmax) 235 | 236 | if key >= max_key or key < 0: 237 | continue 238 | 239 | start_idx = start_indices[key] 240 | np = bin_counts[key] 241 | 242 | for k in range(np): 243 | j = sorted_indices[start_idx + k] 244 | xij = abs(x[i] - x[j]) 245 | yij = abs(y[i] - y[j]) 246 | zij = abs(z[i] - z[j]) 247 | xij = min(xij, xmax - xij) 248 | yij = min(yij, ymax - yij) 249 | zij = min(zij, zmax - zij) 250 | rij2 = xij * xij + yij * yij + zij * zij 251 | 252 | if rij2 < d: 253 | nbrs[nbr_start_idx + length] = j 254 | length += 1 255 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/3D/performance_comparison.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | from md_nnps_periodic import MDNNPSSolverPeriodic 4 | 5 | from compyle.config import get_config 6 | from hoomd_periodic import simulate 7 | 8 | 9 | def solve(n, backend, tf=4., dt=0.02, use_count_sort=False): 10 | if backend == 'hoomd': 11 | return simulate(n, dt, tf) 12 | else: 13 | solver = MDNNPSSolverPeriodic( 14 | n, dx=2., backend=backend, use_count_sort=use_count_sort) 15 | start = time.time() 16 | solver.solve(tf, dt) 17 | end = time.time() 18 | print("Time taken for backend = %s, N = %i is %g secs" % 19 | (backend, n, (end - start))) 20 | return end - start 21 | 22 | 23 | def compare(backends, n_list, niter=3, use_count_sort=False): 24 | t_list = {b: [] for b in backends} 25 | speedups = {b: [] for b in backends} 26 | for backend in backends: 27 | for n in n_list: 28 | print("Running for N = %i" % n) 29 | t = 1e9 30 | for it in range(niter): 31 | t = min(t, solve(n, backend, use_count_sort=use_count_sort)) 32 | t_list[backend].append(t) 33 | 34 | if 'hoomd' in backends: 35 | for backend in backends: 36 | for i, n in enumerate(n_list): 37 | speedups[backend].append( 38 | t_list['hoomd'][i] / t_list[backend][i]) 39 | else: 40 | speedups = None 41 | 42 | return speedups, t_list 43 | 44 | 45 | def plot(n_list, speedups, t_list, label): 46 | backend_label_map = {'hoomd': 'HooMD', 47 | 'opencl': 'OpenCL', 'cuda': 'CUDA'} 48 | import matplotlib.pyplot as plt 49 | plt.figure() 50 | 51 | if speedups: 52 | for backend, arr in speedups.items(): 53 | if backend == "hoomd": 54 | continue 55 | plt.semilogx(n_list, arr, 'x-', label=backend_label_map[backend]) 56 | 57 | plt.xlabel("Number of particles") 58 | plt.ylabel("Speedup") 59 | plt.legend() 60 | plt.grid(True) 61 | plt.savefig("%s_speedup_%s.png" % 62 | (label, "_".join(speedups.keys())), dpi=300) 63 | 64 | plt.clf() 65 | 66 | for backend, arr in t_list.items(): 67 | plt.loglog(n_list, arr, 'x-', 
label=backend_label_map[backend])
68 |
69 |     plt.xlabel("Number of particles")
70 |     plt.ylabel("Time (secs)")
71 |     plt.legend()
72 |     plt.grid(True)
73 |     plt.savefig("%s_time_%s.png" % (label, "_".join(t_list.keys())), dpi=300)
74 |
75 |
76 | if __name__ == "__main__":
77 |     from argparse import ArgumentParser
78 |     p = ArgumentParser()
79 |
80 |     p.add_argument(
81 |         '--use-count-sort', action='store_true', dest='use_count_sort',
82 |         default=False, help='Use count sort instead of radix sort'
83 |     )
84 |     o = p.parse_args()
85 |
86 |     n_list = [1000 * (2 ** i) for i in range(11)]
87 |     backends = ["cuda", "hoomd"]
88 |     print("Running for", n_list)
89 |     speedups, t_list = compare(backends, n_list,
90 |                                use_count_sort=o.use_count_sort)
91 |     plot(n_list, speedups, t_list, "hoomd")
92 |
--------------------------------------------------------------------------------
/examples/molecular_dynamics/README.rst:
--------------------------------------------------------------------------------
 1 | Molecular Dynamics Example
 2 | --------------------------
 3 |
 4 | We have 3 implementations of a simple molecular dynamics simulation
 5 | of an N-body problem in a Lennard-Jones potential. The first implementation
 6 | is a simple :math:`O(N^2)` implementation that can be found in
 7 | :code:`md_simple.py`. The second implementation uses nearest neighbor
 8 | searching to reduce the complexity to :math:`O(N)` and can be
 9 | found in :code:`md_nnps.py`.
10 |
11 | We also have two different implementations of nearest neighbor search
12 | algorithms, one using a radix sort on the GPU and a numpy sort on the CPU,
13 | and the other using a native counting sort implementation. The counting
14 | sort version is about 30% faster. Both these implementations can be
15 | found in :code:`nnps.py`.
16 |
17 | This example has been discussed at length in
18 | `this `_
19 | SciPy 2020 paper.
20 | The following commands can be used to reproduce the performance results
21 | shown in the paper.
22 |
23 | +------------------+---------------------------------------------------------------+
24 | | Figure 2         | `python performance_comparison.py -c omp_comp --nnps simple`  |
25 | +------------------+---------------------------------------------------------------+
26 | | Figure 3         | `python performance_comparison.py -c gpu_comp --nnps simple`  |
27 | +------------------+---------------------------------------------------------------+
28 | | Figure 4 & 5     | `python performance_comparison.py -c gpu_comp`                |
29 | +------------------+---------------------------------------------------------------+
30 | | Figure 6 & 7     | `python performance_comparison.py -c comp_algo`               |
31 | +------------------+---------------------------------------------------------------+
32 | | Figure 8         | `cd 3D && python performance_comparison.py --use-count-sort`  |
33 | +------------------+---------------------------------------------------------------+
34 |
35 | To generate energy plots for the HooMD and Compyle implementations, run the
36 | script :code:`3D/compare_results.py`.
37 |
38 | Users can use the Google Colab notebook
39 | `here `_
40 | to play around with the example.
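The solvers can also be driven directly from Python. For example, a minimal
sketch using the classes defined in :code:`md_nnps.py` (assuming the Cython
backend) is::

    from md_nnps import MDNNPSSolver

    solver = MDNNPSSolver(1000, backend='cython')
    solver.solve(40., 0.02)  # integrate to t = 40 with dt = 0.02
    solver.pull()            # copy data back from the device, if any
    solver.plot()            # requires matplotlib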
41 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/md_nnps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from compyle.config import get_config 6 | from compyle.api import declare, annotate 7 | from compyle.parallel import Elementwise, Reduction 8 | from compyle.array import get_backend, wrap 9 | import compyle.array as carr 10 | 11 | from nnps import NNPSCountingSort, NNPSRadixSort 12 | from md_simple import integrate_step1, integrate_step2, \ 13 | boundary_condition, MDSolverBase 14 | 15 | 16 | @annotate 17 | def calculate_force(i, x, y, fx, fy, pe, nbr_starts, nbr_lengths, nbrs): 18 | start_idx = nbr_starts[i] 19 | length = nbr_lengths[i] 20 | for k in range(start_idx, start_idx + length): 21 | j = nbrs[k] 22 | if i == j: 23 | continue 24 | xij = x[i] - x[j] 25 | yij = y[i] - y[j] 26 | rij2 = xij * xij + yij * yij 27 | irij2 = 1.0 / rij2 28 | irij6 = irij2 * irij2 * irij2 29 | irij12 = irij6 * irij6 30 | pe[i] += (2 * (irij12 - irij6)) 31 | f_base = 24 * irij2 * (2 * irij12 - irij6) 32 | 33 | fx[i] += f_base * xij 34 | fy[i] += f_base * yij 35 | 36 | 37 | @annotate 38 | def step_method1(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, 39 | ymin, ymax, m, dt, nbr_starts, nbr_lengths, 40 | nbrs): 41 | integrate_step1(i, m, dt, x, y, vx, vy, fx, fy) 42 | boundary_condition(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, 43 | ymin, ymax) 44 | 45 | 46 | @annotate 47 | def step_method2(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, 48 | ymin, ymax, m, dt, nbr_starts, nbr_lengths, 49 | nbrs): 50 | calculate_force(i, x, y, fx, fy, pe, nbr_starts, nbr_lengths, nbrs) 51 | integrate_step2(i, m, dt, x, y, vx, vy, fx, fy) 52 | 53 | 54 | class MDNNPSSolver(MDSolverBase): 55 | def __init__(self, num_particles, x=None, y=None, vx=None, vy=None, 56 | xmax=100., ymax=100., dx=1.5, init_T=0., 57 | backend=None, use_count_sort=False): 58 | super().__init__(num_particles, x=x, y=y, vx=vx, vy=vy, 59 | xmax=xmax, ymax=ymax, dx=dx, init_T=init_T, 60 | backend=backend) 61 | self.init_forces = Elementwise(calculate_force, backend=self.backend) 62 | self.step1 = Elementwise(step_method1, backend=self.backend) 63 | self.step2 = Elementwise(step_method2, backend=self.backend) 64 | self.nnps_algorithm = NNPSCountingSort \ 65 | if use_count_sort else NNPSRadixSort 66 | self.nnps = self.nnps_algorithm(self.x, self.y, 3., self.xmax, 67 | self.ymax, backend=self.backend) 68 | 69 | def solve(self, t, dt): 70 | num_steps = int(t // dt) 71 | self.nnps.build() 72 | self.nnps.get_neighbors() 73 | self.init_forces(self.x, self.y, self.fx, self.fy, 74 | self.pe, self.nnps.nbr_starts, 75 | self.nnps.nbr_lengths, self.nnps.nbrs) 76 | for i in range(num_steps): 77 | self.step1(self.x, self.y, self.vx, self.vy, self.fx, 78 | self.fy, self.pe, self.xmin, self.xmax, self.ymin, 79 | self.ymax, self.m, dt, self.nnps.nbr_starts, 80 | self.nnps.nbr_lengths, self.nnps.nbrs) 81 | self.nnps.build() 82 | self.nnps.get_neighbors() 83 | self.step2(self.x, self.y, self.vx, self.vy, self.fx, 84 | self.fy, self.pe, self.xmin, self.xmax, self.ymin, 85 | self.ymax, self.m, dt, self.nnps.nbr_starts, 86 | self.nnps.nbr_lengths, self.nnps.nbrs) 87 | if i % 100 == 0: 88 | self.post_step(i) 89 | 90 | 91 | if __name__ == '__main__': 92 | from compyle.utils import ArgumentParser 93 | p = ArgumentParser() 94 | p.add_argument( 95 | '--use-count-sort', action='store_true', dest='use_count_sort', 96 | 
default=False, help='Use count sort instead of radix sort' 97 | ) 98 | p.add_argument( 99 | '--show', action='store_true', dest='show', 100 | default=False, help='Show plot at end of simulation' 101 | ) 102 | 103 | p.add_argument('-n', action='store', type=int, dest='n', 104 | default=100, help='Number of particles') 105 | 106 | p.add_argument('--tf', action='store', type=float, dest='t', 107 | default=40., help='Final time') 108 | 109 | p.add_argument('--dt', action='store', type=float, dest='dt', 110 | default=0.02, help='Time step') 111 | 112 | o = p.parse_args() 113 | 114 | solver = MDNNPSSolver( 115 | o.n, 116 | backend=o.backend, 117 | use_count_sort=o.use_count_sort) 118 | 119 | start = time.time() 120 | solver.solve(o.t, o.dt) 121 | end = time.time() 122 | print("Time taken for N = %i is %g secs" % (o.n, (end - start))) 123 | if o.show: 124 | solver.pull() 125 | solver.plot() 126 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/md_simple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from compyle.config import get_config 6 | from compyle.api import declare, annotate 7 | from compyle.parallel import Elementwise, Reduction 8 | from compyle.array import get_backend, wrap 9 | 10 | import compyle.array as carr 11 | 12 | 13 | @annotate 14 | def calculate_energy(i, vx, vy, pe, num_particles): 15 | ke = 0.5 * (vx[i] * vx[i] + vy[i] * vy[i]) 16 | return pe[i] + ke 17 | 18 | 19 | @annotate 20 | def calculate_force(i, x, y, fx, fy, pe, num_particles): 21 | force_cutoff = 3. 22 | force_cutoff2 = force_cutoff * force_cutoff 23 | for j in range(num_particles): 24 | if i == j: 25 | continue 26 | xij = x[i] - x[j] 27 | yij = y[i] - y[j] 28 | rij2 = xij * xij + yij * yij 29 | if rij2 > force_cutoff2: 30 | continue 31 | irij2 = 1.0 / rij2 32 | irij6 = irij2 * irij2 * irij2 33 | irij12 = irij6 * irij6 34 | pe[i] += (2 * (irij12 - irij6)) 35 | f_base = 24 * irij2 * (2 * irij12 - irij6) 36 | 37 | fx[i] += f_base * xij 38 | fy[i] += f_base * yij 39 | 40 | 41 | @annotate 42 | def step_method1(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, 43 | ymin, ymax, m, dt, num_particles): 44 | integrate_step1(i, m, dt, x, y, vx, vy, fx, fy) 45 | boundary_condition(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, 46 | ymin, ymax) 47 | 48 | 49 | @annotate 50 | def step_method2(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, 51 | ymin, ymax, m, dt, num_particles): 52 | calculate_force(i, x, y, fx, fy, pe, num_particles) 53 | integrate_step2(i, m, dt, x, y, vx, vy, fx, fy) 54 | 55 | 56 | @annotate 57 | def integrate_step1(i, m, dt, x, y, vx, vy, fx, fy): 58 | axi = fx[i] 59 | ayi = fy[i] 60 | x[i] += vx[i] * dt + 0.5 * axi * dt * dt 61 | y[i] += vy[i] * dt + 0.5 * ayi * dt * dt 62 | vx[i] += 0.5 * axi * dt 63 | vy[i] += 0.5 * ayi * dt 64 | 65 | 66 | @annotate 67 | def integrate_step2(i, m, dt, x, y, vx, vy, fx, fy): 68 | axi = fx[i] 69 | ayi = fy[i] 70 | vx[i] += 0.5 * axi * dt 71 | vy[i] += 0.5 * ayi * dt 72 | 73 | 74 | @annotate 75 | def boundary_condition(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, ymin, ymax): 76 | xwidth = xmax - xmin 77 | ywidth = ymax - ymin 78 | stiffness = 50. 79 | pe[i] = 0. 
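    # The walls act as stiff linear springs: within a distance of 0.5 of a
    # wall, the particle feels a restoring force stiffness*d and stores a
    # potential energy of 0.5*stiffness*d**2, where d is the penetration
    # depth.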
80 | if x[i] < 0.5: 81 | fx[i] = stiffness * (0.5 - x[i]) 82 | pe[i] += 0.5 * stiffness * (0.5 - x[i]) * (0.5 - x[i]) 83 | elif x[i] > xwidth - 0.5: 84 | fx[i] = stiffness * (xwidth - 0.5 - x[i]) 85 | pe[i] += 0.5 * stiffness * (xwidth - 0.5 - x[i]) * (xwidth - 0.5 - x[i]) 86 | else: 87 | fx[i] = 0. 88 | 89 | if y[i] < 0.5: 90 | fy[i] = stiffness * (0.5 - y[i]) 91 | pe[i] += 0.5 * stiffness * (0.5 - y[i]) * (0.5 - y[i]) 92 | elif y[i] > ywidth - 0.5: 93 | fy[i] = stiffness * (ywidth - 0.5 - y[i]) 94 | pe[i] += 0.5 * stiffness * (ywidth - 0.5 - y[i]) * (ywidth - 0.5 - y[i]) 95 | else: 96 | fy[i] = 0. 97 | 98 | 99 | class MDSolverBase(object): 100 | def __init__(self, num_particles, x=None, y=None, vx=None, vy=None, 101 | xmax=100., ymax=100., dx=1.5, init_T=0., 102 | backend=None): 103 | self.backend = get_backend(backend) 104 | self.num_particles = num_particles 105 | self.xmin, self.xmax = 0., xmax 106 | self.ymin, self.ymax = 0., ymax 107 | self.m = 1. 108 | if x is None and y is None: 109 | self.x, self.y = self.setup_positions(num_particles, dx) 110 | if vx is None and vy is None: 111 | self.vx, self.vy = self.setup_velocities(init_T, num_particles) 112 | self.fx = carr.zeros_like(self.x, backend=self.backend) 113 | self.fy = carr.zeros_like(self.x, backend=self.backend) 114 | self.pe = carr.zeros_like(self.x, backend=self.backend) 115 | self.energy_calc = Reduction("a+b", map_func=calculate_energy, 116 | backend=self.backend) 117 | 118 | def setup_velocities(self, T, num_particles): 119 | np.random.seed(123) 120 | vx = np.random.uniform(0, 1., size=num_particles).astype(np.float64) 121 | vy = np.random.uniform(0, 1., size=num_particles).astype(np.float64) 122 | T_current = np.average(vx ** 2 + vy ** 2) 123 | scaling_factor = (T / T_current) ** 0.5 124 | vx = vx * scaling_factor 125 | vy = vy * scaling_factor 126 | return wrap(vx, vy, backend=self.backend) 127 | 128 | def setup_positions(self, num_particles, dx): 129 | ndim = np.ceil(num_particles ** 0.5) 130 | dim_length = ndim * dx 131 | 132 | self.xmax = dim_length * 3 133 | self.ymax = dim_length * 3 134 | 135 | xmin_eff = ((self.xmax - self.xmin) - dim_length) / 2. 136 | xmax_eff = ((self.xmax - self.xmin) + dim_length) / 2. 
137 | 138 | x, y = np.mgrid[xmin_eff:xmax_eff:dx, xmin_eff:xmax_eff:dx] 139 | x = x.ravel().astype(np.float64)[:num_particles] 140 | y = y.ravel().astype(np.float64)[:num_particles] 141 | return wrap(x, y, backend=self.backend) 142 | 143 | def post_step(self, step): 144 | energy = self.energy_calc(self.vx, self.vy, self.pe, 145 | self.num_particles) 146 | print("Energy at time step =", step, "is", energy) 147 | 148 | def pull(self): 149 | self.x.pull() 150 | self.y.pull() 151 | 152 | def plot(self): 153 | import matplotlib.pyplot as plt 154 | plt.xlim(self.xmin, self.xmax) 155 | plt.ylim(self.ymin, self.ymax) 156 | plt.scatter(self.x.data, self.y.data, 4.2) 157 | plt.show() 158 | 159 | 160 | class MDSolver(MDSolverBase): 161 | def __init__(self, num_particles, x=None, y=None, vx=None, vy=None, 162 | xmax=100., ymax=100., dx=1.5, init_T=0., 163 | backend=None): 164 | super().__init__(num_particles, x=x, y=y, vx=vx, vy=vy, 165 | xmax=xmax, ymax=ymax, dx=dx, init_T=init_T, 166 | backend=backend) 167 | self.init_forces = Elementwise(calculate_force, backend=self.backend) 168 | self.step1 = Elementwise(step_method1, backend=self.backend) 169 | self.step2 = Elementwise(step_method2, backend=self.backend) 170 | 171 | def solve(self, t, dt): 172 | num_steps = int(t // dt) 173 | self.init_forces(self.x, self.y, self.fx, self.fy, self.pe, 174 | self.num_particles) 175 | for i in range(num_steps): 176 | self.step1(self.x, self.y, self.vx, self.vy, self.fx, self.fy, 177 | self.pe, self.xmin, self.xmax, self.ymin, self.ymax, 178 | self.m, dt, self.num_particles) 179 | self.step2(self.x, self.y, self.vx, self.vy, self.fx, self.fy, 180 | self.pe, self.xmin, self.xmax, self.ymin, self.ymax, 181 | self.m, dt, self.num_particles) 182 | if i % 100 == 0: 183 | self.post_step(i) 184 | 185 | 186 | if __name__ == '__main__': 187 | from compyle.utils import ArgumentParser 188 | p = ArgumentParser() 189 | p.add_argument( 190 | '--show', action='store_true', dest='show', 191 | default=False, help='Show plot at end of simulation' 192 | ) 193 | 194 | p.add_argument('-n', action='store', type=int, dest='n', 195 | default=100, help='Number of particles') 196 | 197 | p.add_argument('--tf', action='store', type=float, dest='t', 198 | default=40., help='Final time') 199 | 200 | p.add_argument('--dt', action='store', type=float, dest='dt', 201 | default=0.02, help='Time step') 202 | 203 | o = p.parse_args() 204 | 205 | solver = MDSolver(o.n, backend=o.backend) 206 | 207 | start = time.time() 208 | solver.solve(o.t, o.dt) 209 | end = time.time() 210 | print("Time taken for N = %i is %g secs" % (o.n, (end - start))) 211 | if o.show: 212 | solver.pull() 213 | solver.plot() 214 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/nnps.py: -------------------------------------------------------------------------------- 1 | from nnps_kernels import * 2 | from compyle.config import get_config 3 | from compyle.api import declare, annotate 4 | from compyle.parallel import serial, Elementwise, Reduction, Scan 5 | from compyle.array import get_backend, wrap 6 | from compyle.low_level import atomic_inc, cast 7 | from math import floor 8 | from time import time 9 | 10 | import numpy as np 11 | import compyle.array as carr 12 | 13 | 14 | class NNPS(object): 15 | def __init__(self, x, y, h, xmax, ymax, backend=None): 16 | self.backend = backend 17 | self.num_particles = x.length 18 | self.x, self.y = x, y 19 | self.h = h 20 | 21 | cmax = np.array([floor(xmax / h), floor(ymax / 
h)], dtype=np.int32) 22 | self.max_key = 1 + flatten(cmax[0], cmax[1], 1 + cmax[1]) 23 | self.qmax = 1 + cmax[1] 24 | 25 | # neighbor kernels 26 | self.find_neighbor_lengths = Elementwise(find_neighbor_lengths_knl, 27 | backend=self.backend) 28 | self.find_neighbors = Elementwise(find_neighbors_knl, 29 | backend=self.backend) 30 | self.scan_start_indices = Scan(input=input_start_indices, 31 | output=output_start_indices, 32 | scan_expr="a+b", dtype=np.int32, 33 | backend=self.backend) 34 | self.init_arrays() 35 | 36 | def init_arrays(self): 37 | # sort arrays 38 | self.bin_counts = carr.zeros(self.max_key, dtype=np.int32, 39 | backend=self.backend) 40 | self.start_indices = carr.zeros(self.max_key, dtype=np.int32, 41 | backend=self.backend) 42 | self.keys = carr.zeros(self.num_particles, dtype=np.int32, 43 | backend=self.backend) 44 | self.sorted_indices = carr.zeros(self.num_particles, dtype=np.int32, 45 | backend=self.backend) 46 | 47 | # neighbor arrays 48 | self.nbr_lengths = carr.zeros(self.num_particles, dtype=np.int32, 49 | backend=self.backend) 50 | self.nbr_starts = carr.zeros(self.num_particles, dtype=np.int32, 51 | backend=self.backend) 52 | self.nbrs = carr.zeros(2 * self.num_particles, dtype=np.int32, 53 | backend=self.backend) 54 | 55 | def reset_arrays(self): 56 | # sort arrays 57 | self.bin_counts.fill(0) 58 | self.start_indices.fill(0) 59 | self.sorted_indices.fill(0) 60 | 61 | # neighbors array 62 | self.nbr_lengths.fill(0) 63 | self.nbr_starts.fill(0) 64 | 65 | def get_neighbors(self): 66 | self.find_neighbor_lengths(self.x, self.y, self.h, self.qmax, 67 | self.start_indices, self.sorted_indices, 68 | self.bin_counts, self.nbr_lengths, 69 | self.max_key) 70 | self.scan_start_indices(counts=self.nbr_lengths, 71 | indices=self.nbr_starts) 72 | self.total_neighbors = int(self.nbr_lengths[-1] + self.nbr_starts[-1]) 73 | self.nbrs.resize(self.total_neighbors) 74 | self.find_neighbors(self.x, self.y, self.h, self.qmax, 75 | self.start_indices, self.sorted_indices, 76 | self.bin_counts, self.nbr_starts, 77 | self.nbrs, self.max_key) 78 | 79 | 80 | class NNPSCountingSort(NNPS): 81 | def __init__(self, x, y, h, xmax, ymax, backend=None): 82 | super().__init__(x, y, h, xmax, ymax, backend=backend) 83 | # sort kernels 84 | self.count_bins = Elementwise(count_bins, backend=self.backend) 85 | self.sort_indices = Elementwise(sort_indices, backend=self.backend) 86 | 87 | def init_arrays(self): 88 | super().init_arrays() 89 | self.sort_offsets = carr.zeros(self.num_particles, dtype=np.int32, 90 | backend=self.backend) 91 | 92 | def reset_arrays(self): 93 | super().reset_arrays() 94 | # sort arrays 95 | self.sort_offsets.fill(0) 96 | 97 | def build(self): 98 | self.reset_arrays() 99 | self.count_bins(self.x, self.y, self.h, self.qmax, self.keys, 100 | self.bin_counts, self.sort_offsets) 101 | self.scan_start_indices(counts=self.bin_counts, 102 | indices=self.start_indices) 103 | self.sort_indices(self.keys, self.sort_offsets, self.start_indices, 104 | self.sorted_indices) 105 | 106 | 107 | class NNPSRadixSort(NNPS): 108 | def __init__(self, x, y, h, xmax, ymax, backend=None): 109 | super().__init__(x, y, h, xmax, ymax, backend=backend) 110 | self.max_bits = np.ceil(np.log2(self.max_key)) 111 | 112 | # sort kernels 113 | self.fill_keys = Elementwise(fill_keys, backend=self.backend) 114 | self.fill_bin_counts = Elementwise(fill_bin_counts, 115 | backend=self.backend) 116 | self.scan_keys = Scan(input=input_scan_keys, 117 | output=output_scan_keys, 118 | scan_expr="a+b", dtype=np.int32, 119 
| backend=self.backend) 120 | 121 | def init_arrays(self): 122 | super().init_arrays() 123 | # sort arrays 124 | self.sorted_keys = carr.zeros(self.num_particles, dtype=np.int32, 125 | backend=self.backend) 126 | self.indices = carr.zeros(self.num_particles, dtype=np.int32, 127 | backend=self.backend) 128 | 129 | def reset_arrays(self): 130 | super().reset_arrays() 131 | self.sorted_keys.fill(0) 132 | 133 | def build(self): 134 | self.reset_arrays() 135 | self.fill_keys(self.x, self.y, self.h, self.qmax, self.indices, 136 | self.keys) 137 | self.sorted_keys, self.sorted_indices = carr.sort_by_keys( 138 | [self.keys, self.indices], 139 | key_bits=self.max_bits, backend=self.backend) 140 | self.scan_keys(keys=self.sorted_keys, 141 | start_indices=self.start_indices) 142 | self.fill_bin_counts(self.sorted_keys, self.start_indices, 143 | self.bin_counts, self.num_particles) 144 | 145 | 146 | if __name__ == "__main__": 147 | import sys 148 | backend = sys.argv[1] if len(sys.argv) > 1 else 'cython' 149 | np.random.seed(123) 150 | num_particles = 20 151 | x = np.random.uniform(0, 10., size=num_particles).astype(np.float32) 152 | y = np.random.uniform(0, 10., size=num_particles).astype(np.float32) 153 | x, y = wrap(x, y, backend=backend) 154 | nnps = NNPSRadixSort(x, y, 3., 10., 10., backend=backend) 155 | nnps.build() 156 | nnps.get_neighbors() 157 | print(nnps.start_indices) 158 | print(nnps.bin_counts) 159 | print(nnps.nbr_lengths) 160 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/nnps_kernels.py: -------------------------------------------------------------------------------- 1 | from compyle.api import declare, annotate 2 | from compyle.parallel import serial 3 | from compyle.low_level import atomic_inc, cast 4 | from math import floor 5 | import numpy as np 6 | 7 | 8 | @annotate 9 | def find_cell_id(x, y, h, c): 10 | c[0] = cast(floor((x) / h), "int") 11 | c[1] = cast(floor((y) / h), "int") 12 | 13 | 14 | @annotate 15 | def flatten(p, q, qmax): 16 | return p * qmax + q 17 | 18 | 19 | @serial 20 | @annotate 21 | def count_bins(i, x, y, h, cmax, keys, bin_counts, 22 | sort_offsets): 23 | c = declare('matrix(2, "int")') 24 | find_cell_id(x[i], y[i], h, c) 25 | key = flatten(c[0], c[1], cmax) 26 | keys[i] = key 27 | idx = atomic_inc(bin_counts[key]) 28 | sort_offsets[i] = idx 29 | 30 | 31 | @annotate 32 | def sort_indices(i, keys, sort_offsets, start_indices, sorted_indices): 33 | key = keys[i] 34 | offset = sort_offsets[i] 35 | start_idx = start_indices[key] 36 | sorted_indices[start_idx + offset] = i 37 | 38 | 39 | @annotate 40 | def input_start_indices(i, counts): 41 | return 0 if i == 0 else counts[i - 1] 42 | 43 | 44 | @annotate 45 | def output_start_indices(i, item, indices): 46 | indices[i] = item 47 | 48 | 49 | @annotate 50 | def fill_keys(i, x, y, h, cmax, indices, keys): 51 | c = declare('matrix(2, "int")') 52 | find_cell_id(x[i], y[i], h, c) 53 | key = flatten(c[0], c[1], cmax) 54 | keys[i] = key 55 | indices[i] = i 56 | 57 | 58 | @annotate 59 | def input_scan_keys(i, keys): 60 | return 1 if i == 0 or keys[i] != keys[i - 1] else 0 61 | 62 | 63 | @annotate 64 | def output_scan_keys(i, item, prev_item, keys, start_indices): 65 | key = keys[i] 66 | if item != prev_item: 67 | start_indices[key] = i 68 | 69 | 70 | @annotate 71 | def fill_bin_counts(i, keys, start_indices, bin_counts, num_particles): 72 | if i == num_particles - 1: 73 | last_key = keys[num_particles - 1] 74 | bin_counts[last_key] = num_particles - 
start_indices[last_key] 75 | if i == 0 or keys[i] == keys[i - 1]: 76 | return 77 | key = keys[i] 78 | prev_key = keys[i - 1] 79 | bin_counts[prev_key] = start_indices[key] - start_indices[prev_key] 80 | 81 | 82 | @annotate 83 | def find_neighbor_lengths_knl(i, x, y, h, cmax, start_indices, sorted_indices, 84 | bin_counts, nbr_lengths, max_key): 85 | d = h * h 86 | q_c = declare('matrix(2, "int")') 87 | find_cell_id(x[i], y[i], h, q_c) 88 | 89 | for p in range(-1, 2): 90 | for q in range(-1, 2): 91 | cx = q_c[0] + p 92 | cy = q_c[1] + q 93 | 94 | key = flatten(cx, cy, cmax) 95 | 96 | if key >= max_key or key < 0: 97 | continue 98 | 99 | start_idx = start_indices[key] 100 | np = bin_counts[key] 101 | 102 | for k in range(np): 103 | j = sorted_indices[start_idx + k] 104 | xij = x[i] - x[j] 105 | yij = y[i] - y[j] 106 | rij2 = xij * xij + yij * yij 107 | 108 | if rij2 < d: 109 | nbr_lengths[i] += 1 110 | 111 | 112 | @annotate 113 | def find_neighbors_knl(i, x, y, h, cmax, start_indices, sorted_indices, 114 | bin_counts, nbr_starts, nbrs, max_key): 115 | d = h * h 116 | q_c = declare('matrix(2, "int")') 117 | find_cell_id(x[i], y[i], h, q_c) 118 | length = 0 119 | nbr_start_idx = nbr_starts[i] 120 | 121 | for p in range(-1, 2): 122 | for q in range(-1, 2): 123 | cx = q_c[0] + p 124 | cy = q_c[1] + q 125 | 126 | key = flatten(cx, cy, cmax) 127 | 128 | if key >= max_key or key < 0: 129 | continue 130 | 131 | start_idx = start_indices[key] 132 | np = bin_counts[key] 133 | 134 | for k in range(np): 135 | j = sorted_indices[start_idx + k] 136 | xij = x[i] - x[j] 137 | yij = y[i] - y[j] 138 | rij2 = xij * xij + yij * yij 139 | 140 | if rij2 < d: 141 | nbrs[nbr_start_idx + length] = j 142 | length += 1 143 | -------------------------------------------------------------------------------- /examples/molecular_dynamics/performance_comparison.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import md_simple 4 | import md_nnps 5 | 6 | from compyle.config import get_config 7 | 8 | 9 | def solve(n, backend, solver_algo, tf=0.5, dt=0.02, use_count_sort=False): 10 | solver = solver_algo(n, backend=backend.replace("_omp", "")) 11 | start = time.time() 12 | solver.solve(tf, dt) 13 | end = time.time() 14 | print("Time taken for backend = %s, N = %i is %g secs" % 15 | (backend, n, (end - start))) 16 | return end - start 17 | 18 | 19 | def compare(backends, n_list, solver_algo, niter=3): 20 | t_list = {b: [] for b in backends} 21 | speedups = {b: [] for b in backends} 22 | for n in n_list: 23 | print("Running for N = %i" % n) 24 | for backend in backends: 25 | if "omp" in backend: 26 | get_config().use_openmp = True 27 | t = 1e9 28 | for it in range(niter): 29 | t = min(t, solve(n, backend, solver_algo)) 30 | t_list[backend].append(t) 31 | if "omp" in backend: 32 | get_config().use_openmp = False 33 | 34 | if 'cython' in backends: 35 | for backend in backends: 36 | for i, n in enumerate(n_list): 37 | speedups[backend].append( 38 | t_list["cython"][i] / t_list[backend][i]) 39 | else: 40 | speedups = None 41 | 42 | return speedups, t_list 43 | 44 | 45 | def compare_implementations(backend, n_list, niter=3): 46 | import matplotlib.pyplot as plt 47 | sp, nnps_tlist = compare([backend], n_list, 48 | md_nnps.MDSolver, niter=niter) 49 | sp, simple_tlist = compare([backend], n_list, 50 | md_simple.MDSolver, niter=niter) 51 | 52 | speedup = [simple_tlist[backend][i] / nnps_tlist[backend][i] 53 | for i in range(len(n_list))] 54 | 55 | 
plt.loglog(n_list, nnps_tlist[backend], 'x-', label="Linear") 56 | plt.loglog(n_list, simple_tlist[backend], 'x-', label="Simple") 57 | 58 | plt.xlabel("Number of particles") 59 | plt.ylabel("Time (secs)") 60 | plt.legend() 61 | plt.grid(True) 62 | plt.savefig("time_comp_impl.png", dpi=300) 63 | 64 | plt.clf() 65 | 66 | plt.loglog(n_list, speedup, 'x-') 67 | 68 | plt.xlabel("Number of particles") 69 | plt.ylabel("Speedup") 70 | plt.grid(True) 71 | plt.savefig("speedup_comp_impl.png", dpi=300) 72 | 73 | 74 | def plot(n_list, speedups, t_list, label): 75 | backend_label_map = {'cython': 'Cython', 'cython_omp': 'OpenMP', 76 | 'opencl': 'OpenCL', 'cuda': 'CUDA'} 77 | import matplotlib.pyplot as plt 78 | plt.figure() 79 | 80 | if speedups: 81 | for backend, arr in speedups.items(): 82 | if backend == "cython": 83 | continue 84 | plt.semilogx(n_list, arr, 'x-', label=backend_label_map[backend]) 85 | 86 | plt.xlabel("Number of particles") 87 | plt.ylabel("Speedup") 88 | plt.legend() 89 | plt.grid(True) 90 | plt.savefig("%s_speedup_%s.png" % 91 | (label, "_".join(speedups.keys())), dpi=300) 92 | 93 | plt.clf() 94 | 95 | for backend, arr in t_list.items(): 96 | plt.loglog(n_list, arr, 'x-', label=backend_label_map[backend]) 97 | 98 | plt.xlabel("Number of particles") 99 | plt.ylabel("Time (secs)") 100 | plt.legend() 101 | plt.grid(True) 102 | plt.savefig("%s_time_%s.png" % (label, "_".join(t_list.keys())), dpi=300) 103 | 104 | 105 | if __name__ == "__main__": 106 | from argparse import ArgumentParser 107 | p = ArgumentParser() 108 | p.add_argument( 109 | '-c', '--comparison', action='store', dest='comp', default='gpu_comp', 110 | choices=['gpu_comp', 'omp_comp', 'comp_algo'], 111 | help='Choose the comparison.' 112 | ) 113 | p.add_argument( 114 | '--nnps', action='store', dest='nnps', default='linear', 115 | choices=['linear', 'simple'], 116 | help='Choose algorithm.' 117 | ) 118 | p.add_argument( 119 | '--use-double', action='store_true', dest='use_double', 120 | default=False, help='Use double precision on the GPU.' 
121 | ) 122 | 123 | o = p.parse_args() 124 | get_config().use_double = o.use_double 125 | solver_algo = (md_nnps.MDNNPSSolver if o.nnps == 'linear' 126 | else md_simple.MDSolver) 127 | n_list = [10000 * (2 ** i) for i in range(10)] if o.nnps == 'linear' else \ 128 | [500 * (2 ** i) for i in range(8)] 129 | 130 | if o.comp == "gpu_comp": 131 | backends = ["opencl", "cuda", "cython"] 132 | print("Running for", n_list) 133 | speedups, t_list = compare(backends, n_list, solver_algo) 134 | plot(n_list, speedups, t_list, o.nnps) 135 | elif o.comp == "omp_comp": 136 | backends = ["cython_omp", "cython"] 137 | print("Running for", n_list) 138 | speedups, t_list = compare(backends, n_list, solver_algo) 139 | plot(n_list, speedups, t_list, o.nnps) 140 | elif o.comp == "comp_algo": 141 | backend = "cython" 142 | n_list = [500, 1000, 2000, 4000, 8000, 16000, 32000] 143 | print("Running for", n_list) 144 | compare_implementations(backend, n_list) 145 | -------------------------------------------------------------------------------- /examples/vm_elementwise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from compyle.api import declare, annotate 6 | from compyle.parallel import Elementwise 7 | from compyle.array import wrap 8 | 9 | 10 | @annotate(double='xi, yi, xj, yj, gamma', result='doublep') 11 | def point_vortex(xi, yi, xj, yj, gamma, result): 12 | xij = xi - xj 13 | yij = yi - yj 14 | r2ij = xij*xij + yij*yij 15 | if r2ij < 1e-14: 16 | result[0] = 0.0 17 | result[1] = 0.0 18 | else: 19 | tmp = gamma/(2.0*pi*r2ij) 20 | result[0] = -tmp*yij 21 | result[1] = tmp*xij 22 | 23 | 24 | @annotate(int='i, nv', gdoublep='x, y, gamma, u, v') 25 | def velocity(i, x, y, gamma, u, v, nv): 26 | j = declare('int') 27 | tmp = declare('matrix(2)') 28 | vx = 0.0 29 | vy = 0.0 30 | xi = x[i] 31 | yi = y[i] 32 | for j in range(nv): 33 | point_vortex(xi, yi, x[j], y[j], gamma[j], tmp) 34 | vx += tmp[0] 35 | vy += tmp[1] 36 | u[i] = vx 37 | v[i] = vy 38 | 39 | 40 | def make_vortices(nv, backend): 41 | x = np.linspace(-1, 1, nv) 42 | y = x.copy() 43 | gamma = np.ones(nv) 44 | u = np.zeros_like(x) 45 | v = np.zeros_like(x) 46 | x, y, gamma, u, v = wrap(x, y, gamma, u, v, backend=backend) 47 | return x, y, gamma, u, v, nv 48 | 49 | 50 | def run(nv, backend): 51 | e = Elementwise(velocity, backend=backend) 52 | args = make_vortices(nv, backend) 53 | t1 = time.time() 54 | e(*args) 55 | print(time.time() - t1) 56 | u = args[-3] 57 | u.pull() 58 | return e, args 59 | 60 | 61 | if __name__ == '__main__': 62 | from compyle.utils import ArgumentParser 63 | p = ArgumentParser() 64 | p.add_argument('-n', action='store', type=int, dest='n', 65 | default=10000, help='Number of particles.') 66 | o = p.parse_args() 67 | run(o.n, o.backend) 68 | -------------------------------------------------------------------------------- /examples/vm_elementwise_jit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from compyle.api import declare, annotate 6 | from compyle.parallel import Elementwise 7 | from compyle.array import wrap 8 | 9 | 10 | @annotate 11 | def point_vortex(xi, yi, xj, yj, gamma, result): 12 | xij = xi - xj 13 | yij = yi - yj 14 | r2ij = xij*xij + yij*yij 15 | if r2ij < 1.0e-14: 16 | result[0] = 0.0 17 | result[1] = 0.0 18 | else: 19 | tmp = gamma/(2.0*pi*r2ij) 20 | result[0] = -tmp*yij 21 | result[1] = tmp*xij 22 | 23 
| 24 | @annotate 25 | def velocity(i, x, y, gamma, u, v, nv): 26 | tmp = declare('matrix(2)') 27 | xi = x[i] 28 | yi = y[i] 29 | vx = 0.0 30 | vy = 0.0 31 | u[i] = 0.0 32 | v[i] = 0.0 33 | for j in range(nv): 34 | point_vortex(xi, yi, x[j], y[j], gamma[j], tmp) 35 | vx += tmp[0] 36 | vy += tmp[1] 37 | u[i] = vx 38 | v[i] = vy 39 | 40 | 41 | def make_vortices(nv, backend): 42 | x = np.linspace(-1, 1, nv) 43 | y = x.copy() 44 | gamma = np.ones(nv) 45 | u = np.zeros_like(x) 46 | v = np.zeros_like(x) 47 | x, y, gamma, u, v = wrap(x, y, gamma, u, v, backend=backend) 48 | return x, y, gamma, u, v, nv 49 | 50 | 51 | def run(nv, backend): 52 | e = Elementwise(velocity, backend=backend) 53 | args = make_vortices(nv, backend) 54 | t1 = time.time() 55 | e(*args) 56 | print(time.time() - t1) 57 | u = args[-3] 58 | u.pull() 59 | return e, args 60 | 61 | 62 | if __name__ == '__main__': 63 | from compyle.utils import ArgumentParser 64 | p = ArgumentParser() 65 | p.add_argument('-n', action='store', type=int, dest='n', 66 | default=10000, help='Number of particles.') 67 | o = p.parse_args() 68 | run(o.n, o.backend) 69 | -------------------------------------------------------------------------------- /examples/vm_kernel.py: -------------------------------------------------------------------------------- 1 | """Shows the use of a raw opencl Kernel but written using pure Python. It 2 | makes use of local memory allocated on the host. 3 | 4 | Note that the local memory is allocated as a multiple of workgroup size times 5 | the size of the data type automatically. 6 | 7 | This is a raw opencl kernel so will not work on Cython! 8 | 9 | """ 10 | import numpy as np 11 | from math import pi 12 | import time 13 | 14 | from compyle.api import annotate, declare, wrap 15 | from compyle.low_level import (Kernel, LocalMem, local_barrier, 16 | LID_0, LDIM_0, GDIM_0) 17 | 18 | 19 | @annotate(double='xi, yi, xj, yj, gamma', result='doublep') 20 | def point_vortex(xi, yi, xj, yj, gamma, result): 21 | xij = xi - xj 22 | yij = yi - yj 23 | r2ij = xij*xij + yij*yij 24 | if r2ij < 1.0e-14: 25 | result[0] = 0.0 26 | result[1] = 0.0 27 | else: 28 | tmp = gamma/(2.0*pi*r2ij) 29 | result[0] = -tmp*yij 30 | result[1] = tmp*xij 31 | 32 | 33 | @annotate(nv='int', gdoublep='x, y, gamma, u, v', ldoublep='xc, yc, gc') 34 | def velocity(x, y, gamma, u, v, xc, yc, gc, nv): 35 | i, gid, nb = declare('int', 3) 36 | j, ti, nt, jb = declare('int', 4) 37 | ti = LID_0 38 | nt = LDIM_0 39 | gid = GID_0 40 | i = gid*nt + ti 41 | idx = declare('int') 42 | tmp = declare('matrix(2)') 43 | uj, vj = declare('double', 2) 44 | nb = GDIM_0 45 | 46 | if i < nv: 47 | xi = x[i] 48 | yi = y[i] 49 | uj = 0.0 50 | vj = 0.0 51 | for jb in range(nb): 52 | idx = jb*nt + ti 53 | if idx < nv: 54 | xc[ti] = x[idx] 55 | yc[ti] = y[idx] 56 | gc[ti] = gamma[idx] 57 | else: 58 | gc[ti] = 0.0 59 | local_barrier() 60 | 61 | if i < nv: 62 | for j in range(nt): 63 | point_vortex(xi, yi, xc[j], yc[j], gc[j], tmp) 64 | uj += tmp[0] 65 | vj += tmp[1] 66 | 67 | local_barrier() 68 | 69 | if i < nv: 70 | u[i] = uj 71 | v[i] = vj 72 | 73 | 74 | def make_vortices(nv, backend): 75 | x = np.linspace(-1, 1, nv) 76 | y = x.copy() 77 | gamma = np.ones(nv) 78 | u = np.zeros_like(x) 79 | v = np.zeros_like(x) 80 | x, y, gamma, u, v = wrap(x, y, gamma, u, v, backend=backend) 81 | xc, yc, gc = (LocalMem(1, backend), LocalMem(1, backend), 82 | LocalMem(1, backend)) 83 | return x, y, gamma, u, v, xc, yc, gc, nv 84 | 85 | 86 | def run(nv, backend): 87 | e = Kernel(velocity, backend=backend) 88 | 
args = make_vortices(nv, backend) 89 | t1 = time.time() 90 | gs = ((nv + 128 - 1)//128)*128 91 | e(*args, global_size=(gs,)) 92 | print(time.time() - t1) 93 | u = args[3] 94 | u.pull() 95 | print(u.data) 96 | return e, args 97 | 98 | 99 | if __name__ == '__main__': 100 | from compyle.utils import ArgumentParser 101 | p = ArgumentParser() 102 | p.add_argument('-n', action='store', type=int, dest='n', 103 | default=10000, help='Number of particles.') 104 | o = p.parse_args() 105 | assert o.backend in ['opencl', 'cuda'], ("Only OpenCL/CUDA backend is " 106 | "supported.") 107 | run(o.n, o.backend) 108 | -------------------------------------------------------------------------------- /examples/vm_numba.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import pi 3 | import time 4 | 5 | from numba import jit 6 | 7 | 8 | @jit 9 | def point_vortex(xi, yi, xj, yj, gamma, result): 10 | xij = xi - xj 11 | yij = yi - yj 12 | r2ij = xij*xij + yij*yij 13 | if r2ij < 1e-14: 14 | result[0] = 0.0 15 | result[1] = 0.0 16 | else: 17 | tmp = gamma/(2.0*pi*r2ij) 18 | result[0] = -tmp*yij 19 | result[1] = tmp*xij 20 | 21 | 22 | @jit 23 | def velocity(x, y, gamma, u, v, nv): 24 | tmp = np.zeros(2) 25 | for i in range(nv): 26 | xi = x[i] 27 | yi = y[i] 28 | u[i] = 0.0 29 | v[i] = 0.0 30 | for j in range(nv): 31 | point_vortex(xi, yi, x[j], y[j], gamma[j], tmp) 32 | u[i] += tmp[0] 33 | v[i] += tmp[1] 34 | 35 | 36 | def make_vortices(nv): 37 | x = np.linspace(-1, 1, nv) 38 | y = x.copy() 39 | gamma = np.ones(nv) 40 | u = np.zeros_like(x) 41 | v = np.zeros_like(x) 42 | return x, y, gamma, u, v, nv 43 | 44 | 45 | def run(nv): 46 | args = make_vortices(nv) 47 | t1 = time.time() 48 | velocity(*args) 49 | print(time.time() - t1) 50 | u = args[-3] 51 | print(u) 52 | return velocity, args 53 | 54 | 55 | if __name__ == '__main__': 56 | from argparse import ArgumentParser 57 | p = ArgumentParser() 58 | p.add_argument('-n', action='store', 59 | type=int, dest='n', default=10000) 60 | o = p.parse_args() 61 | run(o.n) 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "wheel>=0.29.0", 4 | "setuptools>=42.0.0", 5 | "numpy>=2.0,<3", 6 | "Cython>=0.20", 7 | "mako", 8 | "pytools" 9 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mako 2 | pytools 3 | cython 4 | numpy 5 | pytest 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from setuptools import setup, find_packages 3 | 4 | try: 5 | from Cython.Distutils import Extension 6 | from Cython.Build import cythonize 7 | except ImportError: 8 | from distutils.core import Extension 9 | 10 | def cythonize(*args, **kw): 11 | return args[0] 12 | 13 | 14 | def get_version(): 15 | import os 16 | data = {} 17 | fname = os.path.join('compyle', '__init__.py') 18 | exec(compile(open(fname).read(), fname, 'exec'), data) 19 | return data.get('__version__') 20 | 21 | 22 | install_requires = ['mako', 'pytools', 'cython', 'numpy'] 23 | tests_require = ['pytest'] 24 | if sys.version_info[0] < 3: 25 | tests_require += ['mock>=1.0'] 26 | docs_require = ['sphinx'] 27 | cuda_require 
= ['pycuda', 'cupy'] 28 | opencl_require = ['pyopencl'] 29 | 30 | classes = ''' 31 | Development Status :: 4 - Beta 32 | Intended Audience :: Developers 33 | Intended Audience :: Science/Research 34 | Natural Language :: English 35 | Operating System :: MacOS :: MacOS X 36 | Operating System :: Microsoft :: Windows 37 | Operating System :: POSIX 38 | Operating System :: Unix 39 | Programming Language :: Python 40 | Programming Language :: Python :: 2.7 41 | Programming Language :: Python :: 3 42 | Topic :: Scientific/Engineering 43 | Topic :: Software Development :: Code Generators 44 | Topic :: Software Development :: Compilers 45 | Topic :: Software Development :: Libraries 46 | Topic :: Utilities 47 | ''' 48 | classifiers = [x.strip() for x in classes.splitlines() if x] 49 | 50 | ext_modules = [ 51 | Extension( 52 | name="compyle.thrust.sort", 53 | sources=["compyle/thrust/sort.pyx"], 54 | language="c++" 55 | ), 56 | ] 57 | 58 | setup( 59 | name='compyle', 60 | version=get_version(), 61 | author='Prabhu Ramachandran', 62 | author_email='prabhu@aero.iitb.ac.in', 63 | description='Execute a subset of Python on HPC platforms', 64 | long_description=open('README.rst').read(), 65 | license="BSD-3-Clause", 66 | url='https://github.com/pypr/compyle', 67 | classifiers=classifiers, 68 | packages=find_packages(), 69 | ext_modules=cythonize(ext_modules, language="c++"), 70 | install_requires=install_requires, 71 | extras_require={ 72 | "docs": docs_require, 73 | "tests": tests_require, 74 | "dev": docs_require + tests_require, 75 | "cuda": cuda_require, 76 | "opencl": opencl_require, 77 | }, 78 | ) 79 | --------------------------------------------------------------------------------