├── .coveragerc
├── .github
    └── workflows
    │   └── tests.yml
├── .gitignore
├── .readthedocs.yaml
├── CHANGES.rst
├── LICENSE.txt
├── MANIFEST.in
├── README.rst
├── compyle
    ├── __init__.py
    ├── api.py
    ├── array.py
    ├── ast_utils.py
    ├── capture_stream.py
    ├── config.py
    ├── cuda.py
    ├── cython_generator.py
    ├── ext_module.py
    ├── extern.py
    ├── jit.py
    ├── low_level.py
    ├── opencl.py
    ├── parallel.py
    ├── profile.py
    ├── sort.py
    ├── template.py
    ├── tests
    │   ├── __init__.py
    │   ├── py3_code.py
    │   ├── test_array.py
    │   ├── test_ast_utils.py
    │   ├── test_capture_stream.py
    │   ├── test_change_backend.py
    │   ├── test_config.py
    │   ├── test_cuda.py
    │   ├── test_cython_generator.py
    │   ├── test_ext_module.py
    │   ├── test_gpu_struct.py
    │   ├── test_jit.py
    │   ├── test_low_level.py
    │   ├── test_parallel.py
    │   ├── test_profile.py
    │   ├── test_template.py
    │   ├── test_translator.py
    │   ├── test_transpiler.py
    │   ├── test_types.py
    │   └── test_utils.py
    ├── thrust
    │   ├── __init__.py
    │   └── sort.pyx
    ├── translator.py
    ├── transpiler.py
    ├── types.py
    └── utils.py
├── docs
    ├── Makefile
    ├── requirements.txt
    └── source
    │   ├── conf.py
    │   ├── details.rst
    │   ├── index.rst
    │   ├── installation.rst
    │   └── overview.rst
├── examples
    ├── axpb.py
    ├── axpb_jit.py
    ├── bench_vm.py
    ├── julia_set.py
    ├── laplace.py
    ├── molecular_dynamics
    │   ├── 3D
    │   │   ├── compare_results.py
    │   │   ├── hoomd_periodic.py
    │   │   ├── md_nnps.py
    │   │   ├── md_nnps_periodic.py
    │   │   ├── md_simple.py
    │   │   ├── nnps.py
    │   │   ├── nnps_kernels.py
    │   │   └── performance_comparison.py
    │   ├── README.rst
    │   ├── md_nnps.py
    │   ├── md_simple.py
    │   ├── nnps.py
    │   ├── nnps_kernels.py
    │   └── performance_comparison.py
    ├── vm_elementwise.py
    ├── vm_elementwise_jit.py
    ├── vm_kernel.py
    └── vm_numba.py
├── pyproject.toml
├── requirements.txt
└── setup.py


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | branch = True
 3 | source = compyle
 4 | omit =
 5 |      */tests/*
 6 |      compyle/api.py
 7 | 
 8 | [report]
 9 | exclude_lines =
10 |     # Have to re-enable the standard pragma
11 |     pragma: no cover
12 |     except ImportError:
13 |     raise NotImplementedError()
14 |     if __name__ == .__main__.:
15 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   schedule:
 6 |     # Run Test at 0400 UTC on Saturday.
 7 |     - cron: '0 4 * * 6'
 8 |     # Run test at 0400 UTC on day 1 of every month to create auto-generated
 9 |     # code afresh and cache it.
10 |     - cron: '0 4 1 * *'  # Ref https://crontab.guru/#0_4_1_*_*
11 | 
12 | jobs:
13 |   tests:
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         os: [ubuntu-latest, macos-latest, windows-latest]
18 |         python-version: [3.11, 3.12]
19 | 
20 |     runs-on: ${{ matrix.os }}
21 |     defaults:
22 |       run:
23 |         shell: bash -l {0}
24 | 
25 |     steps:
26 |       - uses: actions/checkout@v4
27 |       - name: Set up Python ${{ matrix.python-version }}
28 |         uses: conda-incubator/setup-miniconda@v3
29 |         with:
30 |           auto-update-conda: true
31 |           python-version: ${{ matrix.python-version }}
32 |           channels: defaults, conda-forge
33 |           channel-priority: flexible
34 |       - name: Install dependencies on Linux/MacOS
35 |         run: |
36 |           conda info
37 |           conda install pocl pyopencl
38 |           python -c 'import pyopencl as cl'
39 |         if: ${{ runner.os != 'Windows' }}
40 |       - name: Setup compyle config on MacOS to use openmp enabled clang from homebrew
41 |         run: |
42 |           brew install libomp
43 |           mkdir -p ~/.compyle
44 |           touch ~/.compyle/config.py
45 |           echo "import os" >> ~/.compyle/config.py
46 |           echo "os.environ['CC'] = '$(brew --prefix llvm@15)/bin/clang'" >> ~/.compyle/config.py
47 |           echo "os.environ['CXX'] = '$(brew --prefix llvm@15)/bin/clang++'" >> ~/.compyle/config.py
48 |           export CPPFLAGS="-I$(brew --prefix libomp)/include -I$(brew --prefix llvm@15)/include -Xclang -fopenmp"
49 |           export LDFLAGS="-L$(brew --prefix libomp)/lib -L$(brew --prefix llvm@15)/lib -lomp"
50 |           python -c "import os; OMP_CFLAGS=os.environ.get('CPPFLAGS').split(' '); print(f'{OMP_CFLAGS=}')" >> ~/.compyle/config.py
51 |           python -c "import os; OMP_LINK=os.environ.get('LDFLAGS').split(' '); print(f'{OMP_LINK=}')" >> ~/.compyle/config.py
52 |           cat ~/.compyle/config.py
53 |         if: ${{ runner.os == 'macOS' }}
54 |       - name: Install dependencies
55 |         run: |
56 |           conda info
57 |           conda install numpy cython
58 |           python -m pip install -r requirements.txt
59 |           python -m pip install coverage codecov
60 |           python -m pip install -e ".[dev]"
61 |       # Cache auto-generated code. Cache key changes every month.
62 |       # Thanks https://stackoverflow.com/a/60942437
63 |       - name: Get month to use as cache key.
64 |         id: month
65 |         run: echo "month=$(date +'%m')" >> $GITHUB_OUTPUT
66 |       - name: Deal with auto-generated code cache
67 |         uses: actions/cache@v4
68 |         with:
69 |           path: |
70 |             ~/.compyle
71 |           key: ${{ runner.os }}-${{ steps.month.outputs.month }}-${{ matrix.python-version }}
72 |       - name: Run tests
73 |         run: |
74 |           coverage erase
75 |           coverage run -m pytest -v
76 |       - name: Report
77 |         if: ${{ success() }}
78 |         run: coverage report
79 |       - name: Upload Coverage to Codecov
80 |         uses: codecov/codecov-action@v4
81 |         with:
82 |           env_vars: ${{ matrix.os }}, ${{ matrix.python-version }}
83 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.o
 3 | *.c
 4 | *.cpp
 5 | *~
 6 | *.so
 7 | build/
 8 | dist/
 9 | *.egg-info/
10 | .pytest_cache


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file for Sphinx projects
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version and other tools you might need
 8 | build:
 9 |   os: ubuntu-24.04
10 |   tools:
11 |     python: "3.12"
12 |     # You can also specify other tool versions:
13 |     # nodejs: "20"
14 |     # rust: "1.70"
15 |     # golang: "1.20"
16 | 
17 | # Build documentation in the "docs/" directory with Sphinx
18 | sphinx:
19 |   configuration: docs/source/conf.py
20 |   # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
21 |   # builder: "dirhtml"
22 |   # Fail on all warnings to avoid broken references
23 |   # fail_on_warning: true
24 | 
25 | # Optionally build your docs in additional formats such as PDF and ePub
26 | # formats:
27 | #   - pdf
28 | #   - epub
29 | 
30 | # Optional but recommended, declare the Python requirements required
31 | # to build your documentation
32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
33 | python:
34 |   install:
35 |     - requirements: docs/requirements.txt


--------------------------------------------------------------------------------
/CHANGES.rst:
--------------------------------------------------------------------------------
 1 | 0.9.1
 2 | ~~~~~~
 3 | 
 4 | * Release date: 23 May, 2025.
 5 | * Fix issue with unnecessary files to make a smaller source dist.
 6 | 
 7 | 
 8 | 0.9
 9 | ~~~~
10 | 
11 | * Release date: 23 May, 2025.
12 | * Allow user to pass a cython include directory when using ``ExtModule``.
13 | * Fix error with Cython compilation error messages not being shown.
14 | * Fix issue with the root log level being set when building an extension module.
15 | * Use cdivision always as that is usually the intent when using compyle.
16 | * Add a ``COMPYLE_DEBUG`` environment variable to print debug information.
17 | * Explicitly type any float literals for single precision to improve GPU performance.
18 | * Fix bug with the directory where the sources were saved.
19 | * Support for NumPy 2 and Cython 3.x.
20 | * Drop Python 2 support.
21 | * Do late binding so the backend can be changed.
22 | * Fix NumPy deprecation errors.
23 | 
24 | 
25 | 0.8.1
26 | ~~~~~~
27 | 
28 | * Release date: 7th November, 2021.
29 | * Fix issue with accidental file in sdist.
30 | 
31 | 
32 | 0.8
33 | ~~~~
34 | 
35 | * Release date: 7th November, 2021.
36 | * Improve array module to support more numpy like functionality.
37 | * Improve profile output so it works in a distributed setting.
38 | * Add support for a configuration file in ~/.compyle/config.py
39 | * Added `atomic_dec` support.
40 | * Fix output capturing on jupyter notebooks.
41 | * Fix issues due to ast changes in Python 3.9.x.
42 | * Fix tests on 32bit architectures.
43 | * Fix several bugs and issues.
44 | 
45 | 
46 | 0.7
47 | ~~~~
48 | 
49 | * Release date: 1st October, 2020.
50 | * Add convenient option to profile execution of code.
51 | * Add a convenient argument parser for scripts.
52 | * Add easy way to see generated sources.
53 | * Fix bug with installation of previous version.
54 | * Fix several bugs and issues.
55 | * Update the documentation.
56 | 
57 | 0.6
58 | ~~~~
59 | 
60 | * Release date: 15th June, 2020.
61 | * Add some non-trivial examples showcasing the package.
62 | * Document how one can use clang + OpenMP.
63 | * Add sorting, align, and other functions to array module.
64 | * Support for mapping structs on a GPU with CUDA.
65 | * Add address, cast, and address low-level functions.
66 | * Support for mako-templates for reducing repetitive code.
67 | * Bitwise operator support.
68 | * Attempt to auto-declare variables when possible.
69 | * Fix several bugs and issues.
70 | 
71 | 
72 | 
73 | 0.5
74 | ~~~~
75 | 
76 | * Release date: 3rd, December 2018
77 | * First public release.
78 | * Support for elementwise, scan, and reduction operations on CPU and GPU using
79 |   Cython, OpenCL and CUDA.
80 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Unless otherwise specified by LICENSE.txt files in individual
 2 | directories, all code is
 3 | 
 4 | Copyright (c) 2009-2018, the PySPH developers
 5 | All rights reserved.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without
 8 | modification, are permitted provided that the following conditions are
 9 | met:
10 | 
11 |  1. Redistributions of source code must retain the above copyright
12 |     notice, this list of conditions and the following disclaimer.
13 |  2. Redistributions in binary form must reproduce the above copyright
14 |     notice, this list of conditions and the following disclaimer in
15 |     the documentation and/or other materials provided with the
16 |     distribution.
17 |  3. Neither the name of the copyright holder nor the names of its contributors
18 |     may be used to endorse or promote products derived from this software
19 |     without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include MANIFEST.in *.py *.rst *.yml *.txt *.toml
2 | recursive-include compyle *.pyx
3 | recursive-exclude compyle *.cpp
4 | recursive-include docs *.*
5 | recursive-include examples *.*
6 | recursive-exclude docs/build *.*
7 | recursive-exclude examples/ *.png __pycache__/* .DS_Store
8 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Compyle: execute a subset of Python on HPC platforms
  2 | ======================================================
  3 | 
  4 | |CI Status| |Coverage Status| |Documentation Status|
  5 | 
  6 | 
  7 | .. |CI Status| image:: https://github.com/pypr/compyle/actions/workflows/tests.yml/badge.svg
  8 |     :target: https://github.com/pypr/compyle/actions/workflows/tests.yml
  9 | .. |Documentation Status| image:: https://readthedocs.org/projects/compyle/badge/?version=latest
 10 |     :target: https://compyle.readthedocs.io/en/latest/?badge=latest
 11 |     :alt: Documentation Status
 12 | .. |Coverage Status| image:: https://codecov.io/gh/pypr/compyle/branch/main/graph/badge.svg
 13 |     :target: https://codecov.io/gh/pypr/compyle
 14 | 
 15 | Compyle allows users to execute a restricted subset of Python (almost similar
 16 | to C) on a variety of HPC platforms. Currently we support multi-core CPU
 17 | execution using Cython, and for GPU devices we use OpenCL or CUDA.
 18 | 
 19 | Users start with code implemented in a very restricted Python syntax, this code
 20 | is then automatically transpiled, compiled and executed to run on either one CPU
 21 | core, or multiple CPU cores (via OpenMP_) or on a GPU. Compyle offers
 22 | source-to-source transpilation, making it a very convenient tool for writing HPC
 23 | libraries.
 24 | 
 25 | Some simple yet powerful parallel utilities are provided which can allow you
 26 | to solve a remarkably large number of interesting HPC problems. Compyle also
 27 | features JIT transpilation making it easy to use.
 28 | 
 29 | Documentation and learning material is also available in the form of:
 30 | 
 31 | - Documentation at: https://compyle.readthedocs.io
 32 | 
 33 | - An introduction to compyle in the context of writing a parallel molecular
 34 |   dynamics simulator is in our `SciPy 2020 paper
 35 |   <http://conference.scipy.org/proceedings/scipy2020/compyle_pr_ab.html>`_.
 36 | 
 37 | - `Compyle poster presentation <https://docs.google.com/presentation/d/1LS9XO5pQXz8G5d27RP5oWLFxUA-Fr5OvfVUGsgg86TQ/edit#slide=id.p>`_
 38 | 
 39 | - You may also try Compyle online for free on a `Google Colab notebook`_.
 40 | 
 41 | While Compyle seems simple it is not a toy and is used heavily by the PySPH_
 42 | project where Compyle has its origins.
 43 | 
 44 | .. _PySPH: https://github.com/pypr/pysph
 45 | .. _Google Colab notebook: https://colab.research.google.com/drive/1SGRiArYXV1LEkZtUeg9j0qQ21MDqQR2U?usp=sharing
 46 | 
 47 | 
 48 | Installation
 49 | -------------
 50 | 
 51 | Compyle is itself largely pure Python but depends on numpy_ and requires
 52 | either Cython_ or PyOpenCL_ or PyCUDA_ along with the respective backends of a
 53 | C/C++ compiler, OpenCL and CUDA. If you are only going to execute code on a
 54 | CPU then all you need is Cython.
 55 | 
 56 | You should be able to install Compyle by doing::
 57 | 
 58 |   $ pip install compyle
 59 | 
 60 | 
 61 | .. _PyOpenCL: https://documen.tician.de/pyopencl/
 62 | .. _OpenCL: https://www.khronos.org/opencl/
 63 | .. _Cython: http://www.cython.org
 64 | .. _numpy: http://www.numpy.org
 65 | .. _OpenMP: http://openmp.org/
 66 | .. _PyCUDA: https://documen.tician.de/pycuda/
 67 | 
 68 | A simple example
 69 | ----------------
 70 | 
 71 | Here is a very simple example::
 72 | 
 73 |    from compyle.api import Elementwise, annotate, wrap, get_config
 74 |    import numpy as np
 75 | 
 76 |    @annotate
 77 |    def axpb(i, x, y, a, b):
 78 |        y[i] = a*sin(x[i]) + b
 79 | 
 80 |    x = np.linspace(0, 1, 10000)
 81 |    y = np.zeros_like(x)
 82 |    a, b = 2.0, 3.0
 83 | 
 84 |    backend = 'cython'
 85 |    get_config().use_openmp = True
 86 |    x, y = wrap(x, y, backend=backend)
 87 |    e = Elementwise(axpb, backend=backend)
 88 |    e(x, y, a, b)
 89 | 
 90 | This will execute the elementwise operation in parallel using OpenMP with
 91 | Cython. The code is auto-generated, compiled and called for you transparently.
 92 | The first time this runs, it will take a bit of time to compile everything but
 93 | the next time, this is cached and will run much faster.
 94 | 
 95 | If you just change the ``backend = 'opencl'``, the same exact code will be
 96 | executed using PyOpenCL_ and if you change the backend to ``'cuda'``, it will
 97 | execute via CUDA without any other changes to your code. This is obviously a
 98 | very trivial example, there are more complex examples available as well.
 99 | 
100 | 
101 | Examples
102 | ---------
103 | 
104 | Some simple examples and benchmarks are available in the `examples
105 | <https://github.com/pypr/compyle/tree/main/examples>`_ directory.
106 | 
107 | You may also run these examples on the `Google Colab notebook`_
108 | 


--------------------------------------------------------------------------------
/compyle/__init__.py:
--------------------------------------------------------------------------------
1 | # See PEP 440 for more on suitable version numbers.
2 | __version__ = '0.10.dev0'
3 | 


--------------------------------------------------------------------------------
/compyle/api.py:
--------------------------------------------------------------------------------
 1 | from .array import Array, wrap
 2 | from .ast_utils import (get_symbols, get_assigned,
 3 |                         get_unknown_names_and_calls, has_return, has_node)
 4 | from .config import get_config, set_config, use_config, Config
 5 | from .cython_generator import (
 6 |     CythonGenerator, get_func_definition
 7 | )
 8 | from .ext_module import ExtModule
 9 | from .extern import Extern
10 | from .low_level import Kernel, LocalMem, Cython, cast
11 | from .parallel import (
12 |     Elementwise, Reduction, Scan, elementwise
13 | )
14 | from .profile import (
15 |     get_profile_info, named_profile, profile, profile_ctx, print_profile,
16 |     profile_kernel, ProfileContext, profile2csv
17 | )
18 | from .translator import (
19 |     CConverter, CStructHelper, OpenCLConverter, detect_type, ocl_detect_type,
20 |     py2c
21 | )
22 | from .types import KnownType, annotate, declare
23 | from .utils import ArgumentParser
24 | 


--------------------------------------------------------------------------------
/compyle/ast_utils.py:
--------------------------------------------------------------------------------
  1 | """Utilities to work with the Python AST.
  2 | """
  3 | 
  4 | import ast
  5 | import sys
  6 | 
  7 | PY_VER = sys.version_info.major
  8 | 
  9 | basestring = str if PY_VER > 2 else basestring
 10 | 
 11 | 
 12 | class NameLister(ast.NodeVisitor):
 13 |     """Utility class to collect the Names in an AST.
 14 |     """
 15 |     def __init__(self, ctx=(ast.Load, ast.Store)):
 16 |         self.names = set()
 17 |         self.ctx = ctx
 18 | 
 19 |     def visit_Name(self, node):
 20 |         if isinstance(node.ctx, self.ctx):
 21 |             self.names.add(node.id)
 22 |         self.generic_visit(node)
 23 | 
 24 | 
 25 | class SymbolParser(ast.NodeVisitor):
 26 |     """Utility class to gather the used symbols in a block of code. We look at
 27 |     assignments, augmented assignments, function calls, and any Names. These
 28 |     are all parsed in one shot and collected.
 29 | 
 30 |     Note that this works best for a single function that is parsed rather than
 31 |     for a collection of functions.
 32 | 
 33 |     """
 34 |     def __init__(self):
 35 |         self.names = set()
 36 |         self.assign = set()
 37 |         self.calls = set()
 38 |         self.funcargs = set()
 39 |         self.func_name = ''
 40 |         self.ctx = (ast.Load, ast.Store)
 41 | 
 42 |     def visit_Name(self, node):
 43 |         if isinstance(node.ctx, self.ctx):
 44 |             self.names.add(node.id)
 45 |         self.generic_visit(node)
 46 | 
 47 |     def visit_AugAssign(self, node):
 48 |         if isinstance(node.target, ast.Name):
 49 |             self.assign.add(node.target.id)
 50 |         elif isinstance(node.target, ast.Subscript):
 51 |             v = node.target.value
 52 |             while not isinstance(v, ast.Name):
 53 |                 v = v.value
 54 |             self.assign.add(v.id)
 55 |         self.generic_visit(node)
 56 | 
 57 |     def visit_Assign(self, node):
 58 |         for target in node.targets:
 59 |             if isinstance(target, ast.Name):
 60 |                 self.assign.add(target.id)
 61 |             elif isinstance(target, ast.Subscript):
 62 |                 n = target.value
 63 |                 while not isinstance(n, ast.Name):
 64 |                     n = n.value
 65 |                 self.assign.add(n.id)
 66 |             elif isinstance(target, (ast.List, ast.Tuple)):
 67 |                 for n in target.elts:
 68 |                     if isinstance(n, ast.Name):
 69 |                         self.assign.add(n.id)
 70 |         self.generic_visit(node)
 71 | 
 72 |     def visit_Call(self, node):
 73 |         if isinstance(node.func, ast.Name):
 74 |             self.calls.add(node.func.id)
 75 |         self.generic_visit(node)
 76 | 
 77 |     def visit_FunctionDef(self, node):
 78 |         self.func_name = node.name
 79 |         if PY_VER == 2:
 80 |             self.funcargs.update(x.id for x in node.args.args)
 81 |             if node.args.vararg:
 82 |                 self.funcargs.add(node.args.vararg)
 83 |             if node.args.kwarg:
 84 |                 self.funcargs.add(node.args.kwarg)
 85 |         else:
 86 |             self.funcargs.update(x.arg for x in node.args.args)
 87 |             if node.args.vararg:
 88 |                 self.funcargs.add(node.args.vararg.arg)
 89 |             if node.args.kwarg:
 90 |                 self.funcargs.add(node.args.kwarg.arg)
 91 |             if node.args.kwonlyargs:
 92 |                 self.funcargs.update(x.arg for x in node.args.kwonlyargs)
 93 |         for arg in node.body:
 94 |             self.visit(arg)
 95 | 
 96 | 
 97 | def _get_tree(code):
 98 |     return ast.parse(code) if isinstance(code, basestring) else code
 99 | 
100 | 
101 | def get_symbols(code, ctx=(ast.Load, ast.Store)):
102 |     """Given an AST or code string return the symbols used therein.
103 | 
104 |     Parameters
105 |     ----------
106 | 
107 |     code: A code string or the result of an ast.parse.
108 | 
109 |     ctx: The context of the names, can be one of ast.Load, ast.Store, ast.Del.
110 |     """
111 |     tree = _get_tree(code)
112 |     n = NameLister(ctx=ctx)
113 |     n.visit(tree)
114 |     return n.names
115 | 
116 | 
117 | def get_assigned(code):
118 |     """Given an AST or code string return the symbols that are augmented
119 |     assigned or assigned.
120 | 
121 |     Parameters
122 |     ----------
123 | 
124 |     code: A code string or the result of an ast.parse.
125 | 
126 |     """
127 |     tree = _get_tree(code)
128 |     p = SymbolParser()
129 |     p.visit(tree)
130 |     return p.assign
131 | 
132 | 
133 | def get_unknown_names_and_calls(code):
134 |     """Given an AST or code string return the unknown variables and calls in
135 |     the code.  The function returns two sets, ``names, calls``.
136 | 
137 |     Parameters
138 |     ----------
139 | 
140 |     code: A code string or the result of an ast.parse.
141 | 
142 |     """
143 |     tree = ast.parse(code) if isinstance(code, basestring) else code
144 |     p = SymbolParser()
145 |     p.visit(tree)
146 |     funcargs = p.funcargs
147 |     if len(p.func_name) > 0:
148 |         funcargs.add(p.func_name)
149 |     names = p.names - funcargs - p.calls - p.assign
150 |     calls = p.calls
151 |     return names, calls
152 | 
153 | 
154 | def has_node(code, node):
155 |     """Given an AST or code string returns True if the code contains
156 |     any particular node statement.
157 | 
158 |     Parameters
159 |     ----------
160 | 
161 |     code: A code string or the result of an ast.parse.
162 | 
163 |     node: A node type or tuple of node types to check for.  If a tuple
164 |         is passed it returns True if any one of them is in the code.
165 |     """
166 |     tree = _get_tree(code)
167 |     for n in ast.walk(tree):
168 |         if isinstance(n, node):
169 |             return True
170 |     return False
171 | 
172 | 
173 | def has_return(code):
174 |     """Returns True of the node has a return statement.
175 |     """
176 |     return has_node(code, ast.Return)
177 | 


--------------------------------------------------------------------------------
/compyle/capture_stream.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | import os
  3 | import sys
  4 | from tempfile import mktemp
  5 | 
  6 | 
  7 | def get_ipython_capture():
  8 |     try:
  9 |         # This will work inside IPython but not outside it.
 10 |         name = get_ipython().__class__.__name__
 11 |         if name.startswith('ZMQ'):
 12 |             from IPython.utils.capture import capture_output
 13 |             return capture_output
 14 |         else:
 15 |             return None
 16 |     except NameError:
 17 |         return None
 18 | 
 19 | 
 20 | class CaptureStream(object):
 21 |     """A context manager which captures any errors on a given stream (like
 22 |     sys.stderr).  The stream is captured and the outputs can be used.
 23 | 
 24 |     We treat sys.stderr and stdout specially as very often these are
 25 |     overridden by nose or IPython.  We always wrap the underlying file
 26 |     descriptors in this case as this is the intent of this context manager.
 27 | 
 28 |     This is somewhat based on this question:
 29 |         http://stackoverflow.com/questions/7018879/disabling-output-when-compiling-with-distutils
 30 | 
 31 |     Examples
 32 |     --------
 33 | 
 34 |     See the tests in tests/test_capture_stream.py for example usage.
 35 |     """
 36 | 
 37 |     def __init__(self, stream=sys.stderr):
 38 |         self.stream = stream
 39 |         if stream is sys.stderr:
 40 |             self.fileno = 2
 41 |         elif stream is sys.stdout:
 42 |             self.fileno = 1
 43 |         else:
 44 |             self.fileno = stream.fileno()
 45 |         self.orig_stream = None
 46 |         self.tmp_stream = None
 47 |         self.tmp_path = ''
 48 |         self._cached_output = None
 49 | 
 50 |     def __enter__(self):
 51 |         if sys.platform.startswith('win32') and sys.version_info[:2] > (3, 5):
 52 |             return self
 53 |         self.orig_stream = os.dup(self.fileno)
 54 |         self.tmp_path = mktemp()
 55 |         self.tmp_stream = io.open(self.tmp_path, 'w+', encoding='utf-8')
 56 |         os.dup2(self.tmp_stream.fileno(), self.fileno)
 57 |         return self
 58 | 
 59 |     def __exit__(self, type, value, tb):
 60 |         if sys.platform.startswith('win32') and sys.version_info[:2] > (3, 5):
 61 |             return
 62 |         if self.orig_stream is not None:
 63 |             os.dup2(self.orig_stream, self.fileno)
 64 |         if self.tmp_stream is not None:
 65 |             self._cache_output()
 66 |             self.tmp_stream.close()
 67 |             os.remove(self.tmp_path)
 68 | 
 69 |     def _cache_output(self):
 70 |         if self._cached_output is not None:
 71 |             return
 72 |         tmp_stream = self.tmp_stream
 73 |         result = ''
 74 |         if tmp_stream is not None:
 75 |             tmp_stream.flush()
 76 |             tmp_stream.seek(0)
 77 |             result = tmp_stream.read()
 78 |         self._cached_output = result
 79 | 
 80 |     def get_output(self):
 81 |         """Return the captured output.
 82 |         """
 83 |         if self._cached_output is None:
 84 |             self._cache_output()
 85 |         return self._cached_output
 86 | 
 87 | 
 88 | class CaptureMultipleStreams(object):
 89 |     """This lets one capture multiple streams together.
 90 |     """
 91 |     def __init__(self, streams=None):
 92 |         streams = (sys.stdout, sys.stderr) if streams is None else streams
 93 |         self.streams = streams
 94 |         self.captures = [CaptureStream(x) for x in streams]
 95 |         cap = get_ipython_capture()
 96 |         if cap:
 97 |             self.jcap = cap(stdout=True, stderr=True, display=True)
 98 |         else:
 99 |             self.jcap = None
100 |         self.joutput = None
101 | 
102 |     def __enter__(self):
103 |         for capture in self.captures:
104 |             capture.__enter__()
105 |         if self.jcap:
106 |             self.joutput = self.jcap.__enter__()
107 |         return self
108 | 
109 |     def __exit__(self, type, value, tb):
110 |         for capture in self.captures:
111 |             capture.__exit__(type, value, tb)
112 |         if self.jcap:
113 |             self.jcap.__exit__(type, value, tb)
114 | 
115 |     def get_output(self):
116 |         out = list(x.get_output() for x in self.captures)
117 |         if self.joutput:
118 |             out[0] += self.joutput.stdout
119 |             out[1] += self.joutput.stderr
120 |         return out
121 | 


--------------------------------------------------------------------------------
/compyle/config.py:
--------------------------------------------------------------------------------
  1 | """Simple configuration options for PySPH.
  2 | 
  3 | Do not import any PySPH specific extensions here, if you must, do the import
  4 | inside the function/method.
  5 | """
  6 | 
  7 | from contextlib import contextmanager
  8 | 
  9 | 
 10 | class Config(object):
 11 |     def __init__(self):
 12 |         self._use_openmp = None
 13 |         self._use_opencl = None
 14 |         self._use_cuda = None
 15 |         self._use_double = None
 16 |         self._omp_schedule = None
 17 |         self._profile = None
 18 |         self._use_local_memory = None
 19 |         self._wgs = None
 20 |         self._suppress_warnings = None
 21 | 
 22 |     @property
 23 |     def suppress_warnings(self):
 24 |         if self._suppress_warnings is None:
 25 |             self._suppress_warnings = self._suppress_warnings_default()
 26 |         return self._suppress_warnings
 27 | 
 28 |     @suppress_warnings.setter
 29 |     def suppress_warnings(self, value):
 30 |         self._suppress_warnings = value
 31 | 
 32 |     def _suppress_warnings_default(self):
 33 |         return False
 34 | 
 35 |     @property
 36 |     def use_openmp(self):
 37 |         if self._use_openmp is None:
 38 |             self._use_openmp = self._use_openmp_default()
 39 |         return self._use_openmp
 40 | 
 41 |     @use_openmp.setter
 42 |     def use_openmp(self, value):
 43 |         self._use_openmp = value
 44 | 
 45 |     def _use_openmp_default(self):
 46 |         return False
 47 | 
 48 |     @property
 49 |     def omp_schedule(self):
 50 |         if self._omp_schedule is None:
 51 |             self._omp_schedule = self._omp_schedule_default()
 52 |         return self._omp_schedule
 53 | 
 54 |     @omp_schedule.setter
 55 |     def omp_schedule(self, value):
 56 |         if len(value) != 2 or \
 57 |                 value[0].lower() not in ("static", "dynamic", "guided"):
 58 |             raise ValueError("Invalid OpenMP Schedule: {}".format(value))
 59 | 
 60 |         self._omp_schedule = value
 61 | 
 62 |     def set_omp_schedule(self, omp_schedule):
 63 |         """
 64 |         Expects input to be in the format used by OMP_SCHEDULE
 65 |         i.e. "schedule_type, chunk_size"
 66 |         """
 67 |         temp = omp_schedule.split(",")
 68 |         if len(temp) == 2:
 69 |             self.omp_schedule = (temp[0], int(temp[1]))
 70 |         else:
 71 |             self.omp_schedule = (temp[0], None)
 72 | 
 73 |     def _omp_schedule_default(self):
 74 |         return ("dynamic", 64)
 75 | 
 76 |     @property
 77 |     def use_opencl(self):
 78 |         if self._use_opencl is None:
 79 |             self._use_opencl = self._use_opencl_default()
 80 |         return self._use_opencl
 81 | 
 82 |     @use_opencl.setter
 83 |     def use_opencl(self, value):
 84 |         self._use_opencl = value
 85 | 
 86 |     def _use_opencl_default(self):
 87 |         return False
 88 | 
 89 |     @property
 90 |     def use_cuda(self):
 91 |         if self._use_cuda is None:
 92 |             self._use_cuda = self._use_cuda_default()
 93 |         return self._use_cuda
 94 | 
 95 |     @use_cuda.setter
 96 |     def use_cuda(self, value):
 97 |         self._use_cuda = value
 98 | 
 99 |     def _use_cuda_default(self):
100 |         return False
101 | 
102 |     @property
103 |     def use_double(self):
104 |         """This is only used by OpenCL code.
105 |         """
106 |         if self._use_double is None:
107 |             self._use_double = self._use_double_default()
108 |         return self._use_double
109 | 
110 |     @use_double.setter
111 |     def use_double(self, value):
112 |         """This is only used by OpenCL code.
113 |         """
114 |         self._use_double = value
115 | 
116 |     def _use_double_default(self):
117 |         return False
118 | 
119 |     @property
120 |     def profile(self):
121 |         if self._profile is None:
122 |             self._profile = self._profile_default()
123 |         return self._profile
124 | 
125 |     @profile.setter
126 |     def profile(self, value):
127 |         self._profile = value
128 | 
129 |     def _profile_default(self):
130 |         return False
131 | 
132 |     @property
133 |     def use_local_memory(self):
134 |         if self._use_local_memory is None:
135 |             self._use_local_memory = self._use_local_memory_default()
136 |         return self._use_local_memory
137 | 
138 |     @use_local_memory.setter
139 |     def use_local_memory(self, value):
140 |         self._use_local_memory = value
141 | 
142 |     def _use_local_memory_default(self):
143 |         return False
144 | 
145 |     @property
146 |     def wgs(self):
147 |         if self._wgs is None:
148 |             self._wgs = self._wgs_default()
149 |         return self._wgs
150 | 
151 |     @wgs.setter
152 |     def wgs(self, value):
153 |         self._wgs = value
154 | 
155 |     def _wgs_default(self):
156 |         return 32
157 | 
158 | 
159 | _config = None
160 | 
161 | 
162 | def get_config():
163 |     global _config
164 |     if _config is None:
165 |         _config = Config()
166 |     return _config
167 | 
168 | 
169 | def set_config(config):
170 |     global _config
171 |     _config = config
172 | 
173 | 
174 | @contextmanager
175 | def use_config(**kw):
176 |     """A context manager for the configuration.
177 | 
178 |     One can do the following::
179 | 
180 |         with use_config(use_openmp=True) as cfg:
181 |             do_something()
182 |             cfg.use_opencl = True
183 |             do_something_else()
184 | 
185 |     The configuration will be restored to the original when one exits the
186 |     context. Inside the scope of the with statement the configuration ``cfg``
187 |     is the one operational and so can be changed.
188 |     """
189 |     orig_cfg = get_config()
190 |     cfg = Config()
191 |     for k, v in kw.items():
192 |         setattr(cfg, k, v)
193 | 
194 |     set_config(cfg)
195 | 
196 |     try:
197 |         yield cfg
198 |     finally:
199 |         set_config(orig_cfg)
200 | 


--------------------------------------------------------------------------------
/compyle/extern.py:
--------------------------------------------------------------------------------
 1 | class Extern(object):
 2 |     """A simple way to support external functions and symbols.
 3 |     """
 4 |     def link(self, backend):
 5 |         """Return a list of extra link args."""
 6 |         return []
 7 | 
 8 |     def code(self, backend):
 9 |         """Return suitable code as a string.
10 | 
11 |         This code is injected at the top of the generated code.
12 |         """
13 |         raise NotImplementedError()
14 | 
15 |     def __call__(self, *args, **kw):
16 |         """Implement for a pure Python implementation if needed.
17 |         """
18 |         raise NotImplementedError()
19 | 
20 | 
21 | class _printf(Extern):
22 |     def code(self, backend):
23 |         # Always available so no need but in Cython we explicitly define it as
24 |         # an example.
25 | 
26 |         if backend == 'cython':
27 |             return 'from libc.studio cimport printf'
28 |         return ''
29 | 
30 |     def __call__(self, *args):
31 |         print(args[0] % args[1:])
32 | 
33 | 
34 | # Now make it available publicly.
35 | printf = _printf()
36 | 
37 | # More examples are available in the low_level.py module.
38 | 
39 | 
40 | def get_extern_code(externs, backend):
41 |     links = []
42 |     code = []
43 |     for ex in externs:
44 |         link = ex.link(backend)
45 |         if link:
46 |             links.extend(link)
47 |         c = ex.code(backend)
48 |         if c:
49 |             code.append(c)
50 | 
51 |     return links, code
52 | 


--------------------------------------------------------------------------------
/compyle/opencl.py:
--------------------------------------------------------------------------------
  1 | """Common OpenCL related functionality.
  2 | """
  3 | from __future__ import print_function
  4 | import pyopencl as cl
  5 | 
  6 | from .config import get_config
  7 | from .profile import profile_kernel, named_profile
  8 | 
  9 | _ctx = None
 10 | _queue = None
 11 | 
 12 | 
 13 | class DeviceWGSException(Exception):
 14 |     pass
 15 | 
 16 | 
 17 | def get_context():
 18 |     global _ctx
 19 |     if _ctx is None:
 20 |         _ctx = cl.create_some_context()
 21 |     return _ctx
 22 | 
 23 | 
 24 | def set_context(ctx):
 25 |     global _ctx
 26 |     _ctx = ctx
 27 | 
 28 | 
 29 | def get_queue():
 30 |     global _queue
 31 |     if _queue is None:
 32 |         kwargs = dict()
 33 |         if get_config().profile:
 34 |             kwargs['properties'] = cl.command_queue_properties.PROFILING_ENABLE
 35 |         _queue = cl.CommandQueue(get_context(), **kwargs)
 36 |     return _queue
 37 | 
 38 | 
 39 | def set_queue(q):
 40 |     global _queue
 41 |     _queue = q
 42 | 
 43 | 
 44 | class SimpleKernel(object):
 45 |     """ElementwiseKernel substitute that supports a custom work group size.
 46 |     """
 47 | 
 48 |     def __init__(self, ctx, args, operation, wgs,
 49 |                  name="", preamble="", options=[]):
 50 |         self.args = args
 51 |         self.operation = operation
 52 |         self.name = name
 53 |         self.preamble = preamble
 54 |         self.options = options
 55 | 
 56 |         self.prg = cl.Program(ctx, self._generate()).build(options)
 57 |         self.knl = getattr(self.prg, name)
 58 | 
 59 |         if self.get_max_wgs() < wgs:
 60 |             raise DeviceWGSException("")
 61 | 
 62 |     def _massage_arg(self, arg):
 63 |         if '*' in arg:
 64 |             return "__global " + arg
 65 |         return arg
 66 | 
 67 |     def _generate(self):
 68 |         args = [self._massage_arg(arg) for arg in self.args.split(",")]
 69 | 
 70 |         source = r"""
 71 |         %(preamble)s
 72 | 
 73 |         __kernel void %(name)s(%(args)s)
 74 |         {
 75 |           int lid = get_local_id(0);
 76 |           int gsize = get_global_size(0);
 77 |           int work_group_start = get_local_size(0)*get_group_id(0);
 78 |           long i = get_global_id(0);
 79 | 
 80 |           %(body)s
 81 |         }
 82 |         """ % {
 83 |             "args": ",".join(args),
 84 |             "name": self.name,
 85 |             "preamble": self.preamble,
 86 |             "body": self.operation
 87 |         }
 88 | 
 89 |         return source
 90 | 
 91 |     def get_max_wgs(self):
 92 |         return self.knl.get_work_group_info(
 93 |             cl.kernel_work_group_info.WORK_GROUP_SIZE,
 94 |             get_queue().device
 95 |         )
 96 | 
 97 |     def __call__(self, *args, **kwargs):
 98 |         wait_for = kwargs.pop("wait_for", None)
 99 |         queue = kwargs.pop("queue", None)
100 |         gs = kwargs.pop("gs", None)
101 |         ls = kwargs.pop("ls", None)
102 | 
103 |         if queue is None or gs is None or ls is None:
104 |             raise ValueError("queue, gs and ls can not be empty")
105 | 
106 |         if kwargs:
107 |             raise TypeError("unknown keyword arguments: '%s'"
108 |                             % ", ".join(kwargs))
109 | 
110 |         def unwrap(arg):
111 |             return arg.data if isinstance(arg, cl.array.Array) else arg
112 | 
113 |         self.knl.set_args(*[unwrap(arg) for arg in args])
114 |         return cl.enqueue_nd_range_kernel(queue, self.knl, gs, ls,
115 |                                           wait_for=wait_for)
116 | 


--------------------------------------------------------------------------------
/compyle/profile.py:
--------------------------------------------------------------------------------
  1 | """ Utils for profiling kernels
  2 | """
  3 | 
  4 | from contextlib import contextmanager
  5 | from collections import defaultdict
  6 | import time
  7 | from .config import get_config
  8 | 
  9 | 
 10 | def _make_default():
 11 |     return dict(calls=0, time=0.0)
 12 | 
 13 | 
 14 | _current_level = 0
 15 | _profile_info = defaultdict(
 16 |     lambda: defaultdict(_make_default)
 17 | )
 18 | 
 19 | 
 20 | def _record_profile(name, time):
 21 |     global _profile_info, _current_level
 22 |     li = _profile_info[_current_level]
 23 |     li[name]['time'] += time
 24 |     li[name]['calls'] += 1
 25 | 
 26 | 
 27 | @contextmanager
 28 | def profile_ctx(name):
 29 |     """ Context manager for profiling
 30 | 
 31 |     For profiling a function f, it can be used as follows::
 32 | 
 33 |     with profile_ctx('f'):
 34 |         f()
 35 |     """
 36 |     global _current_level
 37 |     _current_level += 1
 38 |     start = time.time()
 39 |     try:
 40 |         yield start
 41 |         end = time.time()
 42 |     finally:
 43 |         _current_level -= 1
 44 |     _record_profile(name, end - start)
 45 | 
 46 | 
 47 | def profile(method=None, name=None):
 48 |     """Decorator for profiling a function. Can be used as follows::
 49 | 
 50 |     @profile
 51 |     def f():
 52 |         pass
 53 | 
 54 | 
 55 |     If explicitly passed a name, with @profile(name='some name'), it will use
 56 |     the given name. Otherwise, if the function is a class method, and the class
 57 |     has a `self.name` attribute, it will use that. Otherwise, it will use the
 58 |     method's qualified name to record the profile.
 59 | 
 60 |     """
 61 |     def make_wrapper(method):
 62 |         def wrapper(*args, **kwargs):
 63 |             self = args[0] if len(args) else None
 64 |             if name is None:
 65 |                 if hasattr(self, method.__name__) and hasattr(self, 'name'):
 66 |                     p_name = self.name
 67 |                 else:
 68 |                     p_name = getattr(method, '__qualname__', method.__name__)
 69 |             else:
 70 |                 p_name = name
 71 |             with profile_ctx(p_name):
 72 |                 return method(*args, **kwargs)
 73 |         wrapper.__doc__ = method.__doc__
 74 |         return wrapper
 75 |     if method is None:
 76 |         return make_wrapper
 77 |     else:
 78 |         return make_wrapper(method)
 79 | 
 80 | 
 81 | class ProfileContext:
 82 |     """Used for a low-level profiling context.
 83 | 
 84 |     This is typically useful in Cython code where decorators are not usable and
 85 |     using a context manager makes the code hard to read.
 86 | 
 87 |     Example
 88 |     -------
 89 | 
 90 |     p = ProfileContext('some_func')
 91 |     do_something()
 92 |     p.stop()
 93 | 
 94 |     """
 95 |     def __init__(self, name):
 96 |         self.name = name
 97 |         global _current_level
 98 |         _current_level += 1
 99 |         self.start = time.time()
100 | 
101 |     def stop(self):
102 |         global _current_level
103 |         _current_level -= 1
104 |         _record_profile(self.name, time.time() - self.start)
105 | 
106 | 
107 | def get_profile_info():
108 |     global _profile_info
109 |     return _profile_info
110 | 
111 | 
112 | def print_profile():
113 |     global _profile_info
114 |     hr = '-'*70
115 |     print(hr)
116 |     if len(_profile_info) == 0:
117 |         print("No profiling information available")
118 |         print(hr)
119 |         return
120 |     print("Profiling info:")
121 |     print(
122 |         "{:<6} {:<40} {:<10} {:<10}".format(
123 |             'Level', 'Function', 'N calls', 'Time')
124 |     )
125 |     tot_time = 0
126 |     for level in range(0, min(len(_profile_info), 2)):
127 |         profile_data = sorted(
128 |             _profile_info[level].items(), key=lambda x: x[1]['time'],
129 |             reverse=True
130 |         )
131 |         for kernel, data in profile_data:
132 |             print("{:<6} {:<40} {:<10} {:<10.3g}".format(
133 |                 level, kernel, data['calls'], data['time'])
134 |             )
135 |             if level == 0:
136 |                 tot_time += data['time']
137 |     print("Total profiled time: %g secs" % tot_time)
138 |     print(hr)
139 | 
140 | 
141 | def profile2csv(fname, info=None):
142 |     '''Write profile info to a CSV file.
143 | 
144 |     If the optional info argument is passed, it is used as the profile info.
145 |     The `info` argument is a list, potentially one for each rank (for a
146 |     parallel simulation).
147 |     '''
148 |     if info is None:
149 |         info = [get_profile_info()]
150 |     with open(fname, 'w') as f:
151 |         f.write("{0},{1},{2},{3},{4}\n".format(
152 |             'rank', 'level', 'function', 'calls', 'time')
153 |         )
154 |         for rank in range(len(info)):
155 |             pdata = info[rank]
156 |             for level in sorted(pdata.keys()):
157 |                 profile_data = sorted(
158 |                     pdata[level].items(), key=lambda x: x[1]['time'],
159 |                     reverse=True
160 |                 )
161 |                 for name, data in profile_data:
162 |                     f.write("{0},{1},{2},{3},{4}\n".format(
163 |                         rank, level, name, data['calls'], data['time']
164 |                     ))
165 | 
166 | 
167 | def profile_kernel(kernel, name, backend=None):
168 |     """For profiling raw PyCUDA/PyOpenCL kernels or cython functions
169 |     """
170 |     from compyle.array import get_backend
171 |     backend = get_backend(backend)
172 | 
173 |     def _profile_knl(*args, **kwargs):
174 |         if backend == 'opencl':
175 |             start = time.time()
176 |             event = kernel(*args, **kwargs)
177 |             event.wait()
178 |             end = time.time()
179 |             _record_profile(name, end - start)
180 |             return event
181 |         elif backend == 'cuda':
182 |             exec_time = kernel(*args, **kwargs, time_kernel=True)
183 |             _record_profile(name, exec_time)
184 |             return exec_time
185 |         else:
186 |             start = time.time()
187 |             kernel(*args, **kwargs)
188 |             end = time.time()
189 |             _record_profile(name, end - start)
190 | 
191 |     if get_config().profile:
192 |         wgi = getattr(kernel, 'get_work_group_info', None)
193 |         if wgi is not None:
194 |             _profile_knl.get_work_group_info = wgi
195 |         return _profile_knl
196 |     else:
197 |         return kernel
198 | 
199 | 
200 | def named_profile(name, backend=None):
201 |     """Decorator for profiling raw PyOpenCL/PyCUDA kernels or cython functions.
202 |     This can be used on a function that returns a raw PyCUDA/PyOpenCL kernel
203 | 
204 |     For example::
205 | 
206 |     @named_profile('prefix_sum')
207 |     def _get_prefix_sum(ctx):
208 |         return GenericScanKernel(ctx, np.int32,
209 |                                  arguments="__global int *ary",
210 |                                  input_expr="ary[i]",
211 |                                  scan_expr="a+b", neutral="0",
212 |                                  output_statement="ary[i] = prev_item")
213 |     """
214 |     from compyle.array import get_backend
215 |     backend = get_backend(backend)
216 | 
217 |     def _decorator(f):
218 |         if name is None:
219 |             n = f.__name__
220 |         else:
221 |             n = name
222 | 
223 |         def _profiled_kernel_generator(*args, **kwargs):
224 |             kernel = f(*args, **kwargs)
225 |             return profile_kernel(kernel, n, backend=backend)
226 | 
227 |         return _profiled_kernel_generator
228 | 
229 |     return _decorator
230 | 


--------------------------------------------------------------------------------
/compyle/sort.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from .config import get_config
 4 | from .cython_generator import get_parallel_range, CythonGenerator
 5 | from .transpiler import Transpiler, convert_to_float_if_needed
 6 | from .types import dtype_to_ctype, annotate
 7 | from .parallel import Scan
 8 | from .template import Template
 9 | 
10 | from . import array
11 | 
12 | 
13 | class OutputSortBit(Template):
14 |     def __init__(self, name, num_arys):
15 |         super(OutputSortBit, self).__init__(name=name)
16 |         self.num_arys = num_arys
17 | 
18 |     def extra_args(self):
19 |         args = ['inp_%s' % num for num in range(self.num_arys)]
20 |         args += ['out_%s' % num for num in range(self.num_arys)]
21 |         return args, {}
22 | 
23 |     def template(self, i, item, prev_item, last_item, bit_number, indices,
24 |                  sorted_indices):
25 |         '''
26 |         key_bit = (inp_0[i] >> bit_number) & 1
27 |         t = last_item + i - prev_item
28 |         idx = t if key_bit else prev_item
29 | 
30 |         sorted_indices[idx] = indices[i]
31 |         % for num in range(obj.num_arys):
32 |         out_${num}[idx] = inp_${num}[i]
33 |         % endfor
34 |         '''
35 | 
36 | 
37 | @annotate
38 | def input_sort_bit(i, inp_0, bit_number):
39 |     return 1 if (inp_0[i] >> bit_number) & 1 == 0 else 0
40 | 
41 | 
42 | def radix_sort(ary_list, out_list=None, max_key_bits=None, backend=None):
43 |     keys = ary_list[0]
44 |     backend = array.get_backend(backend)
45 |     if not np.issubdtype(keys.dtype, np.integer):
46 |         raise ValueError("RadixSort can only sort integer types")
47 |     if max_key_bits is None:
48 |         max_key_bits = 8 * keys.dtype.itemsize
49 | 
50 |     # temp arrays
51 |     sorted_indices = array.zeros(keys.length, np.int32, backend=backend)
52 |     temp_indices = array.zeros_like(sorted_indices)
53 | 
54 |     indices = array.arange(0, keys.length, 1, backend=backend)
55 | 
56 |     # allocate temp arrays
57 |     if out_list:
58 |         temp_ary_list = out_list
59 |     else:
60 |         temp_ary_list = [array.zeros_like(ary) for ary in ary_list]
61 |     sorted_ary_list = [array.zeros_like(ary) for ary in ary_list]
62 | 
63 |     # kernel
64 |     output_sort_bit = OutputSortBit('output_sort_bit', len(ary_list))
65 | 
66 |     sort_bit_knl = Scan(input_sort_bit, output_sort_bit.function,
67 |                         'a+b', dtype=keys.dtype, backend=backend)
68 | 
69 |     for bit_number in range(max_key_bits):
70 |         if bit_number == 0:
71 |             inp_indices = indices
72 |             inp_ary_list = ary_list
73 |         else:
74 |             inp_indices = temp_indices
75 |             inp_ary_list = temp_ary_list
76 | 
77 |         args = {'bit_number': bit_number, 'indices': indices,
78 |                 'sorted_indices': sorted_indices}
79 |         args.update({'inp_%i' % i: ary for i, ary in enumerate(inp_ary_list)})
80 |         args.update({'out_%i' %
81 |                      i: ary for i, ary in enumerate(sorted_ary_list)})
82 | 
83 |         sort_bit_knl(**args)
84 | 
85 |         temp_indices, sorted_indices = sorted_indices, temp_indices
86 |         temp_ary_list, sorted_ary_list = sorted_ary_list, temp_ary_list
87 | 
88 |     return temp_ary_list, temp_indices
89 | 


--------------------------------------------------------------------------------
/compyle/template.py:
--------------------------------------------------------------------------------
  1 | import ast
  2 | import inspect
  3 | from textwrap import dedent
  4 | 
  5 | from .types import kwtype_to_annotation
  6 | import mako.template
  7 | 
  8 | 
  9 | getfullargspec = inspect.getfullargspec
 10 | 
 11 | 
 12 | class Template(object):
 13 |     def __init__(self, name):
 14 |         self.name = name
 15 |         self._function = None
 16 | 
 17 |     @property
 18 |     def function(self):
 19 |         if self._function is None:
 20 |             self._function = self._make_function()
 21 |         return self._function
 22 | 
 23 |     def _make_function(self):
 24 |         src, annotations = self._get_code()
 25 |         self._source = src
 26 |         namespace = {}
 27 |         exec(src, namespace)
 28 |         f = namespace[self.name]
 29 |         f.__module__ = self.__module__
 30 |         f.is_jit = len(annotations) == 0
 31 |         try:
 32 |             f.__annotations__ = annotations
 33 |         except AttributeError:
 34 |             f.im_func.__annotations__ = annotations
 35 |         f.source = src
 36 |         return f
 37 | 
 38 |     def _get_code(self):
 39 |         m = ast.parse(dedent(inspect.getsource(self.template)))
 40 |         argspec = getfullargspec(self.template)
 41 |         args = argspec.args
 42 |         if args[0] == 'self':
 43 |             args = args[1:]
 44 |         extra_args, extra_annotations = self.extra_args()
 45 |         args += extra_args
 46 |         arg_string = ', '.join(args)
 47 |         body = m.body[0].body
 48 |         template = body[-1].value.s
 49 |         docstring = body[0].value.s if len(body) == 2 else ''
 50 |         name = self.name
 51 |         sig = 'def {name}({args}):\n    """{docs}\n    """'.format(
 52 |             name=name, args=arg_string, docs=docstring
 53 |         )
 54 |         src = sig + self.render(template)
 55 |         annotations = getattr(self.template, '__annotations__', {})
 56 |         data = kwtype_to_annotation(extra_annotations)
 57 |         annotations.update(data)
 58 |         return src, annotations
 59 | 
 60 |     def inject(self, func, indent=1):
 61 |         '''Returns the source code of the body of `func`.
 62 | 
 63 |         The optional `indent` parameter is the indentation to be used for the
 64 |         code.  When indent is 1, 4 spaces are added to each line.
 65 | 
 66 |         This is meant to be used from the mako template. The idea is that one
 67 |         can define the code to be injected as a method and have the body be
 68 |         directly injected.
 69 |         '''
 70 |         lines = inspect.getsourcelines(func)[0]
 71 |         src = dedent(''.join(lines))
 72 |         m = ast.parse(src)
 73 |         # We do this so as to not inject any docstrings.
 74 |         body_start_index = 1 if isinstance(m.body[0].body[0], ast.Expr) else 0
 75 |         body_start = m.body[0].body[body_start_index].lineno - 1
 76 |         body_lines = lines[body_start:]
 77 |         first = body_lines[0]
 78 |         leading = first.index(first.lstrip())
 79 |         diff = indent*4 - leading
 80 |         if diff < 0:
 81 |             indented_body = [x[-diff:] for x in body_lines]
 82 |         else:
 83 |             indented_body = [' '*diff + x for x in body_lines]
 84 |         return ''.join(indented_body)
 85 | 
 86 |     def render(self, src):
 87 |         t = mako.template.Template(text=src)
 88 |         return t.render(obj=self)
 89 | 
 90 |     def extra_args(self):
 91 |         '''Override this to provide configurable arguments.
 92 | 
 93 |         Return a list of strings which are the arguments and a dictionary with
 94 |         the type annotations.
 95 | 
 96 |         '''
 97 |         return [], {}
 98 | 
 99 |     def template(self):
100 |         '''Override this to write your mako template.
101 | 
102 |         `obj` is mapped to self.
103 |         '''
104 |         '''
105 |         ## Mako code here.
106 |         '''
107 | 


--------------------------------------------------------------------------------
/compyle/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pypr/compyle/bc858bab005f2cc9990267448c5b873d4b5f8635/compyle/tests/__init__.py


--------------------------------------------------------------------------------
/compyle/tests/py3_code.py:
--------------------------------------------------------------------------------
 1 | # Python3 specific code for some tests.
 2 | 
 3 | from ..types import int_, declare
 4 | 
 5 | 
 6 | def py3_f(x: int_) -> int_:
 7 |     y = declare('int')
 8 |     y = x + 1
 9 |     return x*y
10 | 


--------------------------------------------------------------------------------
/compyle/tests/test_ast_utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import ast
  3 | import sys
  4 | from textwrap import dedent
  5 | import unittest
  6 | 
  7 | from ..ast_utils import (
  8 |     get_assigned, get_symbols, get_unknown_names_and_calls,
  9 |     has_node, has_return
 10 | )
 11 | 
 12 | 
 13 | class TestASTUtils(unittest.TestCase):
 14 |     def test_get_symbols(self):
 15 |         code = '''
 16 |         x = 1
 17 |         d_x[d_idx] += s_x[s_idx]
 18 |         '''
 19 |         tree = ast.parse(dedent(code))
 20 |         result = list(get_symbols(tree))
 21 |         result.sort()
 22 |         expect = ['d_idx', 'd_x', 's_idx', 's_x', 'x']
 23 |         self.assertEqual(result, expect)
 24 | 
 25 |         # Test if it parses with the code itself instead of a tree.
 26 |         result = list(get_symbols(dedent(code)))
 27 |         result.sort()
 28 |         self.assertEqual(result, expect)
 29 | 
 30 |         result = list(get_symbols(tree, ctx=ast.Store))
 31 |         result.sort()
 32 |         self.assertEqual(result, ['x'])
 33 | 
 34 |     def test_has_return(self):
 35 |         code = dedent('''
 36 |                 x = 1
 37 |                 ''')
 38 |         self.assertFalse(has_return(code))
 39 |         code = dedent('''
 40 |                 def f():
 41 |                     pass
 42 |                 ''')
 43 |         self.assertFalse(has_return(code))
 44 |         code = dedent('''
 45 |                 def f(x):
 46 |                     return x+1
 47 |                 ''')
 48 |         self.assertTrue(has_return(code))
 49 | 
 50 |     def test_has_node(self):
 51 |         code = dedent('''
 52 |                 x = 1
 53 |                 ''')
 54 |         self.assertFalse(has_node(code, (ast.Return, ast.AugAssign)))
 55 |         code = dedent('''
 56 |                 def f():
 57 |                     pass
 58 |                 ''')
 59 |         self.assertTrue(has_node(code, (ast.AugAssign, ast.FunctionDef)))
 60 | 
 61 |     def test_assigned_values(self):
 62 |         code = dedent('''
 63 |             u[0] = 0.0
 64 |             x = 1
 65 |             y = sin(x)*theta
 66 |             z += 1
 67 |             ''')
 68 |         assigned = list(sorted(get_assigned(code)))
 69 |         # sin or theta should not be detected.
 70 |         expect = ['u', 'x', 'y', 'z']
 71 |         self.assertEqual(assigned, expect)
 72 | 
 73 |     def test_assigned_tuple_expansion(self):
 74 |         code = dedent('''
 75 |             u, v = 0.0, 1.0
 76 |             [x, y] = 0.0, 1.0
 77 |             ''')
 78 |         assigned = list(sorted(get_assigned(code)))
 79 |         expect = ['u', 'v', 'x', 'y']
 80 |         self.assertEqual(assigned, expect)
 81 | 
 82 |     def test_get_unknown_names_and_calls(self):
 83 |         code = dedent('''
 84 |         def f(x):
 85 |             g(h(x))
 86 |             y = x + SIZE
 87 |             for i in range(y):
 88 |                 x += func(JUNK)
 89 |             sin(x)
 90 |         ''')
 91 | 
 92 |         # When
 93 |         names, calls = get_unknown_names_and_calls(code)
 94 | 
 95 |         # Then.
 96 |         e_names = {'SIZE', 'i', 'JUNK'}
 97 |         e_calls = {'g', 'h', 'range', 'func', 'sin'}
 98 |         self.assertSetEqual(names, e_names)
 99 |         self.assertSetEqual(calls, e_calls)
100 | 
101 |     @unittest.skipIf(sys.version_info < (3, 4),
102 |                      reason='Test requires Python 3.')
103 |     def test_get_unknown_names_and_calls_with_py3_annotation(self):
104 |         code = dedent('''
105 |         from compyle import types as T
106 | 
107 |         def f(x: T.doublep, n: T.int_)-> T.double:
108 |             s = declare('double')
109 |             for i in range(n):
110 |                 s += func(x)
111 |             return s
112 |         ''')
113 | 
114 |         # When
115 |         names, calls = get_unknown_names_and_calls(code)
116 | 
117 |         # Then.
118 |         e_names = {'i'}
119 |         e_calls = {'declare', 'func', 'range'}
120 |         self.assertSetEqual(names, e_names)
121 |         self.assertSetEqual(calls, e_calls)
122 | 
123 | 
124 | if __name__ == '__main__':
125 |     unittest.main()
126 | 


--------------------------------------------------------------------------------
/compyle/tests/test_capture_stream.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import sys
 3 | import unittest
 4 | 
 5 | import pytest
 6 | 
 7 | from ..capture_stream import CaptureMultipleStreams, CaptureStream
 8 | 
 9 | if sys.platform.startswith("win32") and sys.version_info[:2] > (3, 5):
10 |     pytest.skip("skipping capture tests on windows", allow_module_level=True)
11 | 
12 | 
13 | def write_stderr():
14 |     subprocess.call(
15 |         [sys.executable, "-S", "-s", "-c",
16 |          "import sys;sys.stderr.write('stderr')"]
17 |     )
18 | 
19 | 
20 | def write_stdout():
21 |     subprocess.call(
22 |         [sys.executable, "-S", "-s", "-c",
23 |          "import sys;sys.stdout.write('stdout')"]
24 |     )
25 | 
26 | 
27 | class TestCaptureStream(unittest.TestCase):
28 |     def test_that_stderr_is_captured_by_default(self):
29 |         # Given
30 |         # When
31 |         with CaptureStream() as stream:
32 |             write_stderr()
33 |         # Then
34 |         self.assertEqual(stream.get_output(), "stderr")
35 | 
36 |     def test_that_stdout_can_be_captured(self):
37 |         # Given
38 |         # When
39 |         with CaptureStream(sys.stdout) as stream:
40 |             write_stdout()
41 |         # Then
42 |         self.assertEqual(stream.get_output(), "stdout")
43 | 
44 |     def test_that_output_is_available_in_context_and_outside(self):
45 |         # Given
46 |         # When
47 |         with CaptureStream(sys.stderr) as stream:
48 |             write_stderr()
49 |             # Then
50 |             self.assertEqual(stream.get_output(), "stderr")
51 | 
52 |         # Then
53 |         self.assertEqual(stream.get_output(), "stderr")
54 | 
55 | 
56 | class TestCaptureMultipleStreams(unittest.TestCase):
57 |     def test_that_stdout_stderr_are_captured_by_default(self):
58 |         # Given
59 |         # When
60 |         with CaptureMultipleStreams() as stream:
61 |             write_stderr()
62 |             write_stdout()
63 |         # Then
64 |         outputs = stream.get_output()
65 |         self.assertEqual(outputs[0], "stdout")
66 |         self.assertEqual(outputs[1], "stderr")
67 | 
68 |     def test_that_order_is_preserved(self):
69 |         # Given
70 |         # When
71 |         with CaptureMultipleStreams((sys.stderr, sys.stdout)) as stream:
72 |             write_stderr()
73 |             write_stdout()
74 |         # Then
75 |         outputs = stream.get_output()
76 |         self.assertEqual(outputs[0], "stderr")
77 |         self.assertEqual(outputs[1], "stdout")
78 | 
79 | 
80 | if __name__ == '__main__':
81 |     unittest.main()
82 | 


--------------------------------------------------------------------------------
/compyle/tests/test_change_backend.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | import numpy as np
  4 | from pytest import importorskip
  5 | 
  6 | from ..config import use_config
  7 | from ..array import wrap
  8 | from ..types import annotate
  9 | from ..parallel import elementwise, Reduction, Scan
 10 | 
 11 | 
 12 | class TestChangeBackend(unittest.TestCase):
 13 |     def test_elementwise_late_binding(self):
 14 |         # Given/When
 15 |         @elementwise
 16 |         @annotate
 17 |         def axpb(i, y, x, a, b):
 18 |             y[i] = a*x[i] + b
 19 | 
 20 |         # Then
 21 |         self.assertIsNone(axpb.elementwise)
 22 | 
 23 |     def test_reduction_late_binding(self):
 24 |         # Given/When
 25 |         r = Reduction('a+b')
 26 | 
 27 |         # Then
 28 |         self.assertIsNone(r.reduction)
 29 | 
 30 |     def test_scan_late_binding(self):
 31 |         # Given/When
 32 |         @annotate
 33 |         def output_f(i, last_item, item, ary):
 34 |             ary[i] = item + last_item
 35 | 
 36 |         scan = Scan(output=output_f, scan_expr='a+b',
 37 |                     dtype=np.int32)
 38 | 
 39 |         # Then
 40 |         self.assertIsNone(scan.scan)
 41 | 
 42 |     def test_elementwise_supports_changing_backend(self):
 43 |         importorskip("pyopencl")
 44 | 
 45 |         # Given/When
 46 |         @elementwise
 47 |         @annotate
 48 |         def axpb(i, y, x, a, b):
 49 |             y[i] = a*x[i] + b
 50 | 
 51 |         # When
 52 |         a, b = 2.0, 1.5
 53 |         x = np.linspace(0, 2*np.pi, 100)
 54 |         y = np.zeros_like(x)
 55 |         y0 = a*x + b
 56 |         with use_config(use_opencl=True):
 57 |             x, y = wrap(x, y)
 58 |             axpb(y, x, a, b)
 59 |             y.pull()
 60 | 
 61 |         # Then
 62 |         np.testing.assert_array_almost_equal(y.data, y0)
 63 |         self.assertEqual(axpb.elementwise.backend, 'opencl')
 64 | 
 65 |         # When
 66 |         x, y = wrap(x.data, y.data)
 67 |         axpb.set_backend('cython')
 68 |         axpb(y, x, a, b)
 69 |         # Then
 70 |         np.testing.assert_array_almost_equal(y.data, y0)
 71 |         self.assertEqual(axpb.elementwise.backend, 'cython')
 72 | 
 73 |     def test_reduction_supports_changing_backend(self):
 74 |         importorskip("pyopencl")
 75 | 
 76 |         # Given
 77 |         r = Reduction('a+b')
 78 | 
 79 |         # When
 80 |         x = np.linspace(0, 1, 1000) / 1000
 81 |         x_orig = x.copy()
 82 |         expect = 0.5
 83 | 
 84 |         with use_config(use_opencl=True):
 85 |             x = wrap(x)
 86 |             result = r(x)
 87 | 
 88 |         # Then
 89 |         self.assertAlmostEqual(result, expect, 6)
 90 | 
 91 |         # When
 92 |         x = wrap(x_orig)
 93 |         r.set_backend('cython')
 94 |         result = r(x)
 95 | 
 96 |         # Then
 97 |         self.assertAlmostEqual(result, expect, 6)
 98 | 
 99 |     def test_scan_supports_changing_backend(self):
100 |         importorskip("pyopencl")
101 | 
102 |         # Given/When
103 |         @annotate
104 |         def input_f(i, ary):
105 |             return ary[i]
106 | 
107 |         @annotate
108 |         def output_f(i, item, ary):
109 |             ary[i] = item
110 | 
111 |         scan = Scan(input_f, output_f, 'a+b', dtype=np.int32)
112 | 
113 |         # When
114 |         a = np.arange(10000, dtype=np.int32)
115 |         data = a.copy()
116 |         expect = np.cumsum(a)
117 | 
118 |         with use_config(use_opencl=True):
119 |             a = wrap(a)
120 |             scan(input=a, ary=a)
121 |             a.pull()
122 | 
123 |         # Then
124 |         np.testing.assert_array_almost_equal(a.data, expect)
125 | 
126 |         # When
127 |         a = wrap(data)
128 |         scan.set_backend('cython')
129 |         scan(input=a, ary=a)
130 |         a.pull()
131 | 
132 |         # Then
133 |         np.testing.assert_array_almost_equal(a.data, expect)
134 | 
135 |     def test_wrap_is_identity_on_arrays_with_same_backend(self):
136 |         # Given
137 |         x = np.linspace(0, 1, 100)
138 | 
139 |         # When
140 |         xw = wrap(x)
141 | 
142 |         res = wrap(xw)
143 | 
144 |         # Then
145 |         self.assertIs(res, xw)
146 | 
147 |     def test_wrap_can_wrap_array_to_different_backend(self):
148 |         importorskip("pyopencl")
149 |         # Given
150 |         x = np.linspace(0, 1, 100)
151 | 
152 |         # When
153 |         xc = wrap(x)
154 |         with use_config(use_opencl=True):
155 |             xocl = wrap(xc)
156 | 
157 |         # Then
158 |         self.assertEqual(xc.backend, 'cython')
159 |         self.assertEqual(xocl.backend, 'opencl')
160 |         np.testing.assert_array_almost_equal(xocl.data, xc.data)
161 | 


--------------------------------------------------------------------------------
/compyle/tests/test_config.py:
--------------------------------------------------------------------------------
  1 | """Tests for the configuration.
  2 | """
  3 | from unittest import TestCase, main
  4 | 
  5 | from ..config import Config, get_config, set_config, use_config
  6 | 
  7 | 
  8 | class ConfigTestCase(TestCase):
  9 | 
 10 |     def setUp(self):
 11 |         # Unset any default configuration.
 12 |         set_config(None)
 13 |         self.config = Config()
 14 | 
 15 |     def tearDown(self):
 16 |         # Unset any default configuration.
 17 |         set_config(None)
 18 | 
 19 |     def test_use_openmp_config_default(self):
 20 |         # Given
 21 |         config = self.config
 22 |         # When
 23 |         # Then
 24 |         self.assertFalse(config.use_openmp)
 25 | 
 26 |     def test_set_get_use_openmp_config(self):
 27 |         # Given
 28 |         config = self.config
 29 |         # When
 30 |         config.use_openmp = 10
 31 |         # Then
 32 |         self.assertEqual(config.use_openmp, 10)
 33 | 
 34 |     def test_set_get_omp_schedule_config(self):
 35 |         # Given
 36 |         config = self.config
 37 |         # When
 38 |         config.omp_schedule = ("static", 10)
 39 |         # Then
 40 |         self.assertEqual(config.omp_schedule, ("static", 10))
 41 | 
 42 |     def test_set_string_omp_schedule(self):
 43 |         # Given
 44 |         config = self.config
 45 |         # When
 46 |         config.set_omp_schedule("dynamic,20")
 47 |         # Then
 48 |         self.assertEqual(config.omp_schedule, ("dynamic", 20))
 49 | 
 50 |     def test_set_omp_schedule_config_exception(self):
 51 |         # Given
 52 |         config = self.config
 53 |         # When
 54 |         # Then
 55 |         with self.assertRaises(ValueError):
 56 |             config.omp_schedule = ("random", 20)
 57 | 
 58 |     def test_use_opencl_config_default(self):
 59 |         # Given
 60 |         config = self.config
 61 |         # When
 62 |         # Then
 63 |         self.assertFalse(config.use_opencl)
 64 | 
 65 |     def test_set_get_use_opencl_config(self):
 66 |         # Given
 67 |         config = self.config
 68 |         # When
 69 |         config.use_opencl = 10
 70 |         # Then
 71 |         self.assertEqual(config.use_opencl, 10)
 72 | 
 73 |     def test_use_double_config_default(self):
 74 |         # Given
 75 |         config = self.config
 76 |         # When
 77 |         # Then
 78 |         self.assertFalse(config.use_double)
 79 | 
 80 |     def test_set_get_use_double_config(self):
 81 |         # Given
 82 |         config = self.config
 83 |         # When
 84 |         config.use_double = 10
 85 |         # Then
 86 |         self.assertEqual(config.use_double, 10)
 87 | 
 88 |     def test_default_global_config_is_really_global(self):
 89 |         # Given.
 90 |         config = get_config()
 91 |         self.assertTrue(isinstance(config, Config))
 92 | 
 93 |         # When
 94 |         config.use_openmp = 100
 95 | 
 96 |         # Then.
 97 |         config1 = get_config()
 98 |         self.assertEqual(config1.use_openmp, 100)
 99 | 
100 |     def test_set_global(self):
101 |         # Given.
102 |         self.config.use_openmp = 200
103 |         set_config(self.config)
104 | 
105 |         # When
106 |         config = get_config()
107 | 
108 |         # Then.
109 |         self.assertEqual(config.use_openmp, 200)
110 | 
111 |     def test_use_config(self):
112 |         # Given
113 |         self.config.use_openmp = 200
114 |         set_config(self.config)
115 | 
116 |         # When/Then
117 |         with use_config(use_openmp=300) as cfg:
118 |             config = get_config()
119 |             self.assertEqual(config.use_openmp, 300)
120 |             self.assertEqual(cfg.use_openmp, 300)
121 |             cfg.use_openmp = 100
122 |             cfg.use_double = False
123 |             self.assertEqual(config.use_openmp, 100)
124 |             self.assertEqual(config.use_double, False)
125 | 
126 |         # Then
127 |         self.assertEqual(get_config().use_openmp, 200)
128 | 
129 | 
130 | if __name__ == '__main__':
131 |     main()
132 | 


--------------------------------------------------------------------------------
/compyle/tests/test_cuda.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | pytest.importorskip('pycuda')
 4 | 
 5 | from compyle.array import wrap
 6 | from compyle.thrust.sort import argsort
 7 | import numpy as np
 8 | 
 9 | 
10 | def test_sort():
11 |     length = 100
12 |     a = np.array(np.random.rand(length), dtype=np.float32)
13 |     b = wrap(a, backend='cuda')
14 |     res_gpu = argsort(b).get()
15 |     res_cpu = np.argsort(a)
16 |     assert np.all(res_gpu == res_cpu)
17 | 


--------------------------------------------------------------------------------
/compyle/tests/test_ext_module.py:
--------------------------------------------------------------------------------
  1 | from contextlib import contextmanager
  2 | from io import open as io_open
  3 | import os
  4 | from os.path import join, exists
  5 | import shutil
  6 | import sys
  7 | import tempfile
  8 | from textwrap import dedent
  9 | from multiprocessing import Pool
 10 | import pytest
 11 | from unittest import TestCase, main, SkipTest
 12 | 
 13 | try:
 14 |     from unittest import mock
 15 | except ImportError:
 16 |     import mock
 17 | 
 18 | import compyle.ext_module
 19 | 
 20 | from ..ext_module import (get_md5, ExtModule, get_ext_extension,
 21 |                           get_config_file_opts, get_openmp_flags)
 22 | 
 23 | 
 24 | def _check_write_source(root):
 25 |     """Used to create an ExtModule and test if a file was opened.
 26 | 
 27 |     It returns the number of times "open" was called.
 28 |     """
 29 |     m = mock.mock_open()
 30 |     orig_side_effect = m.side_effect
 31 | 
 32 |     def _side_effect(*args, **kw):
 33 |         with io_open(*args, **kw) as fp:
 34 |             fp.write("junk")
 35 |         return orig_side_effect(*args, **kw)
 36 |     m.side_effect = _side_effect
 37 | 
 38 |     with mock.patch('compyle.ext_module.io.open', m, create=True):
 39 |         s = ExtModule("print('hello')", root=root)
 40 |         s.write_source()
 41 |     return m.call_count
 42 | 
 43 | 
 44 | def _check_compile(root):
 45 |     with mock.patch('shutil.copy') as m:
 46 |         s = ExtModule("print('hello')", root=root)
 47 |         s.write_and_build()
 48 |     if m.called:
 49 |         # If it was called, do the copy to mimic the action.
 50 |         shutil.copy(*m.call_args[0])
 51 |     return m.call_count
 52 | 
 53 | 
 54 | def test_get_config_file_opts():
 55 |     # Given
 56 |     cfg = dedent('''
 57 |     OMP_CFLAGS = ['-fxxx']
 58 |     OMP_LINK = ['-fyyy']
 59 |     ''')
 60 |     m = mock.mock_open(read_data=cfg)
 61 |     with mock.patch('compyle.ext_module.open', m), \
 62 |         mock.patch('compyle.ext_module.exists') as mock_exists:
 63 |         # When
 64 |         mock_exists.return_value = False
 65 |         opts = get_config_file_opts()
 66 |         print(opts)
 67 | 
 68 |         # Then
 69 |         assert 'OMP_CFLAGS' not in opts
 70 |         assert 'OMP_LINK' not in opts
 71 | 
 72 |         # When
 73 |         mock_exists.return_value = True
 74 |         opts = get_config_file_opts()
 75 | 
 76 |         # Then
 77 |         assert opts['OMP_CFLAGS'] == ['-fxxx']
 78 |         assert opts['OMP_LINK'] == ['-fyyy']
 79 | 
 80 | 
 81 | def test_get_openmp_flags():
 82 |     # Given/When
 83 |     f = get_openmp_flags()
 84 | 
 85 |     # Then
 86 |     assert f[0] != ['-fxxx']
 87 |     assert f[1] != ['-fyyy']
 88 |     assert len(f[0]) > 0
 89 | 
 90 |     # Given
 91 |     m = dict(OMP_CFLAGS=['-fxxx'], OMP_LINK=['-fyyy'])
 92 | 
 93 |     with mock.patch.object(compyle.ext_module, 'CONFIG_OPTS', m):
 94 |         # When
 95 |         f = get_openmp_flags()
 96 | 
 97 |         # Then
 98 |         assert f[0] == ['-fxxx']
 99 |         assert f[1] == ['-fyyy']
100 | 
101 | 
102 | class TestMiscExtMod(TestCase):
103 |     def test_md5(self):
104 |         data = "hello world"
105 |         # Two calls with same data produce same result
106 |         self.assertEqual(get_md5(data), get_md5(data))
107 |         # Two calls with different data produce different md5sums.
108 |         self.assertNotEqual(get_md5(data), get_md5(data + ' '))
109 | 
110 | 
111 | @pytest.fixture(scope="function")
112 | def use_capsys(request, capsys):
113 |     request.instance.capsys = capsys
114 | 
115 | 
116 | class TestExtModule(TestCase):
117 |     def setUp(self):
118 |         self.root = tempfile.mkdtemp()
119 |         self.data = dedent('''\
120 |         # cython: language_level=3
121 |         def f():
122 |             return "hello world"
123 |         ''')
124 | 
125 |     def tearDown(self):
126 |         if sys.platform.startswith('win'):
127 |             try:
128 |                 shutil.rmtree(self.root)
129 |             except WindowsError:
130 |                 pass
131 |         else:
132 |             shutil.rmtree(self.root)
133 | 
134 |     def test_constructor(self):
135 |         data = self.data
136 |         s = ExtModule(data, root=self.root)
137 |         self.assertTrue(exists(join(self.root, 'build')))
138 | 
139 |         self.assertEqual(s.hash, get_md5(data))
140 |         self.assertEqual(s.code, data)
141 |         expect_name = 'm_%s' % (s.hash)
142 |         self.assertEqual(s.name, expect_name)
143 |         self.assertEqual(s.src_path, join(self.root, expect_name + '.pyx'))
144 |         self.assertEqual(s.ext_path,
145 |                          join(self.root, expect_name + get_ext_extension()))
146 | 
147 |         s.write_source()
148 |         self.assertTrue(exists(s.src_path))
149 |         self.assertEqual(data, open(s.src_path).read())
150 | 
151 |     def test_default_root(self):
152 |         try:
153 |             data = self.data
154 |             s = ExtModule(data)
155 |             s.write_source()
156 |             self.assertTrue(exists(join(s.root, 'build')))
157 |             self.assertEqual(s.hash, get_md5(data))
158 |             self.assertEqual(s.code, data)
159 |             self.assertTrue(exists(s.src_path))
160 |             self.assertEqual(data, open(s.src_path).read())
161 |         finally:
162 |             os.unlink(s.src_path)
163 | 
164 |     def test_load_module(self):
165 |         data = self.data
166 |         s = ExtModule(data, root=self.root)
167 |         mod = s.load()
168 |         self.assertEqual(mod.f(), "hello world")
169 |         self.assertTrue(exists(s.ext_path))
170 | 
171 |     @pytest.mark.usefixtures("use_capsys")
172 |     def test_compiler_errors_are_captured(self):
173 |         # Given
174 |         src = dedent('''\
175 |         # cython: language_level=3
176 |         def f():
177 |             print(bug)
178 |         ''')
179 |         s = ExtModule(src, root=self.root)
180 | 
181 |         # When
182 |         self.assertRaises(SystemExit, s.write_and_build)
183 | 
184 |         # Then
185 |         captured = self.capsys.readouterr()
186 |         err = captured.out + captured.err
187 |         print(err)
188 |         self.assertTrue('Error compiling Cython file' in err)
189 |         self.assertTrue('def f()' in err)
190 | 
191 |     def _create_dummy_module(self):
192 |         code = "# cython: language_level=3\ndef hello(): return 'hello'"
193 |         modname = 'test_rebuild.py'
194 |         f = join(self.root, modname)
195 |         with open(f, 'w') as fp:
196 |             fp.write(code)
197 |         return f
198 | 
199 |     @contextmanager
200 |     def _add_root_to_sys_path(self):
201 |         import sys
202 |         if self.root not in sys.path:
203 |             sys.path.insert(0, self.root)
204 |         try:
205 |             yield
206 |         finally:
207 |             sys.path.remove(self.root)
208 | 
209 |     def test_rebuild_when_dependencies_change(self):
210 |         # Given.
211 |         data = self.data
212 |         depends = ["test_rebuild"]
213 |         s = ExtModule(data, root=self.root, depends=depends)
214 |         fname = self._create_dummy_module()
215 |         f_stat = os.stat(fname)
216 | 
217 |         with self._add_root_to_sys_path():
218 |             # When
219 |             self.assertTrue(s.should_recompile())
220 |             s.write_and_build()
221 | 
222 |             # Then.
223 |             self.assertFalse(s.should_recompile())
224 | 
225 |             # Now lets re-create the module and try again.
226 | 
227 |             # When.
228 |             fname = self._create_dummy_module()
229 |             # Update the timestamp to make it newer, otherwise we need to
230 |             # sleep.
231 |             os.utime(fname, (f_stat.st_atime, f_stat.st_mtime + 10))
232 | 
233 |             # Then.
234 |             self.assertTrue(s.should_recompile())
235 | 
236 |     def test_that_multiple_writes_do_not_occur_for_same_source(self):
237 |         if (sys.platform.startswith("win32") and
238 |            sys.version_info[:2] == (3, 11)):
239 |             raise SkipTest('Fails on Python 3.11')
240 | 
241 |         # Given
242 |         n_proc = 5
243 |         p = Pool(n_proc)
244 | 
245 |         # When
246 | 
247 |         # Note that _create_extension cannot be defined here or even in the
248 |         # class as a nested function or instance method cannot be pickled.
249 | 
250 |         result = p.map(_check_write_source, [self.root]*n_proc)
251 |         p.close()
252 | 
253 |         # Then
254 |         # The file should have been opened only once.
255 |         self.assertEqual(sum(result), 1)
256 | 
257 |     def test_that_multiple_compiles_do_not_occur_for_same_source(self):
258 |         # Given
259 |         n_proc = 5
260 |         p = Pool(n_proc)
261 | 
262 |         # When
263 | 
264 |         # Note that _check_compile cannot be defined here or even in the
265 |         # class as a nested function or instance method cannot be pickled.
266 | 
267 |         result = p.map(_check_compile, [self.root]*n_proc)
268 |         p.close()
269 | 
270 |         # Then
271 |         # The shutil.copy should have been run only once.
272 |         self.assertEqual(sum(result), 1)
273 | 
274 | 
275 | if __name__ == '__main__':
276 |     main()
277 | 


--------------------------------------------------------------------------------
/compyle/tests/test_gpu_struct.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import pytest
 3 | 
 4 | import numpy as np
 5 | 
 6 | 
 7 | class TestStructMapping(unittest.TestCase):
 8 | 
 9 |     @classmethod
10 |     def setUpClass(cls):
11 |         print("SetupClass")
12 |         pytest.importorskip("pycuda")
13 |         from compyle.cuda import set_context
14 |         set_context()
15 | 
16 |     def test_cuda_struct_mapping(self):
17 |         from compyle.cuda import match_dtype_to_c_struct
18 |         from pycuda import gpuarray
19 |         # Given
20 |         dtype = np.dtype([('l', np.int64),
21 |                           ('i', np.uint8),
22 |                           ('x', np.float32)])
23 |         a = np.empty(1, dtype)
24 |         a['l'] = 1.0
25 |         a['i'] = 2
26 |         a['x'] = 1.23
27 | 
28 |         # When
29 |         gs1, code1 = match_dtype_to_c_struct(None, "junk", a.dtype)
30 |         a_ga = a.astype(gs1)
31 |         ga = gpuarray.to_gpu(a_ga)
32 | 
33 |         # Then
34 |         result = ga.get()
35 |         np.testing.assert_almost_equal(result.tolist(), a.tolist())
36 |         self.assertFalse(a.dtype.fields == gs1.fields)
37 | 


--------------------------------------------------------------------------------
/compyle/tests/test_low_level.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import numpy as np
  3 | 
  4 | from pytest import importorskip
  5 | 
  6 | from ..config import use_config
  7 | from ..array import wrap
  8 | from ..types import annotate, declare
  9 | from ..low_level import (
 10 |     Cython, Kernel, LocalMem, local_barrier, GID_0, LDIM_0, LID_0,
 11 |     nogil, prange, parallel, cast
 12 | )
 13 | 
 14 | 
 15 | class TestKernel(unittest.TestCase):
 16 |     def test_simple_kernel_opencl(self):
 17 |         importorskip('pyopencl')
 18 | 
 19 |         # Given
 20 |         @annotate(gdoublep='x, y', a='float', size='int')
 21 |         def knl(x, y, a, size):
 22 |             i = declare('int')
 23 |             i = GID_0*LDIM_0 + LID_0
 24 |             if i < size:
 25 |                 y[i] = x[i]*a
 26 | 
 27 |         x = np.linspace(0, 1, 1000)
 28 |         y = np.zeros_like(x)
 29 |         x, y = wrap(x, y, backend='opencl')
 30 | 
 31 |         # When
 32 |         k = Kernel(knl, backend='opencl')
 33 |         a = 21.0
 34 |         k(x, y, a, 1000)
 35 | 
 36 |         # Then
 37 |         y.pull()
 38 |         self.assertTrue(np.allclose(y.data, x.data * a))
 39 | 
 40 |     def test_simple_kernel_cuda(self):
 41 |         importorskip('pycuda')
 42 | 
 43 |         # Given
 44 |         @annotate(gdoublep='x, y', a='float', size='int')
 45 |         def knl(x, y, a, size):
 46 |             i = declare('int')
 47 |             i = GID_0*LDIM_0 + LID_0
 48 |             if i < size:
 49 |                 y[i] = x[i]*a
 50 | 
 51 |         x = np.linspace(0, 1, 1000)
 52 |         y = np.zeros_like(x)
 53 |         x, y = wrap(x, y, backend='cuda')
 54 | 
 55 |         # When
 56 |         k = Kernel(knl, backend='cuda')
 57 |         a = 21.0
 58 |         k(x, y, a, 1000)
 59 | 
 60 |         # Then
 61 |         y.pull()
 62 |         self.assertTrue(np.allclose(y.data, x.data * a))
 63 | 
 64 |     def test_kernel_with_local_memory_opencl(self):
 65 |         importorskip('pyopencl')
 66 | 
 67 |         # Given
 68 |         @annotate(gdoublep='x, y', xc='ldoublep', a='float')
 69 |         def knl(x, y, xc, a):
 70 |             i, lid = declare('int', 2)
 71 |             lid = LID_0
 72 |             i = GID_0 * LDIM_0 + lid
 73 | 
 74 |             xc[lid] = x[i]
 75 | 
 76 |             local_barrier()
 77 | 
 78 |             y[i] = xc[lid] * a
 79 | 
 80 |         x = np.linspace(0, 1, 1024)
 81 |         y = np.zeros_like(x)
 82 |         xc = LocalMem(1, backend='opencl')
 83 | 
 84 |         x, y = wrap(x, y, backend='opencl')
 85 | 
 86 |         # When
 87 |         k = Kernel(knl, backend='opencl')
 88 |         a = 21.0
 89 |         k(x, y, xc, a)
 90 | 
 91 |         # Then
 92 |         y.pull()
 93 |         self.assertTrue(np.allclose(y.data, x.data * a))
 94 | 
 95 |     def test_kernel_with_local_memory_cuda(self):
 96 |         importorskip('pycuda')
 97 | 
 98 |         # Given
 99 |         @annotate(gdoublep='x, y', xc='ldoublep', a='float')
100 |         def knl(x, y, xc, a):
101 |             i, lid = declare('int', 2)
102 |             lid = LID_0
103 |             i = GID_0 * LDIM_0 + lid
104 | 
105 |             xc[lid] = x[i]
106 | 
107 |             local_barrier()
108 | 
109 |             y[i] = xc[lid] * a
110 | 
111 |         x = np.linspace(0, 1, 1024)
112 |         y = np.zeros_like(x)
113 |         xc = LocalMem(1, backend='cuda')
114 | 
115 |         x, y = wrap(x, y, backend='cuda')
116 | 
117 |         # When
118 |         k = Kernel(knl, backend='cuda')
119 |         a = 21.0
120 |         k(x, y, xc, a)
121 | 
122 |         # Then
123 |         y.pull()
124 |         self.assertTrue(np.allclose(y.data, x.data * a))
125 | 
126 | 
127 | @annotate(double='x, y, a', return_='double')
128 | def func(x, y, a):
129 |     return x * y * a
130 | 
131 | 
132 | @annotate(doublep='x, y', a='double', n='int', return_='double')
133 | def knl(x, y, a, n):
134 |     i = declare('int')
135 |     s = declare('double')
136 |     s = 0.0
137 |     for i in range(n):
138 |         s += func(x[i], y[i], a)
139 |     return s
140 | 
141 | 
142 | @annotate(n='int', doublep='x, y', a='double')
143 | def cy_extern(x, y, a, n):
144 |     i = declare('int')
145 |     with nogil, parallel():
146 |         for i in prange(n):
147 |             y[i] = x[i] * a
148 | 
149 | 
150 | @annotate(int='num, return_')
151 | def _factorial(num):
152 |     if num == 0:
153 |         return 1
154 |     else:
155 |         return num*_factorial(num - 1)
156 | 
157 | 
158 | class TestCython(unittest.TestCase):
159 |     def test_cython_code_with_return_and_nested_call(self):
160 |         # Given
161 |         n = 1000
162 |         x = np.linspace(0, 1, n)
163 |         y = x.copy()
164 |         a = 2.0
165 | 
166 |         # When
167 |         cy = Cython(knl)
168 |         result = cy(x, y, a, n)
169 | 
170 |         # Then
171 |         self.assertAlmostEqual(result, np.sum(x * y * a))
172 | 
173 |     def test_cython_with_externs(self):
174 |         # Given
175 |         n = 1000
176 |         x = np.linspace(0, 1, n)
177 |         y = np.zeros_like(x)
178 |         a = 2.0
179 | 
180 |         # When
181 |         with use_config(use_openmp=True):
182 |             cy = Cython(cy_extern)
183 | 
184 |         cy(x, y, a, n)
185 | 
186 |         # Then
187 |         self.assertTrue(np.allclose(y, x * a))
188 | 
189 |     def test_recursive_function(self):
190 |         # Given/when
191 |         fac = Cython(_factorial)
192 | 
193 |         # Then
194 |         self.assertEqual(fac(0), 1)
195 |         self.assertEqual(fac(1), 1)
196 |         self.assertEqual(fac(3), 6)
197 | 
198 | 
199 | def test_cast_works_in_pure_python():
200 |     x = cast(1.23, "int")
201 |     assert x == 1
202 | 
203 |     y = cast(2, "float")
204 |     assert y == 2.0
205 | 


--------------------------------------------------------------------------------
/compyle/tests/test_profile.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import numpy as np
  3 | 
  4 | from pytest import importorskip
  5 | 
  6 | from ..config import get_config, use_config
  7 | from ..array import wrap, zeros, ones
  8 | from ..profile import (
  9 |     get_profile_info, named_profile, profile, profile_ctx, ProfileContext
 10 | )
 11 | 
 12 | 
 13 | def axpb():
 14 |     a, b = 7, 13
 15 |     x = np.random.rand(1000)
 16 |     return a * x + b
 17 | 
 18 | 
 19 | class A:
 20 |     @profile
 21 |     def f(self):
 22 |         pass
 23 | 
 24 | 
 25 | class B:
 26 |     def __init__(self):
 27 |         self.name = 'my_name'
 28 | 
 29 |     @profile
 30 |     def f(self):
 31 |         pass
 32 | 
 33 |     @profile(name='explicit_name')
 34 |     def named(self):
 35 |         pass
 36 | 
 37 | 
 38 | @profile
 39 | def profiled_axpb():
 40 |     axpb()
 41 | 
 42 | 
 43 | @profile
 44 | def nested():
 45 |     profiled_axpb()
 46 | 
 47 | 
 48 | @named_profile('prefix_sum', backend='opencl')
 49 | def get_prefix_sum_knl():
 50 |     from ..opencl import get_queue, get_context
 51 |     from pyopencl.scan import GenericScanKernel
 52 |     ctx = get_context()
 53 |     queue = get_queue()
 54 |     return GenericScanKernel(ctx, np.int32,
 55 |                              arguments="__global int *ary",
 56 |                              input_expr="ary[i]",
 57 |                              scan_expr="a+b", neutral="0",
 58 |                              output_statement="ary[i] = prev_item")
 59 | 
 60 | 
 61 | def test_profile_ctx():
 62 |     with profile_ctx('axpb'):
 63 |         axpb()
 64 | 
 65 |     profile_info = get_profile_info()
 66 |     assert profile_info[0]['axpb']['calls'] == 1
 67 | 
 68 | 
 69 | def test_profile():
 70 |     for i in range(100):
 71 |         profiled_axpb()
 72 | 
 73 |     profile_info = get_profile_info()
 74 |     assert profile_info[0]['profiled_axpb']['calls'] == 100
 75 | 
 76 | 
 77 | def test_profile_method():
 78 |     # Given
 79 |     a = A()
 80 |     b = B()
 81 | 
 82 |     # When
 83 |     for i in range(5):
 84 |         a.f()
 85 |         b.f()
 86 |         b.named()
 87 | 
 88 |     # Then
 89 |     profile_info = get_profile_info()
 90 |     assert profile_info[0]['A.f']['calls'] == 5
 91 | 
 92 |     # For b.f(), b.name is my_name.
 93 |     assert profile_info[0]['my_name']['calls'] == 5
 94 | 
 95 |     # profile was given an explicit name for b.named()
 96 |     assert profile_info[0]['explicit_name']['calls'] == 5
 97 | 
 98 | 
 99 | def test_named_profile():
100 |     importorskip('pyopencl')
101 |     get_config().profile = True
102 |     knl = get_prefix_sum_knl()
103 |     x = ones(100, np.int32, backend='opencl')
104 |     knl(x.dev)
105 | 
106 |     profile_info = get_profile_info()
107 |     assert profile_info[0]['prefix_sum']['calls'] == 1
108 | 
109 | 
110 | def test_nesting_and_context():
111 |     # When
112 |     p = ProfileContext('main')
113 |     nested()
114 |     p.stop()
115 | 
116 |     # Then
117 |     prof = get_profile_info()
118 |     assert len(prof) == 3
119 |     assert prof[0]['main']['calls'] == 1
120 |     assert prof[1]['nested']['calls'] == 1
121 |     assert prof[2]['profiled_axpb']['calls'] == 1
122 | 


--------------------------------------------------------------------------------
/compyle/tests/test_template.py:
--------------------------------------------------------------------------------
  1 | from textwrap import dedent
  2 | 
  3 | import numpy as np
  4 | 
  5 | from ..array import wrap
  6 | from ..types import annotate, KnownType
  7 | from ..template import Template
  8 | from ..parallel import Elementwise
  9 | 
 10 | 
 11 | class SimpleTemplate(Template):
 12 |     def __init__(self, name, cond=False):
 13 |         super(SimpleTemplate, self).__init__(name=name)
 14 |         self.cond = cond
 15 | 
 16 |     def template(self, x, y):
 17 |         '''Docstring text'''
 18 |         '''
 19 |         % for i in range(5):
 20 |         print(${i})
 21 |         % endfor
 22 |         % if obj.cond:
 23 |         return 'hello'
 24 |         % else:
 25 |         return 'bye'
 26 |         % endif
 27 |         '''
 28 | 
 29 | 
 30 | class Dummy(Template):
 31 |     def template(self):
 32 |         '''Docs'''
 33 |         '''
 34 |         print(123)
 35 |         '''
 36 | 
 37 | 
 38 | class ParallelExample(Template):
 39 |     @annotate(i='int', x='doublep', y='doublep')
 40 |     def template(self, i, x, y):
 41 |         '''
 42 |         y[i] = x[i]*2.0
 43 |         '''
 44 | 
 45 | 
 46 | class ExtraArgs(Template):
 47 |     def extra_args(self):
 48 |         return ['x'], {'x': 'int'}
 49 | 
 50 |     def template(self):
 51 |         '''
 52 |         return x + 1
 53 |         '''
 54 | 
 55 | 
 56 | def test_simple_template():
 57 |     # Given
 58 |     t = SimpleTemplate(name='simple')
 59 | 
 60 |     # When
 61 |     simple = t.function
 62 |     x = simple(1, 2)
 63 | 
 64 |     # Then
 65 |     assert x == 'bye'
 66 | 
 67 |     # Given
 68 |     t = SimpleTemplate(name='simple', cond=True)
 69 | 
 70 |     # When
 71 |     simple = t.function
 72 |     x = simple(1, 2)
 73 | 
 74 |     # Then
 75 |     assert x == 'hello'
 76 | 
 77 | 
 78 | def test_that_source_code_is_available():
 79 |     # Given/When
 80 |     dummy = Dummy('dummy').function
 81 | 
 82 |     # Then
 83 |     expect = dedent('''\
 84 |     def dummy():
 85 |         """Docs
 86 |         """
 87 |         print(123)
 88 |     ''')
 89 |     assert dummy.source.strip() == expect.strip()
 90 |     assert dummy.is_jit is True
 91 | 
 92 | 
 93 | def test_template_usable_in_code_generation():
 94 |     # Given
 95 |     twice = ParallelExample('twice').function
 96 | 
 97 |     x = np.linspace(0, 1, 10)
 98 |     y = np.zeros_like(x)
 99 |     x, y = wrap(x, y)
100 | 
101 |     # When
102 |     e = Elementwise(twice)
103 |     e(x, y)
104 | 
105 |     # Then
106 |     y.pull()
107 |     np.testing.assert_almost_equal(y, 2.0*x.data)
108 |     assert twice.is_jit is False
109 | 
110 | 
111 | def test_template_with_extra_args():
112 |     # Given
113 |     extra = ExtraArgs('extra').function
114 | 
115 |     # When
116 |     result = extra(1)
117 | 
118 |     # Then
119 |     assert result == 2
120 |     assert extra.__annotations__ == {'x': KnownType('int')}
121 | 
122 | 
123 | def test_template_inject_works():
124 |     # Given
125 |     def f(x):
126 |         '''Docs
127 |         '''
128 |         for i in range(5):
129 |             x += i
130 |         return x + 1
131 | 
132 |     # When
133 |     t = Template('t')
134 |     result = t.inject(f, indent=1)
135 | 
136 |     # Then
137 |     lines = ['for i in range(5):\n', '    x += i\n', 'return x + 1\n']
138 |     expect = ''.join([' '*4 + x for x in lines])
139 |     assert result == expect
140 | 
141 |     # When
142 |     result = t.inject(f, indent=2)
143 | 
144 |     # Then
145 |     lines = ['for i in range(5):\n', '    x += i\n', 'return x + 1\n']
146 |     expect = ''.join([' '*8 + x for x in lines])
147 |     assert result == expect
148 | 
149 |     # When
150 |     result = t.inject(f, indent=0)
151 | 
152 |     # Then
153 |     lines = ['for i in range(5):\n', '    x += i\n', 'return x + 1\n']
154 |     expect = ''.join(lines)
155 |     assert result == expect
156 | 


--------------------------------------------------------------------------------
/compyle/tests/test_transpiler.py:
--------------------------------------------------------------------------------
  1 | from math import sin
  2 | import unittest
  3 | 
  4 | from ..transpiler import get_external_symbols_and_calls, Transpiler
  5 | from ..extern import printf
  6 | 
  7 | SIZE = 10
  8 | 
  9 | my_printf = printf
 10 | 
 11 | 
 12 | def h(x=0.0):
 13 |     return sin(x) + 1
 14 | 
 15 | 
 16 | def f(x=0.0):
 17 |     return h(x*2+1)
 18 | 
 19 | 
 20 | def g(x=0.0):
 21 |     return f(x*2)
 22 | 
 23 | 
 24 | def implicit_f(x, y):
 25 |     # These should be ignored.
 26 |     j = LID_0 + GID_0 + LDIM_0 + GDIM_0
 27 |     s = y[SIZE-1]
 28 |     for i in range(SIZE):
 29 |         s += sin(x[i])
 30 | 
 31 |     my_printf("%f", s)
 32 |     return s
 33 | 
 34 | 
 35 | def undefined_call(x):
 36 |     # An intentional error that should be caught.
 37 |     foo(x)
 38 | 
 39 | 
 40 | def _factorial(num):
 41 |     if num == 0:
 42 |         return 1
 43 |     else:
 44 |         return num*_factorial(num - 1)
 45 | 
 46 | 
 47 | class TestTranspiler(unittest.TestCase):
 48 |     def test_get_external_symbols_and_calls(self):
 49 |         # Given/When
 50 |         syms, implicit, calls, ext = get_external_symbols_and_calls(
 51 |             g, 'cython'
 52 |         )
 53 | 
 54 |         # Then
 55 |         expect = [f]
 56 |         self.assertEqual(syms, {})
 57 |         self.assertEqual(expect, calls)
 58 |         self.assertEqual(ext, [])
 59 | 
 60 |         # Given/When
 61 |         syms, implicit, calls, ext = get_external_symbols_and_calls(
 62 |             implicit_f, 'cython'
 63 |         )
 64 | 
 65 |         # Then
 66 |         self.assertEqual(syms, {'SIZE': 10})
 67 |         self.assertEqual(implicit, {'i'})
 68 |         self.assertEqual(calls, [])
 69 |         self.assertEqual(ext, [my_printf])
 70 | 
 71 |         # Given/When
 72 |         self.assertRaises(NameError, get_external_symbols_and_calls,
 73 |                           undefined_call, 'cython')
 74 | 
 75 |     def test_get_external_symbols_and_calls_handles_recursion(self):
 76 |         # Given/When
 77 |         syms, implicit, calls, ext = get_external_symbols_and_calls(
 78 |             _factorial, 'cython'
 79 |         )
 80 | 
 81 |         # Then
 82 |         self.assertEqual(syms, {})
 83 |         self.assertEqual(calls, [])
 84 |         self.assertEqual(implicit, set())
 85 |         self.assertEqual(ext, [])
 86 | 
 87 |     def test_transpiler(self):
 88 |         # Given
 89 |         t = Transpiler(backend='cython')
 90 | 
 91 |         # When
 92 |         t.add(g)
 93 | 
 94 |         # Then
 95 |         for func in (g, f, h):
 96 |             self.assertTrue(func in t.blocks)
 97 | 
 98 |         expect = [h, f, g]
 99 |         self.assertListEqual([x.obj for x in t.blocks], expect)
100 | 


--------------------------------------------------------------------------------
/compyle/tests/test_types.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | import numpy as np
 4 | 
 5 | from ..types import KnownType, declare, annotate
 6 | 
 7 | 
 8 | class TestDeclare(unittest.TestCase):
 9 |     def test_declare(self):
10 |         self.assertEqual(declare('int'), 0)
11 |         self.assertEqual(declare('long'), 0)
12 |         self.assertEqual(declare('double'), 0.0)
13 |         self.assertEqual(declare('float'), 0.0)
14 | 
15 |         self.assertEqual(declare('int', 2), (0, 0))
16 |         self.assertEqual(declare('long', 3), (0, 0, 0))
17 |         self.assertEqual(declare('double', 2), (0.0, 0.0))
18 |         self.assertEqual(declare('float', 3), (0.0, 0.0, 0.0))
19 | 
20 |         res = declare('matrix(3)')
21 |         self.assertTrue(np.all(res == np.zeros(3)))
22 |         res = declare('matrix(3)', 3)
23 |         for i in range(3):
24 |             self.assertTrue(np.all(res[0] == np.zeros(3)))
25 |         res = declare('matrix((3,))')
26 |         self.assertTrue(np.all(res == np.zeros(3)))
27 |         res = declare('matrix((3, 3))')
28 |         self.assertTrue(np.all(res == np.zeros((3, 3))))
29 | 
30 |     def test_declare_with_type(self):
31 |         res = declare('matrix(3, "int")')
32 |         self.assertTrue(np.all(res == np.zeros(3)))
33 |         self.assertEqual(res.dtype, np.int32)
34 | 
35 |         res = declare('matrix((2, 2), "unsigned int")')
36 |         self.assertTrue(np.all(res == np.zeros((2, 2))))
37 |         self.assertEqual(res.dtype, np.uint32)
38 | 
39 |         res = declare('matrix((3,), "float")')
40 |         self.assertTrue(np.all(res == np.zeros((3,))))
41 |         self.assertEqual(res.dtype, np.float32)
42 | 
43 |     def test_declare_with_address_space(self):
44 |         self.assertEqual(declare('LOCAL_MEM int', 2), (0, 0))
45 |         self.assertEqual(declare('GLOBAL_MEM float', 2), (0.0, 0.0))
46 | 
47 |         res = declare('LOCAL_MEM matrix(3)')
48 |         self.assertTrue(np.all(res == np.zeros(3)))
49 | 
50 |         res = declare('GLOBAL_MEM matrix(3)')
51 |         self.assertTrue(np.all(res == np.zeros(3)))
52 | 
53 | 
54 | class TestAnnotate(unittest.TestCase):
55 |     def test_simple_annotation(self):
56 |         # Given/When
57 |         @annotate(i='int', x='floatp', return_='float')
58 |         def f(i, x):
59 |             return x[i]*2.0
60 | 
61 |         # Then
62 |         result = f.__annotations__
63 |         self.assertEqual(result['return'], KnownType('float'))
64 |         self.assertEqual(result['i'], KnownType('int'))
65 |         self.assertEqual(result['x'], KnownType('float*', 'float'))
66 | 
67 |     def test_reversed_annotation(self):
68 |         # Given/When
69 |         @annotate(i='int', floatp='x, y', return_='float')
70 |         def f(i, x, y):
71 |             return x[i]*y[i]
72 | 
73 |         # Then
74 |         result = f.__annotations__
75 |         self.assertEqual(result['return'], KnownType('float'))
76 |         self.assertEqual(result['i'], KnownType('int'))
77 |         self.assertEqual(result['x'], KnownType('float*', 'float'))
78 |         self.assertEqual(result['y'], KnownType('float*', 'float'))
79 | 
80 |     def test_decorator_accepts_known_type_instance(self):
81 |         # Given/When
82 |         @annotate(x=KnownType('Thing'))
83 |         def f(x):
84 |             x.f()
85 | 
86 |         # Then
87 |         result = f.__annotations__
88 |         self.assertEqual(result['x'], KnownType('Thing'))
89 | 
90 |     def test_decorator_raises_error_for_unknown_error(self):
91 |         def f(x):
92 |             pass
93 | 
94 |         self.assertRaises(TypeError, annotate, f, x='alpha')
95 | 


--------------------------------------------------------------------------------
/compyle/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | from textwrap import dedent
 3 | from unittest import TestCase
 4 | 
 5 | from .. import utils
 6 | 
 7 | 
 8 | def func(x):
 9 |     return x
10 | 
11 | 
12 | class TestUtils(TestCase):
13 |     def test_getsource_works_with_normal_function(self):
14 |         # Given/When
15 |         src = utils.getsource(func)
16 | 
17 |         # Then
18 |         self.assertEqual(src, inspect.getsource(func))
19 | 
20 |     def test_getsource_works_with_generated_function(self):
21 |         # Given
22 |         src = dedent('''
23 |         def gfunc(x):
24 |             return x
25 |         ''')
26 |         ns = {}
27 |         exec(src, ns)
28 |         gfunc = ns['gfunc']
29 |         gfunc.source = src
30 | 
31 |         # When
32 |         result = utils.getsource(gfunc)
33 | 
34 |         # Then
35 |         self.assertEqual(result, src)
36 | 
37 |     def test_getsourcelines_works_with_normal_function(self):
38 |         # Given/When
39 |         result = utils.getsourcelines(func)
40 | 
41 |         # Then
42 |         self.assertEqual(result, inspect.getsourcelines(func))
43 | 
44 |     def test_getsourcelines_works_with_generated_function(self):
45 |         # Given
46 |         src = dedent('''
47 |         def gfunc(x):
48 |             return x
49 |         ''')
50 |         ns = {}
51 |         exec(src, ns)
52 |         gfunc = ns['gfunc']
53 |         gfunc.source = src
54 | 
55 |         # When
56 |         result = utils.getsourcelines(gfunc)
57 | 
58 |         # Then
59 |         self.assertEqual(result, (src.splitlines(True), 0))
60 | 


--------------------------------------------------------------------------------
/compyle/thrust/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pypr/compyle/bc858bab005f2cc9990267448c5b873d4b5f8635/compyle/thrust/__init__.py


--------------------------------------------------------------------------------
/compyle/thrust/sort.pyx:
--------------------------------------------------------------------------------
 1 | import cupy.cuda.thrust as thrust
 2 | from libcpp.vector cimport vector
 3 | import compyle.array as carr
 4 | import numpy as np
 5 | 
 6 | 
 7 | cpdef argsort(array, keys=None):
 8 |     idx_array = carr.empty(array.length, np.intp, backend='cuda')
 9 | 
10 |     cdef vector[int] shape
11 |     shape.push_back(<int> array.length)
12 | 
13 |     cdef size_t keys_ptr
14 |     if keys:
15 |         keys_ptr = <size_t> keys.dev.ptr
16 |     else:
17 |         keys_ptr = 0
18 | 
19 |     thrust.argsort(array.dtype, idx_array.dev.ptr, array.dev.ptr, keys_ptr, shape)
20 | 
21 |     return idx_array
22 | 


--------------------------------------------------------------------------------
/compyle/types.py:
--------------------------------------------------------------------------------
  1 | import ast
  2 | import platform
  3 | import sys
  4 | import numpy as np
  5 | 
  6 | 
  7 | BITS = platform.architecture()[0]
  8 | 
  9 | 
 10 | def declare(type, num=1):
 11 |     """Declare the variable to be of the given type.
 12 | 
 13 |     The additional optional argument num is the number of items to return.
 14 | 
 15 |     Normally, the declare function only defines a variable when compiled,
 16 |     however, this function here is a pure Python implementation so that the
 17 |     same code can be executed in Python.
 18 | 
 19 |     Parameters
 20 |     ----------
 21 | 
 22 |     type: str: String representing the type.
 23 |     num: int: the number of values to return
 24 | 
 25 |     Examples
 26 |     --------
 27 | 
 28 |     >>> declare('int')
 29 |     0
 30 |     >>> declare('int', 3)
 31 |     0, 0, 0
 32 |     """
 33 |     if num == 1:
 34 |         return _declare(type)
 35 |     else:
 36 |         return tuple(_declare(type) for i in range(num))
 37 | 
 38 | 
 39 | def get_declare_info(arg):
 40 |     """Given the first argument to the declare function, return the
 41 |     (kind, address_space, type, shape), information.
 42 | 
 43 |     kind: is a string, 'primitive' or 'matrix'
 44 |     address_space: is the address space string.
 45 |     type: is the c data type to use.
 46 |     shape: is a tuple with the shape of the matrix.  It is None for primitives.
 47 |     """
 48 |     address_space = ''
 49 |     shape = None
 50 |     if arg.startswith(('LOCAL_MEM', 'GLOBAL_MEM')):
 51 |         idx = arg.index(' ')
 52 |         address_space = arg[:idx]
 53 |         arg = arg[idx + 1:]
 54 |     if arg.startswith('matrix'):
 55 |         kind = 'matrix'
 56 |         m_arg = ast.literal_eval(arg[7:-1])
 57 |         if isinstance(m_arg, tuple) and \
 58 |                         len(m_arg) > 1 and \
 59 |                 isinstance(m_arg[1], str):
 60 |             shape = m_arg[0]
 61 |             type = m_arg[1]
 62 |         else:
 63 |             shape = m_arg
 64 |             type = 'double'
 65 |     else:
 66 |         kind = 'primitive'
 67 |         type = arg
 68 | 
 69 |     return kind, address_space, type, shape
 70 | 
 71 | 
 72 | def _declare(arg):
 73 |     kind, address_space, ctype, shape = get_declare_info(arg)
 74 |     if kind == 'matrix':
 75 |         dtype = C_NP_TYPE_MAP[ctype]
 76 |         return np.zeros(shape, dtype=dtype)
 77 |     else:
 78 |         if ctype in ['double', 'float']:
 79 |             return 0.0
 80 |         else:
 81 |             return 0
 82 | 
 83 | 
 84 | class Undefined(object):
 85 |     pass
 86 | 
 87 | 
 88 | class KnownType(object):
 89 |     """Simple object to specify a known type as a string.
 90 | 
 91 |     Smells but is convenient as the type may be one available only inside
 92 |     Cython without a corresponding Python type.
 93 |     """
 94 | 
 95 |     def __init__(self, type_str, base_type=''):
 96 |         """Constructor
 97 | 
 98 |         The ``base_type`` argument is optional and used to represent the base
 99 |         type, i.e. the type_str may be 'Foo*' but the base type will be 'Foo'
100 |         if specified.
101 | 
102 |         Parameters
103 |         ----------
104 |         type_str: str: A string representation of how the type is declared.
105 |         base_type: str: The base type of this entity. (optional)
106 | 
107 |         """
108 |         self.type = type_str
109 |         self.base_type = base_type
110 | 
111 |     def __repr__(self):
112 |         if self.base_type:
113 |             return 'KnownType("%s", "%s")' % (self.type, self.base_type)
114 |         else:
115 |             return 'KnownType("%s")' % self.type
116 | 
117 |     def __eq__(self, other):
118 |         return self.type == other.type and self.base_type == other.base_type
119 | 
120 | 
121 | TYPES = dict(
122 |     float=KnownType('float'),
123 |     double=KnownType('double'),
124 |     int=KnownType('int'),
125 |     long=KnownType('long'),
126 |     uint=KnownType('unsigned int'),
127 |     ulong=KnownType('unsigned long'),
128 | 
129 |     floatp=KnownType('float*', 'float'),
130 |     doublep=KnownType('double*', 'double'),
131 |     intp=KnownType('int*', 'int'),
132 |     longp=KnownType('long*', 'long'),
133 |     uintp=KnownType('unsigned int*', 'unsigned int'),
134 |     ulongp=KnownType('unsigned long*', 'unsigned long'),
135 | 
136 |     gfloatp=KnownType('GLOBAL_MEM float*', 'float'),
137 |     gdoublep=KnownType('GLOBAL_MEM double*', 'double'),
138 |     gintp=KnownType('GLOBAL_MEM int*', 'int'),
139 |     glongp=KnownType('GLOBAL_MEM long*', 'long'),
140 |     guintp=KnownType('GLOBAL_MEM unsigned int*', 'unsigned int'),
141 |     gulongp=KnownType('GLOBAL_MEM unsigned long*', 'unsigned long'),
142 | 
143 |     lfloatp=KnownType('LOCAL_MEM float*', 'float'),
144 |     ldoublep=KnownType('LOCAL_MEM double*', 'double'),
145 |     lintp=KnownType('LOCAL_MEM int*', 'int'),
146 |     llongp=KnownType('LOCAL_MEM long*', 'long'),
147 |     luintp=KnownType('LOCAL_MEM unsigned int*', 'unsigned int'),
148 |     lulongp=KnownType('LOCAL_MEM unsigned long*', 'unsigned long'),
149 | )
150 | 
151 | 
152 | def _inject_types_in_module():
153 |     g = globals()
154 |     for name, type in TYPES.items():
155 |         if name in ['int', 'long', 'float']:
156 |             name = name + '_'
157 |         g[name] = type
158 | 
159 | 
160 | # A convenience so users can import types directly from the module.
161 | _inject_types_in_module()
162 | 
163 | NP_C_TYPE_MAP = {
164 |     np.dtype(bool): 'char',
165 |     np.dtype(np.float32): 'float', np.dtype(np.float64): 'double',
166 |     np.dtype(np.int8): 'char', np.dtype(np.uint8): 'unsigned char',
167 |     np.dtype(np.int16): 'short', np.dtype(np.uint16): 'unsigned short',
168 |     np.dtype(np.int32): 'int', np.dtype(np.uint32): 'unsigned int',
169 |     np.dtype(np.int64): 'long', np.dtype(np.uint64): 'unsigned long'
170 | }
171 | 
172 | C_NP_TYPE_MAP = {
173 |     'bool': bool,
174 |     'char': np.int8,
175 |     'double': np.float64,
176 |     'float': np.float32,
177 |     'int': np.int32,
178 |     'long': np.int64,
179 |     'short': np.int16,
180 |     'unsigned char': np.uint8,
181 |     'unsigned int': np.uint32,
182 |     'unsigned long': np.uint64,
183 |     'unsigned short': np.uint16
184 | }
185 | 
186 | if sys.platform.startswith('win') or BITS.startswith('32bit'):
187 |     NP_C_TYPE_MAP[np.dtype(np.int64)] = 'long long'
188 |     NP_C_TYPE_MAP[np.dtype(np.uint64)] = 'unsigned long long'
189 |     C_NP_TYPE_MAP['long long'] = np.int64
190 |     C_NP_TYPE_MAP['unsigned long long'] = np.uint64
191 |     TYPES['long long'] = KnownType('long long')
192 |     TYPES['glonglongp'] = KnownType('GLOBAL_MEM long long*', 'long long')
193 |     TYPES['gulonglongp'] = KnownType('GLOBAL_MEM unsigned long long*',
194 |                                      'unsigned long long')
195 |     TYPES['llonglongp'] = KnownType('LOCAL_MEM long long*', 'long long')
196 |     TYPES['lulonglongp'] = KnownType('LOCAL_MEM unsigned long long*',
197 |                                      'unsigned long long')
198 | 
199 | 
200 | NP_TYPE_LIST = list(C_NP_TYPE_MAP.values())
201 | 
202 | 
203 | def dtype_to_ctype(dtype, backend=None):
204 |     if backend in ('opencl', 'cuda'):
205 |         try:
206 |             from pyopencl.compyte.dtypes import \
207 |                 dtype_to_ctype as d2c_opencl
208 |             return d2c_opencl(dtype)
209 |         except (ValueError, ImportError):
210 |             pass
211 |     dtype = np.dtype(dtype)
212 |     return NP_C_TYPE_MAP[dtype]
213 | 
214 | 
215 | def ctype_to_dtype(ctype):
216 |     return np.dtype(C_NP_TYPE_MAP[ctype])
217 | 
218 | 
219 | def knowntype_to_ctype(knowntype):
220 |     knowntype_obj = TYPES.get(knowntype, None)
221 |     if knowntype_obj:
222 |         return knowntype_obj.type
223 |     else:
224 |         raise ValueError("Not a vaild known type")
225 | 
226 | 
227 | def dtype_to_knowntype(dtype, address='scalar', backend=None):
228 |     ctype = dtype_to_ctype(dtype, backend=backend)
229 |     if 'unsigned' in ctype:
230 |         ctype = 'u%s' % ctype.replace('unsigned ', '')
231 |     knowntype = ctype.replace(' ', '')
232 |     if address == 'ptr':
233 |         knowntype = '%sp' % knowntype
234 |     elif address == 'global':
235 |         knowntype = 'g%sp' % knowntype
236 |     elif address == 'local':
237 |         knowntype = 'l%sp' % knowntype
238 |     elif address != 'scalar':
239 |         raise ValueError("address can only be scalar,"
240 |                          " ptr, global or local")
241 | 
242 |     return knowntype
243 |     if knowntype in TYPES:
244 |         return knowntype
245 |     else:
246 |         raise TypeError("Not a vaild KnownType")
247 | 
248 | 
249 | def annotate(func=None, **kw):
250 |     """A decorator to specify the types of a function. These types are injected
251 |     into the functions, `__annotations__` attribute.
252 | 
253 |     An example describes this best:
254 | 
255 |     @annotate(i='int', x='floatp', return_='float')
256 |     def f(i, x):
257 |         return x[i]*2.0
258 | 
259 |     One could also do:
260 | 
261 |     @annotate(i='int', floatp='x, y', return_='float')
262 |     def f(i, x, y):
263 |        return x[i]*y[i]
264 | 
265 |     """
266 |     data = {}
267 | 
268 |     if not kw:
269 |         def wrapper(func):
270 |             func.is_jit = True
271 |             return func
272 |     else:
273 |         data = kwtype_to_annotation(kw)
274 | 
275 |         def wrapper(func):
276 |             # For jitted functions, we should retain
277 |             # the is_jit attribute when we annotate the function.
278 |             func.is_jit = getattr(func, 'is_jit', False)
279 |             try:
280 |                 func.__annotations__ = data
281 |             except AttributeError:
282 |                 func.im_func.__annotations__ = data
283 |             return func
284 | 
285 |     if func is None:
286 |         return wrapper
287 |     else:
288 |         return wrapper(func)
289 | 
290 | 
291 | def _clean_name(name):
292 |     return 'return' if name == 'return_' else name
293 | 
294 | 
295 | def _get_type(type):
296 |     if isinstance(type, KnownType):
297 |         return type
298 |     elif type in TYPES:
299 |         return TYPES[type]
300 |     else:
301 |         msg = ('Unknown type {type}, not a KnownType and not one of '
302 |                'the pre-declared types.'.format(type=str(type)))
303 |         raise TypeError(msg)
304 | 
305 | 
306 | def kwtype_to_annotation(kw):
307 |     """Convert type to a KnownType"""
308 |     data = {}
309 | 
310 |     for name, type in kw.items():
311 |         if isinstance(type, str) and ',' in type:
312 |             for x in type.split(','):
313 |                 data[_clean_name(x.strip())] = _get_type(name)
314 |         else:
315 |             data[_clean_name(name)] = _get_type(type)
316 | 
317 |     return data
318 | 


--------------------------------------------------------------------------------
/compyle/utils.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import argparse
 3 | import atexit
 4 | from compyle.config import get_config
 5 | from compyle.profile import print_profile
 6 | 
 7 | 
 8 | def getsourcelines(obj):
 9 |     '''Given an object return the source code that defines it as a list of
10 |     lines along with the starting line.
11 |     '''
12 |     try:
13 |         return inspect.getsourcelines(obj)
14 |     except Exception:
15 |         if hasattr(obj, 'source'):
16 |             return obj.source.splitlines(True), 0
17 |         else:
18 |             raise
19 | 
20 | 
21 | def getsource(obj):
22 |     '''Given an object return the source that defines it.
23 |     '''
24 |     try:
25 |         return inspect.getsource(obj)
26 |     except Exception:
27 |         if hasattr(obj, 'source'):
28 |             return obj.source
29 |         else:
30 |             raise
31 | 
32 | 
33 | class ArgumentParser(argparse.ArgumentParser):
34 |     '''Standard argument parser for compyle applications.
35 |     Includes arguments for backend, openmp and use_double
36 |     '''
37 | 
38 |     def __init__(self, *args, **kwargs):
39 |         super().__init__(*args, **kwargs)
40 |         # setup standard arguments
41 |         self.add_argument(
42 |             '-b', '--backend', action='store', dest='backend', default='cython',
43 |             choices = ['cython', 'opencl', 'cuda'],
44 |             help='Choose the backend.'
45 |         )
46 |         self.add_argument(
47 |             '--openmp', action='store_true', dest='openmp', default=False,
48 |             help='Use OpenMP.'
49 |         )
50 |         self.add_argument(
51 |             '--use-double', action='store_true', dest='use_double',
52 |             default=False, help='Use double precision on the GPU.'
53 |         )
54 |         self.add_argument(
55 |             '--suppress-warnings', action='store_true',
56 |             dest='suppress_warnings',
57 |             default=False, help='Suppress warnings'
58 |         )
59 |         self.add_argument(
60 |             '--profile', action='store_true',
61 |             dest='profile',
62 |             default=False, help='Print profiling info'
63 |         )
64 |         self.profile_registered = False
65 | 
66 |     def _set_config_options(self, options):
67 |         get_config().use_openmp = options.openmp
68 |         get_config().use_double = options.use_double
69 |         get_config().suppress_warnings = options.suppress_warnings
70 |         if options.backend == 'opencl':
71 |             get_config().use_opencl = True
72 |         if options.backend == 'cuda':
73 |             get_config().use_cuda = True
74 |         if options.profile and not self.profile_registered:
75 |             get_config().profile = True
76 |             atexit.register(print_profile)
77 |             self.profile_registered = True
78 | 
79 |     def parse_args(self, *args, **kwargs):
80 |         options = super().parse_args(*args, **kwargs)
81 |         self._set_config_options(options)
82 |         return options
83 | 
84 |     def parse_known_args(self, *args, **kwargs):
85 |         options, unknown = super().parse_known_args(*args, **kwargs)
86 |         self._set_config_options(options)
87 |         return options, unknown
88 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = ComPyle
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx-rtd-theme


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Compyle documentation build configuration file, created by
  5 | # sphinx-quickstart on Sun Dec  2 14:26:18 2018.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another directory,
 17 | # add these directories to sys.path here. If the directory is relative to the
 18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 19 | #
 20 | import os
 21 | from os.path import join
 22 | # import sys
 23 | # sys.path.insert(0, os.path.abspath('.'))
 24 | 
 25 | 
 26 | # -- General configuration ------------------------------------------------
 27 | 
 28 | # If your documentation needs a minimal Sphinx version, state it here.
 29 | #
 30 | # needs_sphinx = '1.0'
 31 | 
 32 | # Add any Sphinx extension module names here, as strings. They can be
 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 34 | # ones.
 35 | extensions = ['sphinx.ext.autodoc',
 36 |     'sphinx.ext.mathjax',
 37 |     'sphinx.ext.viewcode']
 38 | 
 39 | # Add any paths that contain templates here, relative to this directory.
 40 | templates_path = ['_templates']
 41 | 
 42 | # The suffix(es) of source filenames.
 43 | # You can specify multiple suffix as a list of string:
 44 | #
 45 | # source_suffix = ['.rst', '.md']
 46 | source_suffix = '.rst'
 47 | 
 48 | # The master toctree document.
 49 | master_doc = 'index'
 50 | 
 51 | # General information about the project.
 52 | project = 'Compyle'
 53 | copyright = '2018-2021, PySPH Developers'
 54 | author = 'PySPH Developers'
 55 | 
 56 | # The version info for the project you're documenting, acts as replacement for
 57 | # |version| and |release|, also used in various other places throughout the
 58 | # built documents.
 59 | #
 60 | # The version info for the project you're documenting, acts as replacement for
 61 | # |version| and |release|, also used in various other places throughout the
 62 | # built documents.
 63 | #
 64 | _d = {}
 65 | fname = join(os.pardir, os.pardir, 'compyle', '__init__.py')
 66 | exec(compile(open(fname).read(), fname, 'exec'), _d)
 67 | version = release = _d['__version__']
 68 | 
 69 | # The language for content autogenerated by Sphinx. Refer to documentation
 70 | # for a list of supported languages.
 71 | #
 72 | # This is also used if you do content translation via gettext catalogs.
 73 | # Usually you set "language" from the command line for these cases.
 74 | language = 'en'
 75 | 
 76 | # List of patterns, relative to source directory, that match files and
 77 | # directories to ignore when looking for source files.
 78 | # This patterns also effect to html_static_path and html_extra_path
 79 | exclude_patterns = []
 80 | 
 81 | # The name of the Pygments (syntax highlighting) style to use.
 82 | pygments_style = 'sphinx'
 83 | 
 84 | # If true, `todo` and `todoList` produce output, else they produce nothing.
 85 | todo_include_todos = False
 86 | 
 87 | 
 88 | # -- Options for HTML output ----------------------------------------------
 89 | 
 90 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 91 | # a list of builtin themes.
 92 | #
 93 | html_theme = 'sphinx_rtd_theme'
 94 | 
 95 | # Theme options are theme-specific and customize the look and feel of a theme
 96 | # further.  For a list of options available for each theme, see the
 97 | # documentation.
 98 | #
 99 | # html_theme_options = {}
100 | 
101 | # Add any paths that contain custom static files (such as style sheets) here,
102 | # relative to this directory. They are copied after the builtin static files,
103 | # so a file named "default.css" will overwrite the builtin "default.css".
104 | html_static_path = ['_static']
105 | 
106 | 
107 | # -- Options for HTMLHelp output ------------------------------------------
108 | 
109 | # Output file base name for HTML help builder.
110 | htmlhelp_basename = 'Compyledoc'
111 | 
112 | 
113 | # -- Options for LaTeX output ---------------------------------------------
114 | 
115 | latex_elements = {
116 |     # The paper size ('letterpaper' or 'a4paper').
117 |     #
118 |     # 'papersize': 'letterpaper',
119 | 
120 |     # The font size ('10pt', '11pt' or '12pt').
121 |     #
122 |     # 'pointsize': '10pt',
123 | 
124 |     # Additional stuff for the LaTeX preamble.
125 |     #
126 |     # 'preamble': '',
127 | 
128 |     # Latex figure (float) alignment
129 |     #
130 |     # 'figure_align': 'htbp',
131 | }
132 | 
133 | # Grouping the document tree into LaTeX files. List of tuples
134 | # (source start file, target name, title,
135 | #  author, documentclass [howto, manual, or own class]).
136 | latex_documents = [
137 |     (master_doc, 'Compyle.tex', 'Compyle Documentation',
138 |      'PySPH Developers', 'manual'),
139 | ]
140 | 
141 | 
142 | # -- Options for manual page output ---------------------------------------
143 | 
144 | # One entry per manual page. List of tuples
145 | # (source start file, name, description, authors, manual section).
146 | man_pages = [
147 |     (master_doc, 'compyle', 'Compyle Documentation',
148 |      [author], 1)
149 | ]
150 | 
151 | 
152 | # -- Options for Texinfo output -------------------------------------------
153 | 
154 | # Grouping the document tree into Texinfo files. List of tuples
155 | # (source start file, target name, title, author,
156 | #  dir menu entry, description, category)
157 | texinfo_documents = [
158 |     (master_doc, 'Compyle', 'Compyle Documentation',
159 |      author, 'Compyle', 'One line description of project.',
160 |      'Miscellaneous'),
161 | ]
162 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. Compyle documentation master file, created by
 2 |    sphinx-quickstart on Sun Dec  2 14:26:18 2018.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to Compyle's documentation!
 7 | ===================================
 8 | 
 9 | Compyle allows users to execute a restricted subset of Python (almost similar
10 | to C) on a variety of HPC platforms. Currently we support multi-core CPU
11 | execution using Cython, and support GPU devices using OpenCL and CUDA.
12 | 
13 | You can try Compyle online on a `Google Colab notebook <https://colab.research.google.com/drive/1SGRiArYXV1LEkZtUeg9j0qQ21MDqQR2U?usp=sharing>`_.
14 | 
15 | .. toctree::
16 |    :maxdepth: 2
17 |    :caption: Contents:
18 | 
19 |    overview.rst
20 |    installation.rst
21 |    details.rst
22 | 
23 | 
24 | Indices and tables
25 | ==================
26 | 
27 | * :ref:`genindex`
28 | * :ref:`modindex`
29 | * :ref:`search`
30 | 


--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
  1 | Installation
  2 | ==============
  3 | 
  4 | ComPyle is itself pure Python but depends on numpy_ and requires either Cython_
  5 | or PyOpenCL_ or PyCUDA_ along with the respective backends of a C/C++ compiler,
  6 | OpenCL and CUDA. If you are only going to execute code on a CPU then all you
  7 | need is Cython_. The full list of requirements is shown in the
  8 | ``requirements.txt`` file on the repository.
  9 | 
 10 | You should be able to install ComPyle by doing::
 11 | 
 12 |   $ pip install compyle
 13 | 
 14 | 
 15 | Note that when executing code on a CPU, you will need to have a C/C++ compiler
 16 | that is compatible with your Python installation. In addition, if you need to
 17 | use OpenMP you will need to make sure your compiler is compatible with that.
 18 | Some additional information on this is included below.
 19 | 
 20 | Installing the bleeding edge
 21 | ----------------------------
 22 | 
 23 | Note that if you want the latest bleeding edge of compyle, clone the
 24 | repository and install compyle like so::
 25 | 
 26 |   $ git clone https://github.com/pypr/compyle
 27 |   $ cd compyle
 28 |   $ python setup.py develop
 29 |   # Or
 30 |   $ pip install -e .
 31 | 
 32 | If you just want the latest version and do not want to clone the repository,
 33 | you can also do::
 34 | 
 35 |   $ pip install https://github.com/pypr/compyle/zipball/main
 36 | 
 37 | 
 38 | .. _PyOpenCL: https://documen.tician.de/pyopencl/
 39 | .. _OpenCL: https://www.khronos.org/opencl/
 40 | .. _Cython: http://www.cython.org
 41 | .. _numpy: http://www.numpy.org
 42 | .. _PyCUDA: https://documen.tician.de/pycuda
 43 | .. _OpenMP: http://openmp.org/
 44 | .. _CuPy: https://cupy.chainer.org/
 45 | 
 46 | 
 47 | Setting up on GNU/Linux
 48 | -------------------------
 49 | 
 50 | This is usually very simple, just installing the standard gcc/g++ packages ought
 51 | to work. OpenMP_ is typically available but if it is not, it can be installed
 52 | with (on apt-compatible systems)::
 53 | 
 54 |     $ sudo apt-get install libgomp1
 55 | 
 56 | 
 57 | Installation with conda on MacOS
 58 | ---------------------------------
 59 | 
 60 | Recent conda_ packages make the process of setup very easy on MacOS assuming
 61 | that you have the `XCode command line utilities`_ installed. Please make sure
 62 | you install this.
 63 | 
 64 | For example with conda-forge_ the following creates a new Python 3.8
 65 | environment with compyle installed and working with both OpenMP and OpenCL::
 66 | 
 67 |   $ conda create -c conda-forge -n py38 python=3.8 numpy pyopencl
 68 |   $ conda activate py38 # or a suitable such invocation
 69 |   $ pip install compyle
 70 | 
 71 | Note that the above implicitly installs the ``llvm-openmp`` package in the
 72 | environment which works out of the box with clang and provides OpenMP support.
 73 | 
 74 | .. _conda: https://docs.conda.io/
 75 | .. _conda-forge: https://conda-forge.org/
 76 | .. _XCode command line utilities: http://stackoverflow.com/questions/12228382/after-install-xcode-where-is-clang
 77 | 
 78 | 
 79 | Possible issues on MacOS
 80 | --------------------------
 81 | 
 82 | Ensure that you have gcc or clang installed by installing XCode. See
 83 | installing `XCode command line utilities`_ if you installed XCode but can't
 84 | find clang or gcc.
 85 | 
 86 | If you are getting strange errors of the form::
 87 | 
 88 |   lang: warning: libstdc++ is deprecated; move to libc++ with a minimum deployment target of OS X 10.9 [-Wdeprecated]
 89 |   ld: library not found for -lstdc++
 90 |   clang: error: linker command failed with exit code 1 (use -v to see invocation)
 91 | 
 92 | Then try this (on a bash shell)::
 93 | 
 94 |   $ export MACOSX_DEPLOYMENT_TARGET=10.9
 95 | 
 96 | And run your command again (replace the above with a suitable line on other
 97 | shells). This is necessary because your Python was compiled with an older
 98 | deployment target and the current version of XCode that you have installed is
 99 | not compatible with that. By setting the environment variable you allow
100 | compyle to use a newer version. If this works, it is a good idea to set this
101 | in your default environment (``.bashrc`` for bash shells) so you do not have
102 | to do this every time. You may also do this in the compyle configuration file,
103 | see :ref:`config`.
104 | 
105 | 
106 | 
107 | OpenMP on MacOS
108 | ~~~~~~~~~~~~~~~~
109 | 
110 | These instructions are a bit old and only if you are not using conda as
111 | discussed above.
112 | 
113 | │The default clang compiler available on MacOS uses an LLVM backend and does
114 | not support OpenMP_ out of the box. There are two ways to support OpenMP. The
115 | first involves installing the OpenMP support for clang. This can be done with
116 | brew_ using::
117 | 
118 |   $ brew install libomp
119 | 
120 | `LLVM <https://formulae.brew.sh/formula/llvm>`_ can also be installed using
121 | brew_, likewise. Once this is done, you
122 | can use the following config_::
123 | 
124 |     import os
125 |     os.environ['CC'] = '/opt/homebrew/opt/llvm@15/bin/clang'
126 |     os.environ['CXX'] = '/opt/homebrew/opt/llvm@15/bin/clang++'
127 |     OMP_CFLAGS=['-I/opt/homebrew/opt/libomp/include', '-I/opt/homebrew/opt/llvm@15/include', '-Xclang', '-fopenmp']
128 |     OMP_LINK=['-L/opt/homebrew/opt/libomp/lib', '-L/opt/homebrew/opt/llvm@15/lib', '-lomp']
129 | 
130 | The above config assumes that you have installed `llvm@15`. You can change the
131 | config according to the version at the time of installing. Once that is done,
132 | it should "just work". If you get strange errors, you can also try
133 | setting the ``MACOSX_DEPLOYMENT_TARGET``  as shown in the previous section.
134 | 
135 | Another option is to install GCC for MacOS available on brew_ using ::
136 | 
137 |     $ brew install gcc
138 | 
139 | Once this is done, you need to use this as your default compiler. The ``gcc``
140 | formula on brew currently ships with gcc version 9. Therefore, you can
141 | tell Python to use the GCC installed by brew by setting::
142 | 
143 |     $ export CC=gcc-9
144 |     $ export CXX=g++-9
145 | 
146 | Note that you still do need to have the command-line-tools for XCode
147 | installed, otherwise the important header files are not available. See
148 | `how-to-install-xcode-command-line-tools
149 | <https://stackoverflow.com/questions/9329243/how-to-install-xcode-command-line-tools>`_
150 | for more details. You may also want to set these environment variables in your
151 | ``.bashrc`` so you don't have to do this every time. You may also do this in
152 | the compyle configuration file, see :ref:`config`.
153 | 
154 | Once you do this, compyle will automatically use this version of GCC and will
155 | also work with OpenMP. Note that on some preliminary benchmarks, GCC's OpenMP
156 | implementation seems about 10% or so faster than the LLVM version. Your
157 | mileage may vary.
158 | 
159 | .. _brew: http://brew.sh/
160 | 
161 | 
162 | Setting up on Windows
163 | ----------------------
164 | 
165 | Windows will work but you need to make sure you have the right compiler
166 | installed. See this page for the details of what you need installed.
167 | 
168 | https://wiki.python.org/moin/WindowsCompilers
169 | 
170 | OpenMP will work if you have this installed. For recent Python versions
171 | (>=3.5), install the `Microsoft Build Tools for Visual Studio 2019
172 | <https://www.visualstudio.com/downloads/#build-tools-for-visual-studio-2019>`_
173 | 
174 | 
175 | Setting up OpenCL/CUDA
176 | -----------------------
177 | 
178 | This is too involved a topic to discuss here, instead look at the appropriate
179 | documentation for PyOpenCL_ and PyCUDA_. Once those packages work correctly,
180 | you should be all set. Note that if you are only using OpenCL/CUDA you do not
181 | need to have Cython or a C/C++ compiler. Some features on CUDA require the use
182 | of the CuPy_ library.
183 | 
184 | If you want to use OpenCL support, you will need to install the ``pyopencl``
185 | package (``conda install -c conda-forge pyopencl`` or ``pip install
186 | pyopencl``). For CUDA Support, you will need to install ``pycuda`` and
187 | ``cupy``. Of course this assumes you have the required hardware for this.
188 | 
189 | 
190 | .. _config:
191 | 
192 | Using the configuration file
193 | -----------------------------
194 | 
195 | Instead of setting environment variables and build options on the shell you
196 | can have them setup using a simple configuration file.
197 | 
198 | The file is located in ``~/.compyle/config.py``. Here ``~`` is your home
199 | directory which on Linux is ``/home/username``, on MacOS ``/Users/username``
200 | and on Windows the location is likely ``\Users\username``. This file is
201 | executed and certain options may be set there.
202 | 
203 | For example if you wish to set the environment variables ``CC`` and ``CXX``
204 | you could do this in the ``config.py``::
205 | 
206 |   import os
207 | 
208 |   os.environ['CC'] = 'gcc-9'
209 |   os.environ['CXX'] = 'g++-9'
210 | 
211 | If you are using an atypical compiler like icc, Cray, or PGI, you can set
212 | these up here too. You may also setup custom OpenMP related flags. For
213 | example, on a Cray system you may do the following::
214 | 
215 |   OMP_CFLAGS = ['-homp']
216 |   OMP_LINK = ['-homp']
217 | 
218 | The ``OMP_CFLAGS`` and ``OMP_LINK`` parameters should be lists. Other packages
219 | like pyzoltan or pysph may also use this file for customizations.
220 | 


--------------------------------------------------------------------------------
/docs/source/overview.rst:
--------------------------------------------------------------------------------
  1 | An overview
  2 | ==============
  3 | 
  4 | Compyle allows users to execute a restricted subset of Python (almost similar
  5 | to C) on a variety of HPC platforms. Currently we support multi-core execution
  6 | using Cython, and OpenCL and CUDA for GPU devices.
  7 | 
  8 | An introduction to compyle in the context of writing a molecular dynamics
  9 | simulator is available in our `SciPy 2020 paper`_. You may also `try Compyle`_
 10 | online on a Google Colab notebook if you wish.
 11 | 
 12 | Users start with code implemented in a very restricted Python syntax, this
 13 | code is then automatically transpiled, compiled and executed to run on either
 14 | one CPU core, or multiple CPU cores or on a GPU. Compyle offers source-to-source
 15 | transpilation, making it a very convenient tool for writing HPC libraries.
 16 | 
 17 | Compyle is not a magic bullet,
 18 | 
 19 | - Do not expect that you may get a tremendous speedup.
 20 | - Performance optimization can be hard and is platform specific. What works on
 21 |   the CPU may not work on the GPU and vice-versa. Compyle does not do anything
 22 |   to make this aspect easier. All the issues with memory bandwidth, cache, false
 23 |   sharing etc. still remain. Differences between memory architectures of CPUs
 24 |   and GPUs are not avoided at all -- you still have to deal with it. But you can
 25 |   do so from the comfort of one simple programming language, Python.
 26 | - Compyle makes it easy to write everything in pure Python and generate the
 27 |   platform specific code from Python. It provides a low-level tool to make it
 28 |   easy for you to generate whatever appropriate code.
 29 | - The restrictions Compyle imposes make it easy for you to think about your
 30 |   algorithms in that context and thereby allow you to build functionality that
 31 |   exploits the hardware as you see fit.
 32 | - Compyle hides the details of the backend to the extent possible. You can write
 33 |   your code in Python, you can reuse your functions and decompose your problem
 34 |   to maximize reuse. Traditionally you would end up implementing some code in C,
 35 |   some in Python, some in OpenCL/CUDA, some in string fragments that you put
 36 |   together. Then you'd have to manage each of the runtimes yourself, worry about
 37 |   compilation etc. Compyle minimizes that pain.
 38 | - By being written in Python, we make it easy to assemble these building blocks
 39 |   together to do fairly sophisticated things relatively easily from the same
 40 |   language.
 41 | - Compyle is fairly simple and does source translation making it generally
 42 |   easier to understand and debug. The core code-base is less than 7k lines of
 43 |   code.
 44 | - Compyle has relatively simple dependencies, for CPU support it requires
 45 |   Cython_ and a C-compiler which supports OpenMP_. On the GPU you need either
 46 |   PyOpenCL_ or PyCUDA_. In addition it depends on NumPy_ and Mako_.
 47 | 
 48 | 
 49 | .. _Cython: http://www.cython.org
 50 | .. _OpenMP: http://openmp.org/
 51 | .. _PyOpenCL: https://documen.tician.de/pyopencl/
 52 | .. _PyCUDA: https://documen.tician.de/pycuda/
 53 | .. _OpenCL: https://www.khronos.org/opencl/
 54 | .. _NumPy: http://numpy.scipy.org
 55 | .. _Mako: https://pypi.python.org/pypi/Mako
 56 | .. _SciPy 2020 paper: http://conference.scipy.org/proceedings/scipy2020/compyle_pr_ab.html
 57 | .. _try Compyle: https://colab.research.google.com/drive/1SGRiArYXV1LEkZtUeg9j0qQ21MDqQR2U?usp=sharing
 58 | 
 59 | While Compyle is simple and modest, it is quite powerful and convenient. In
 60 | fact, Compyle has its origins in PySPH_ which is a powerful Python package
 61 | supporting SPH, molecular dynamics, and other particle-based algorithms. The
 62 | basic elements of Compyle are used in PySPH_ to automatically generate HPC code
 63 | from code written in pure Python and execute it on multiple cores, and on GPUs
 64 | without the user having to change any of their code. Compyle generalizes this
 65 | code generation to make it available as a general tool.
 66 | 
 67 | .. _PySPH: http://pysph.readthedocs.io
 68 | 
 69 | 
 70 | These are the restrictions on the Python language that Compyle poses:
 71 | 
 72 | - Functions with a C-syntax.
 73 | - Function arguments must be declared using either type annotation or with a
 74 |   decorator or with default arguments.
 75 | - No Python data structures, i.e. no lists, tuples, or dictionaries.
 76 | - Contiguous Numpy arrays are supported but must be one dimensional.
 77 | - No memory allocation is allowed inside these functions.
 78 | - On OpenCL no recursion is supported.
 79 | - All function calls must not use dotted names, i.e. don't use ``math.sin``,
 80 |   instead just use ``sin``. This is because we do not perform any kind of name
 81 |   mangling of the generated code to make it easier to read.
 82 | 
 83 | Basically think of it as good old FORTRAN.
 84 | 
 85 | Technically we do support structs internally (we use it heavily in PySPH_) but
 86 | this is not yet exposed at the high-level and is very likely to be supported
 87 | in the future.
 88 | 
 89 | 
 90 | Simple example
 91 | --------------
 92 | 
 93 | Enough talk, lets look at some code.  Here is a very simple example::
 94 | 
 95 |    from compyle.api import Elementwise, annotate, wrap, get_config
 96 |    import numpy as np
 97 | 
 98 |    @annotate(i='int', x='doublep', y='doublep', double='a,b')
 99 |    def axpb(i, x, y, a, b):
100 |        y[i] = a*sin(x[i]) + b
101 | 
102 |    x = np.linspace(0, 1, 10000)
103 |    y = np.zeros_like(x)
104 |    a = 2.0
105 |    b = 3.0
106 | 
107 |    backend = 'cython'
108 |    get_config().use_openmp = True
109 |    x, y = wrap(x, y, backend=backend)
110 |    e = Elementwise(axpb, backend=backend)
111 |    e(x, y, a, b)
112 | 
113 | This will execute the elementwise operation in parallel using OpenMP with
114 | Cython. The code is auto-generated, compiled and called for you transparently.
115 | The first time this runs, it will take a bit of time to compile everything but
116 | the next time, this is cached and will run much faster.
117 | 
118 | If you just change the ``backend = 'opencl'``, the same exact code will be
119 | executed using PyOpenCL_ and if you change the backend to ``'cuda'``, it will
120 | execute via CUDA without any other changes to your code. This is obviously a
121 | very trivial example, there are more complex examples available as well.
122 | 
123 | To see the source code that is automatically generated for the above
124 | elementwise operation example use::
125 | 
126 |   e.source
127 | 
128 | This will contain the sources that are generated based on the user code alone.
129 | To see all the sources created, use::
130 | 
131 |   e.all_source
132 | 
133 | A word of warning though that this can be fairly long especially on a GPU and
134 | for other kind of operations may actually include multiple GPU kernels. This
135 | is largely for reference and debugging.
136 | 
137 | 
138 | More examples
139 | --------------
140 | 
141 | More complex examples (but still fairly simple) are available in the `examples
142 | <https://github.com/pypr/compyle/tree/main/examples>`_ directory.
143 | 
144 | - `axpb.py <https://github.com/pypr/compyle/tree/main/examples/axpb.py>`_: the
145 |   above example but for openmp and opencl compared with serial showing that in
146 |   some cases serial is actually faster than parallel!
147 | 
148 | - `vm_elementwise.py
149 |   <https://github.com/pypr/compyle/tree/main/examples/vm_elementwise.py>`_:
150 |   shows a simple N-body code with two-dimensional point vortices. The code uses
151 |   a simple elementwise operation and works with OpenMP and OpenCL.
152 | 
153 | - `vm_numba.py
154 |   <https://github.com/pypr/compyle/tree/main/examples/vm_numba.py>`_: shows
155 |   the same code written in numba for comparison. In our benchmarks, Compyle is
156 |   actually faster even in serial and in parallel it can be much faster when you
157 |   use all cores.
158 | 
159 | - `vm_kernel.py
160 |   <https://github.com/pypr/compyle/tree/main/examples/vm_kernel.py>`_: shows
161 |   how one can write a low-level OpenCL kernel in pure Python and use that. This
162 |   also shows how you can allocate and use local (or shared) memory which is
163 |   often very important for performance on GPGPUs. This code will only run via
164 |   PyOpenCL.
165 | 
166 | - `bench_vm.py
167 |   <https://github.com/pypr/compyle/tree/main/examples/bench_vm.py>`_:
168 |   Benchmarks the various vortex method results above for a comparison with
169 |   numba.
170 | 
171 | 
172 | Read on for more details about Compyle.
173 | 
174 | 
175 | Citing Compyle
176 | ---------------
177 | 
178 | If you find Compyle useful or just want to read a paper on it, please see:
179 | 
180 | - Aditya Bhosale and Prabhu Ramachandran, "Compyle: Python once, parallel
181 |   computing anywhere", Proceedings of the 19th Python in Science Conference
182 |   (SciPy 2020), July, 2020, Austin, Texas, USA.
183 |   `doi:10.25080/Majora-342d178e-005
184 |   <https://doi.org/10.25080/Majora-342d178e-005>`_ **Won best poster** `SciPy
185 |   2020 Paper`_.
186 | 
187 | Accompanying the paper is the
188 | 
189 |  - `Compyle poster presentation <https://docs.google.com/presentation/d/1LS9XO5pQXz8G5d27RP5oWLFxUA-Fr5OvfVUGsgg86TQ/edit#slide=id.p>`_
190 |  - and the `Compyle poster video <https://www.youtube.com/watch?v=h2YpPPL6nEY>`_
191 | 


--------------------------------------------------------------------------------
/examples/axpb.py:
--------------------------------------------------------------------------------
 1 | from compyle.api import Elementwise, annotate, wrap, get_config
 2 | import numpy as np
 3 | from numpy import sin
 4 | import time
 5 | 
 6 | 
 7 | @annotate(i='int', doublep='x, y, a, b')
 8 | def axpb(i, x, y, a, b):
 9 |     y[i] = a[i]*sin(x[i]) + b[i]
10 | 
11 | 
12 | def setup(backend, openmp=False):
13 |     get_config().use_openmp = openmp
14 |     e = Elementwise(axpb, backend=backend)
15 |     return e
16 | 
17 | 
18 | def data(n, backend):
19 |     x = np.linspace(0, 1, n)
20 |     y = np.zeros_like(x)
21 |     a = x*x
22 |     b = np.sqrt(x + 1)
23 |     return wrap(x, y, a, b, backend=backend)
24 | 
25 | 
26 | def compare(m=20):
27 |     N = 2**np.arange(1, 25)
28 |     backends = [['cython', False], ['cython', True]]
29 |     try:
30 |         import pyopencl
31 |         backends.append(['opencl', False])
32 |     except ImportError as e:
33 |         pass
34 | 
35 |     try:
36 |         import pycuda
37 |         backends.append(['cuda', False])
38 |     except ImportError as e:
39 |         pass
40 | 
41 |     timing = []
42 |     for backend in backends:
43 |         e = setup(*backend)
44 |         times = []
45 |         for n in N:
46 |             args = data(n, backend[0])
47 |             t = []
48 |             for j in range(m):
49 |                 start = time.time()
50 |                 e(*args)
51 |                 secs = time.time() - start
52 |                 t.append(secs)
53 |             times.append(np.average(t))
54 |         timing.append(times)
55 | 
56 |     return N, backends, np.array(timing)
57 | 
58 | 
59 | def plot_timing(n, timing, backends):
60 |     from matplotlib import pyplot as plt
61 |     backends[1][0] = 'openmp'
62 |     for t, backend in zip(timing[1:], backends[1:]):
63 |         plt.semilogx(n, timing[0]/t, label='serial/' + backend[0], marker='+')
64 |     plt.grid()
65 |     plt.xlabel('N')
66 |     plt.ylabel('Speedup')
67 |     plt.legend()
68 |     plt.show()
69 | 
70 | 
71 | if __name__ == '__main__':
72 |     n, backends, times = compare()
73 |     plot_timing(n, times, backends)
74 | 


--------------------------------------------------------------------------------
/examples/axpb_jit.py:
--------------------------------------------------------------------------------
 1 | """Shows the use of annotate without any type information.
 2 | The type information is extracted from the arguments passed
 3 | and the function is annotated and compiled at runtime.
 4 | """
 5 | 
 6 | from compyle.api import annotate, Elementwise, wrap, get_config, declare
 7 | import numpy as np
 8 | from numpy import sin
 9 | 
10 | 
11 | @annotate
12 | def axpb(i, x, y, a, b):
13 |     xi = x[i]
14 |     y[i] = a * sin(xi) + b
15 | 
16 | 
17 | x = np.linspace(0, 1, 10000)
18 | y = np.zeros_like(x)
19 | a = 2.0
20 | b = 3.0
21 | 
22 | backend = 'opencl'
23 | get_config().use_openmp = True
24 | x, y = wrap(x, y, backend=backend)
25 | e = Elementwise(axpb, backend=backend)
26 | e(x, y, a, b)
27 | 


--------------------------------------------------------------------------------
/examples/bench_vm.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | 
 4 | from compyle.config import get_config
 5 | import vm_numba as VN
 6 | import vm_elementwise as VE
 7 | import vm_kernel as VK
 8 | 
 9 | 
10 | def setup(mod, backend, openmp):
11 |     get_config().use_openmp = openmp
12 |     if mod == VE:
13 |         e = VE.Elementwise(VE.velocity, backend)
14 |     elif mod == VN:
15 |         e = VN.velocity
16 |     elif mod == VK:
17 |         e = VK.Kernel(VK.velocity, backend)
18 | 
19 |     return e
20 | 
21 | 
22 | def data(n, mod, backend):
23 |     if mod == VN:
24 |         args = mod.make_vortices(n)
25 |     else:
26 |         args = mod.make_vortices(n, backend)
27 |     return args
28 | 
29 | 
30 | def compare(m=5):
31 |     # Warm up the jit to prevent the timing from going off for the first point.
32 |     VN.velocity(*VN.make_vortices(100))
33 |     N = np.array([10, 50, 100, 200, 500, 1000, 2000, 4000, 6000,
34 |                   8000, 10000, 15000, 20000])
35 |     backends = [(VN, '', False), (VE, 'cython', False), (VE, 'cython', True),
36 |                 (VE, 'opencl', False), (VK, 'opencl', False)]
37 |     timing = []
38 |     for backend in backends:
39 |         e = setup(*backend)
40 |         times = []
41 |         for n in N:
42 |             args = data(n, backend[0], backend[1])
43 |             t = []
44 |             for j in range(m):
45 |                 start = time.time()
46 |                 e(*args)
47 |                 t.append(time.time() - start)
48 |             times.append(np.min(t))
49 |         timing.append(times)
50 | 
51 |     return N, np.array(timing)
52 | 
53 | 
54 | def plot_timing(n, timing):
55 |     from matplotlib import pyplot as plt
56 |     plt.plot(n, timing[0]/timing[1], label='numba/cython', marker='+')
57 |     plt.plot(n, timing[0]/timing[2], label='numba/openmp', marker='+')
58 |     plt.plot(n, timing[0]/timing[3], label='numba/opencl', marker='+')
59 |     plt.plot(n, timing[0]/timing[4], label='numba/opencl local', marker='+')
60 |     plt.grid()
61 |     plt.xlabel('N')
62 |     plt.ylabel('Speedup')
63 |     plt.legend()
64 |     plt.figure()
65 |     gflop = 12*n*n/1e9
66 |     plt.plot(n, gflop/timing[0], label='numba', marker='+')
67 |     plt.plot(n, gflop/timing[1], label='Cython', marker='+')
68 |     plt.plot(n, gflop/timing[2], label='OpenMP', marker='+')
69 |     plt.plot(n, gflop/timing[3], label='OpenCL', marker='+')
70 |     plt.plot(n, gflop/timing[4], label='OpenCL Local', marker='+')
71 |     plt.grid()
72 |     plt.xlabel('N')
73 |     plt.ylabel('GFLOPS')
74 |     plt.legend()
75 |     plt.show()
76 |     best = timing[:, -1].min()
77 |     print("Fastest time for n=", n[-1], best, "secs")
78 | 
79 | 
80 | if __name__ == '__main__':
81 |     n, t = compare()
82 |     plot_timing(n, t)
83 | 


--------------------------------------------------------------------------------
/examples/julia_set.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | from math import cos, sin
  3 | import numpy as np
  4 | from compyle.api import annotate, elementwise, get_config, wrap
  5 | 
  6 | 
  7 | @annotate
  8 | def julia(i, z, xa, ya, t):
  9 |     c0 = 0.7885*cos(t)
 10 |     c1 = 0.7885*sin(t)
 11 |     x = xa[i]
 12 |     y = ya[i]
 13 |     iters = 0
 14 |     while (x*x + y*y) < 400 and iters < 50:
 15 |         xn = x*x - y*y + c0
 16 |         y = x*y*2.0 + c1
 17 |         x = xn
 18 |         iters += 1
 19 |     z[i] = 1.0 - iters*0.02
 20 | 
 21 | 
 22 | def timer(x, y, z):
 23 |     s = time.perf_counter()
 24 |     n = 2000
 25 |     dt = 4*np.pi/n
 26 |     for i in range(n):
 27 |         julia(z, x, y, -dt*i)
 28 |     print("Took", time.perf_counter() - s, "seconds")
 29 | 
 30 | 
 31 | def plot(x, y, z, nx, ny):
 32 |     from mayavi import mlab
 33 |     mlab.figure(size=(600, 600))
 34 |     xmin, xmax = np.min(x.data), np.max(x.data)
 35 |     ymin, ymax = np.min(y.data), np.max(y.data)
 36 |     s = mlab.imshow(z.data.reshape((nx, ny)),
 37 |                     extent=[xmin, xmax, ymin, ymax, 0, 0],
 38 |                     colormap='jet')
 39 |     s.scene.z_plus_view()
 40 |     n = 2000
 41 |     dt = 4*np.pi/n
 42 |     for i in range(n):
 43 |         julia(z, x, y, -dt*i)
 44 |         z.pull()
 45 |         s.mlab_source.scalars = z.data.reshape((nx, ny))
 46 |         if i % 3 == 0:
 47 |             mlab.process_ui_events()
 48 |     mlab.show()
 49 | 
 50 | 
 51 | def save(x, y, z, gif_path='julia_set.gif'):
 52 |     import imageio as iio
 53 |     n = 250
 54 |     dt = 2*np.pi/n
 55 |     print(f"Writing {gif_path}")
 56 |     with iio.get_writer(gif_path, mode='I') as writer:
 57 |         for i in range(n):
 58 |             julia(z, x, y, -dt*i)
 59 |             z.pull()
 60 |             writer.append_data(
 61 |                 (z.data.reshape((nx, ny))*255).astype(np.uint8)
 62 |             )
 63 |             print(f"{i}/{n}", end='\r')
 64 |         print("Done.  ")
 65 |     try:
 66 |         from pygifsicle import optimize
 67 |         optimize(gif_path)
 68 |     except ImportError:
 69 |         print("Install pygifsicle for an optimized GIF")
 70 | 
 71 | 
 72 | if __name__ == '__main__':
 73 |     from compyle.utils import ArgumentParser
 74 |     p = ArgumentParser()
 75 |     p.add_argument('-n', action='store', type=int, dest='n',
 76 |                    default=512, help='Number of grid points in y.')
 77 |     p.add_argument(
 78 |         '--show', action='store_true', dest='show',
 79 |         default=False, help='Show animation (requires mayavi)'
 80 |     )
 81 |     p.add_argument(
 82 |         '--gif', action='store_true',
 83 |         default=False, help='Make a gif animation (requires imageio)'
 84 |     )
 85 |     cfg = get_config()
 86 |     cfg.suppress_warnings = True
 87 |     o = p.parse_args()
 88 |     julia = elementwise(julia)
 89 |     ny = o.n
 90 |     nx = int(4*ny//3)
 91 |     x, y = np.mgrid[-2:2:nx*1j, -1.5:1.5:ny*1j]
 92 |     x, y = x.ravel(), y.ravel()
 93 |     z = np.zeros_like(x)
 94 |     x, y, z = wrap(x, y, z)
 95 | 
 96 |     timer(x, y, z)
 97 | 
 98 |     if o.show:
 99 |         plot(x, y, z, nx, ny)
100 |     if o.gif:
101 |         save(x, y, z)
102 | 


--------------------------------------------------------------------------------
/examples/laplace.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from math import pi
  3 | import time
  4 | 
  5 | from compyle.config import get_config
  6 | from compyle.api import declare, annotate
  7 | from compyle.parallel import Elementwise
  8 | from compyle.array import get_backend, wrap
  9 | from compyle.low_level import cast
 10 | 
 11 | import compyle.array as carr
 12 | 
 13 | 
 14 | def bc(x, y):
 15 |     return np.sin(np.pi * (x + y))
 16 | 
 17 | 
 18 | @annotate
 19 | def laplace_step(i, u, res, err, nx, ny, dx2, dy2, dnr_inv):
 20 |     xid = cast(i % nx, "int")
 21 |     yid = cast(i / nx, "int")
 22 | 
 23 |     if xid == 0 or xid == nx - 1 or yid == 0 or yid == ny - 1:
 24 |         return
 25 | 
 26 |     res[i] = ((u[i - 1] + u[i + 1]) * dx2 +
 27 |               (u[i - nx] + u[i + nx]) * dy2) * dnr_inv
 28 | 
 29 |     diff = res[i] - u[i]
 30 | 
 31 |     err[i] = diff * diff
 32 | 
 33 | 
 34 | class Grid(object):
 35 |     def __init__(self, nx=10, ny=10, xmin=0., xmax=1.,
 36 |                  ymin=0., ymax=1., bc=lambda x: 0, backend=None):
 37 |         self.backend = get_backend(backend)
 38 |         self.xmin, self.xmax, self.ymin, self.ymax = xmin, xmax, ymin, ymax
 39 |         self.nx, self.ny = nx, ny
 40 |         self.dx = (xmax - xmin) / (nx - 1)
 41 |         self.dy = (ymax - ymin) / (ny - 1)
 42 |         self.x = np.arange(self.xmin, self.xmax + self.dx * 0.5, self.dx)
 43 |         self.y = np.arange(self.ymin, self.ymax + self.dy * 0.5, self.dy)
 44 |         self.bc = bc
 45 |         self.setup()
 46 | 
 47 |     def setup(self):
 48 |         u_host = np.zeros((self.nx, self.ny)).astype(np.float32)
 49 | 
 50 |         u_host[0, :] = self.bc(self.xmin, self.y)
 51 |         u_host[-1, :] = self.bc(self.xmax, self.y)
 52 |         u_host[:, 0] = self.bc(self.x, self.ymin)
 53 |         u_host[:, -1] = self.bc(self.x, self.ymax)
 54 | 
 55 |         self.u = wrap(u_host.flatten(), backend=self.backend)
 56 |         self.err = carr.zeros_like(self.u)
 57 | 
 58 |     def get(self):
 59 |         u_host = self.u.get()
 60 |         return np.resize(u_host, (self.nx, self.ny))
 61 | 
 62 |     def compute_err(self):
 63 |         return np.sqrt(carr.dot(self.err, self.err))
 64 | 
 65 |     def plot(self):
 66 |         import matplotlib.pyplot as plt
 67 |         plt.imshow(self.get())
 68 |         plt.show()
 69 | 
 70 | 
 71 | class LaplaceSolver(object):
 72 |     def __init__(self, grid, backend=None):
 73 |         self.grid = grid
 74 |         self.backend = get_backend(backend)
 75 |         self.step_method = Elementwise(laplace_step, backend=self.backend)
 76 |         self.res = self.grid.u.copy()
 77 | 
 78 |     def solve(self, max_iter=None, eps=1.0e-8):
 79 |         err = np.inf
 80 | 
 81 |         g = self.grid
 82 | 
 83 |         dx2 = g.dx ** 2
 84 |         dy2 = g.dy ** 2
 85 |         dnr_inv = 0.5 / (dx2 + dy2)
 86 | 
 87 |         count = 0
 88 | 
 89 |         while err > eps:
 90 |             if max_iter and count >= max_iter:
 91 |                 return err, count
 92 |             self.step_method(g.u, self.res, g.err, g.nx, g.ny,
 93 |                              dx2, dy2, dnr_inv)
 94 |             err = g.compute_err()
 95 | 
 96 |             tmp = g.u
 97 |             g.u = self.res
 98 |             self.res = tmp
 99 | 
100 |             count += 1
101 | 
102 |         return err, count
103 | 
104 | 
105 | if __name__ == '__main__':
106 |     from compyle.utils import ArgumentParser
107 |     p = ArgumentParser()
108 |     p.add_argument('--nx', action='store', type=int, dest='nx',
109 |                    default=100, help='Number of grid points in x.')
110 |     p.add_argument('--ny', action='store', type=int, dest='ny',
111 |                    default=100, help='Number of grid points in y.')
112 |     p.add_argument(
113 |         '--show', action='store_true', dest='show',
114 |         default=False, help='Show plot at the end of simulation'
115 |     )
116 |     o = p.parse_args()
117 | 
118 |     grid = Grid(nx=o.nx, ny=o.ny, bc=bc, backend=o.backend)
119 | 
120 |     solver = LaplaceSolver(grid, backend=o.backend)
121 | 
122 |     start = time.time()
123 |     err, count = solver.solve(eps=1e-6)
124 |     end = time.time()
125 | 
126 |     print("Number of iterations = %s" % count)
127 |     print("Time taken = %g secs" % (end - start))
128 | 
129 |     if o.show:
130 |         solver.grid.plot()
131 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/3D/compare_results.py:
--------------------------------------------------------------------------------
 1 | from hoomd_periodic import simulate
 2 | from md_nnps_periodic import MDNNPSSolverPeriodic
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | def run_simulations(num_particles, tf, dt):
 7 |     # run hoomd simulation
 8 |     simulate(num_particles, dt, tf, log=True)
 9 | 
10 |     # run compyle simulation
11 |     solver = MDNNPSSolverPeriodic(num_particles)
12 |     solver.solve(tf, dt, log_output=True)
13 |     solver.write_log('compyle-output.log')
14 | 
15 | 
16 | def plot_props(hoomd_fname, comp_fname):
17 |     data_hoomd = np.genfromtxt(fname=hoomd_fname, skip_header=True)
18 |     data_compyle = np.genfromtxt(fname=comp_fname)
19 | 
20 | 
21 |     plt.plot(data_hoomd[:,0], data_hoomd[:,1], label="HooMD")
22 |     plt.plot(data_hoomd[:,0], data_compyle[:,1], label="Compyle")
23 |     plt.xlabel("Timestep")
24 |     plt.ylabel("Potential Energy")
25 |     plt.legend()
26 |     plt.savefig("hoomd_pe.png", dpi=300)
27 | 
28 |     plt.clf()
29 | 
30 |     plt.plot(data_hoomd[:,0], data_hoomd[:,2], label="HooMD")
31 |     plt.plot(data_hoomd[:,0], data_compyle[:,2], label="Compyle")
32 |     plt.xlabel("Timestep")
33 |     plt.ylabel("Kinetic Energy")
34 |     plt.legend()
35 |     plt.savefig("hoomd_ke.png", dpi=300)
36 | 
37 | 
38 | if __name__ == '__main__':
39 |     run_simulations(2000, 200, 0.02)
40 |     plot_props('hoomd-output.log', 'compyle-output.log')
41 | 
42 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/3D/hoomd_periodic.py:
--------------------------------------------------------------------------------
 1 | import hoomd
 2 | import hoomd.md
 3 | import numpy as np
 4 | import time
 5 | 
 6 | 
 7 | def setup_positions(num_particles, dx):
 8 |     ndim = np.ceil(num_particles ** (1 / 3.))
 9 |     dim_length = ndim * dx
10 | 
11 |     xmax = 3 * (1 + round(dim_length * 1.5 / 3.))
12 |     ymax = 3 * (1 + round(dim_length * 1.5 / 3.))
13 |     zmax = 3 * (1 + round(dim_length * 1.5 / 3.))
14 | 
15 |     print(dim_length, xmax)
16 | 
17 |     xmin_eff = (xmax - dim_length) / 2.
18 |     xmax_eff = (xmax + dim_length) / 2.
19 | 
20 |     x, y, z = np.mgrid[xmin_eff:xmax_eff:dx, xmin_eff:xmax_eff:dx,
21 |                        xmin_eff:xmax_eff:dx]
22 |     x = x.ravel().astype(np.float32)[:num_particles]
23 |     y = y.ravel().astype(np.float32)[:num_particles]
24 |     z = z.ravel().astype(np.float32)[:num_particles]
25 |     return x, y, z, xmax
26 | 
27 | 
28 | def simulate(num_particles, dt, tf, profile=False, log=False):
29 |     x, y, z, L = setup_positions(num_particles, 2.)
30 |     positions = np.array((x, y, z)).T
31 |     hoomd.context.initialize("")
32 | 
33 |     snapshot = hoomd.data.make_snapshot(N=len(positions),
34 |                                         box=hoomd.data.boxdim(
35 |                                             Lx=L, Ly=L, Lz=L),
36 |                                         particle_types=['A'],
37 |                                         )
38 |     # need to get automated positions...
39 |     snapshot.particles.position[:] = positions - 0.5 * L
40 | 
41 |     snapshot.particles.typeid[:] = 0
42 | 
43 |     hoomd.init.read_snapshot(snapshot)
44 | 
45 |     nl = hoomd.md.nlist.cell(r_buff=0)
46 |     lj = hoomd.md.pair.lj(r_cut=3.0, nlist=nl)
47 |     lj.pair_coeff.set('A', 'A', epsilon=1.0, sigma=1.0)
48 | 
49 |     if log:
50 |         hoomd.analyze.log(filename="hoomd-output.log",
51 |                           quantities=['potential_energy', 'kinetic_energy'],
52 |                           period=100,
53 |                           overwrite=True)
54 | 
55 |     # Create integrator and forces
56 |     hoomd.md.integrate.mode_standard(dt=dt)
57 |     hoomd.md.integrate.nve(group=hoomd.group.all())
58 | 
59 |     nsteps = int(tf // dt)
60 |     start = time.time()
61 |     hoomd.run(nsteps, profile=profile)
62 |     end = time.time()
63 |     return end - start
64 | 
65 | 
66 | if __name__ == '__main__':
67 |     import sys
68 |     print(simulate(int(sys.argv[1]), 0.02, 200., profile=True, log=True))
69 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/3D/md_nnps.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from math import pi
  3 | import time
  4 | 
  5 | from compyle.config import get_config
  6 | from compyle.api import declare, annotate
  7 | from compyle.parallel import Elementwise, Reduction
  8 | from compyle.array import get_backend, wrap
  9 | import compyle.array as carr
 10 | 
 11 | from nnps import NNPSCountingSort, NNPSRadixSort
 12 | from md_simple import integrate_step1, integrate_step2, \
 13 |         boundary_condition, MDSolverBase
 14 | 
 15 | 
 16 | @annotate
 17 | def calculate_force(i, x, y, z, fx, fy, fz, pe, nbr_starts, nbr_lengths, nbrs):
 18 |     start_idx = nbr_starts[i]
 19 |     length = nbr_lengths[i]
 20 |     for k in range(start_idx, start_idx + length):
 21 |         j = nbrs[k]
 22 |         if i == j:
 23 |             continue
 24 |         xij = x[i] - x[j]
 25 |         yij = y[i] - y[j]
 26 |         zij = z[i] - z[j]
 27 |         rij2 = xij * xij + yij * yij + zij * zij
 28 |         irij2 = 1.0 / rij2
 29 |         irij6 = irij2 * irij2 * irij2
 30 |         irij12 = irij6 * irij6
 31 |         pe[i] += (2 * (irij12 - irij6))
 32 |         f_base = 24 * irij2 * (2 * irij12 - irij6)
 33 | 
 34 |         fx[i] += f_base * xij
 35 |         fy[i] += f_base * yij
 36 |         fz[i] += f_base * zij
 37 | 
 38 | 
 39 | @annotate
 40 | def step_method1(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax,
 41 |                  ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths,
 42 |                  nbrs):
 43 |     integrate_step1(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz)
 44 |     boundary_condition(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax,
 45 |                        ymin, ymax, zmin, zmax)
 46 | 
 47 | 
 48 | @annotate
 49 | def step_method2(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax,
 50 |                  ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths,
 51 |                  nbrs):
 52 |     calculate_force(i, x, y, z, fx, fy, fz, pe, nbr_starts, nbr_lengths, nbrs)
 53 |     integrate_step2(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz)
 54 | 
 55 | 
 56 | class MDNNPSSolver(MDSolverBase):
 57 |     def __init__(self, num_particles, x=None, y=None, z=None,
 58 |                  vx=None, vy=None, vz=None,
 59 |                  xmax=100., ymax=100., zmax=100., dx=2., init_T=0.,
 60 |                  backend=None, use_count_sort=False):
 61 |         super().__init__(num_particles, x=x, y=y, z=z, vx=vx, vy=vy, vz=vz,
 62 |                          xmax=xmax, ymax=ymax, zmax=zmax, dx=dx, init_T=init_T,
 63 |                          backend=backend)
 64 |         self.nnps_algorithm = NNPSCountingSort \
 65 |             if use_count_sort else NNPSRadixSort
 66 |         self.nnps = self.nnps_algorithm(self.x, self.y, self.z, 3., 0.01,
 67 |                                         self.xmax, self.ymax, self.zmax,
 68 |                                         backend=self.backend)
 69 |         self.init_forces = Elementwise(calculate_force, backend=self.backend)
 70 |         self.step1 = Elementwise(step_method1, backend=self.backend)
 71 |         self.step2 = Elementwise(step_method2, backend=self.backend)
 72 | 
 73 |     def solve(self, t, dt, log_output=False):
 74 |         num_steps = int(t // dt)
 75 |         self.nnps.build()
 76 |         self.nnps.get_neighbors()
 77 |         self.init_forces(self.x, self.y, self.z, self.fx, self.fy, self.fz,
 78 |                          self.pe, self.nnps.nbr_starts,
 79 |                          self.nnps.nbr_lengths, self.nnps.nbrs)
 80 |         for i in range(num_steps):
 81 |             self.step1(self.x, self.y, self.z, self.vx, self.vy, self.vz,
 82 |                        self.fx, self.fy, self.fz,
 83 |                        self.pe, self.xmin, self.xmax, self.ymin, self.ymax,
 84 |                        self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts,
 85 |                        self.nnps.nbr_lengths, self.nnps.nbrs)
 86 |             self.nnps.build()
 87 |             self.nnps.get_neighbors()
 88 |             self.step2(self.x, self.y, self.z, self.vx, self.vy, self.vz,
 89 |                        self.fx, self.fy, self.fz,
 90 |                        self.pe, self.xmin, self.xmax, self.ymin, self.ymax,
 91 |                        self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts,
 92 |                        self.nnps.nbr_lengths, self.nnps.nbrs)
 93 | 
 94 |             if i % 100 == 0:
 95 |                 self.post_step(i, log_output=log_output)
 96 | 
 97 | 
 98 | if __name__ == '__main__':
 99 |     from compyle.utils import ArgumentParser
100 |     p = ArgumentParser()
101 |     p.add_argument(
102 |         '--use-count-sort', action='store_true', dest='use_count_sort',
103 |         default=False, help='Use count sort instead of radix sort'
104 |     )
105 |     p.add_argument(
106 |         '--show', action='store_true', dest='show',
107 |         default=False, help='Show plot'
108 |     )
109 |     p.add_argument(
110 |         '--log-output', action='store_true', dest='log_output',
111 |         default=False, help='Log output'
112 |     )
113 | 
114 | 
115 |     p.add_argument('-n', action='store', type=int, dest='n',
116 |                    default=100, help='Number of particles')
117 | 
118 |     p.add_argument('--tf', action='store', type=float, dest='t',
119 |                    default=40., help='Final time')
120 | 
121 |     p.add_argument('--dt', action='store', type=float, dest='dt',
122 |                    default=0.02, help='Time step')
123 | 
124 |     o = p.parse_args()
125 | 
126 |     solver = MDNNPSSolver(
127 |         o.n,
128 |         backend=o.backend,
129 |         use_count_sort=o.use_count_sort)
130 | 
131 |     start = time.time()
132 |     solver.solve(o.t, o.dt, log_output=o.log_output)
133 |     end = time.time()
134 |     print("Time taken for N = %i is %g secs" % (o.n, (end - start)))
135 |     if o.log_output:
136 |         solver.write_log('nnps_log.log')
137 |     if o.show:
138 |         solver.pull()
139 |         solver.plot()
140 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/3D/md_nnps_periodic.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from math import pi
  3 | import time
  4 | 
  5 | from compyle.config import get_config
  6 | from compyle.api import declare, annotate
  7 | from compyle.parallel import Elementwise, Reduction
  8 | from compyle.array import get_backend, wrap
  9 | from compyle.low_level import cast
 10 | import compyle.array as carr
 11 | 
 12 | from nnps import NNPSCountingSortPeriodic, NNPSRadixSortPeriodic
 13 | from md_simple import integrate_step1, integrate_step2, MDSolverBase
 14 | 
 15 | 
 16 | @annotate
 17 | def calculate_force(i, x, y, z, xmax, ymax, zmax, fx, fy, fz, pe,
 18 |                     nbr_starts, nbr_lengths, nbrs):
 19 |     start_idx = nbr_starts[i]
 20 |     length = nbr_lengths[i]
 21 |     halfx = 0.5 * xmax
 22 |     halfy = 0.5 * ymax
 23 |     halfz = 0.5 * zmax
 24 |     for k in range(start_idx, start_idx + length):
 25 |         j = nbrs[k]
 26 |         if i == j:
 27 |             continue
 28 |         xij = x[i] - x[j]
 29 |         yij = y[i] - y[j]
 30 |         zij = z[i] - z[j]
 31 |         signx = 1 if xij > 0 else -1
 32 |         signy = 1 if yij > 0 else -1
 33 |         signz = 1 if zij > 0 else -1
 34 |         xij = xij if abs(xij) < halfx else xij - signx * xmax
 35 |         yij = yij if abs(yij) < halfy else yij - signy * ymax
 36 |         zij = zij if abs(zij) < halfz else zij - signz * zmax
 37 |         rij2 = xij * xij + yij * yij + zij * zij
 38 |         irij2 = 1.0 / rij2
 39 |         irij6 = irij2 * irij2 * irij2
 40 |         irij12 = irij6 * irij6
 41 |         pe[i] += (2 * (irij12 - irij6))
 42 |         f_base = 24 * irij2 * (2 * irij12 - irij6)
 43 | 
 44 |         fx[i] += f_base * xij
 45 |         fy[i] += f_base * yij
 46 |         fz[i] += f_base * zij
 47 | 
 48 | 
 49 | @annotate
 50 | def step_method1(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax,
 51 |                  ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths,
 52 |                  nbrs):
 53 |     integrate_step1(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz)
 54 |     boundary_condition(i, x, y, z, fx, fy, fz, pe, xmin, xmax,
 55 |                        ymin, ymax, zmin, zmax)
 56 | 
 57 | 
 58 | @annotate
 59 | def step_method2(i, x, y, z, vx, vy, vz, fx, fy, fz, pe, xmin, xmax,
 60 |                  ymin, ymax, zmin, zmax, m, dt, nbr_starts, nbr_lengths,
 61 |                  nbrs):
 62 |     calculate_force(i, x, y, z, xmax, ymax, zmax, fx, fy, fz, pe,
 63 |                     nbr_starts, nbr_lengths, nbrs)
 64 |     integrate_step2(i, m, dt, x, y, z, vx, vy, vz, fx, fy, fz)
 65 | 
 66 | 
 67 | @annotate
 68 | def boundary_condition(i, x, y, z, fx, fy, fz, pe, xmin, xmax, ymin, ymax,
 69 |                        zmin, zmax):
 70 |     fx[i] = 0.
 71 |     fy[i] = 0.
 72 |     fz[i] = 0.
 73 |     pe[i] = 0.
 74 | 
 75 |     xwidth = xmax - xmin
 76 |     ywidth = ymax - ymin
 77 |     zwidth = zmax - zmin
 78 | 
 79 |     xoffset = cast(floor(x[i] / xmax), "int")
 80 |     yoffset = cast(floor(y[i] / ymax), "int")
 81 |     zoffset = cast(floor(z[i] / zmax), "int")
 82 | 
 83 |     x[i] -= xoffset * xwidth
 84 |     y[i] -= yoffset * ywidth
 85 |     z[i] -= zoffset * zwidth
 86 | 
 87 | 
 88 | class MDNNPSSolverPeriodic(MDSolverBase):
 89 |     def __init__(self, num_particles, x=None, y=None, z=None,
 90 |                  vx=None, vy=None, vz=None,
 91 |                  xmax=100., ymax=100., zmax=100., dx=2., init_T=0.,
 92 |                  backend=None, use_count_sort=False):
 93 |         super().__init__(num_particles, x=x, y=y, z=z, vx=vx, vy=vy, vz=vz,
 94 |                          xmax=xmax, ymax=ymax, zmax=zmax, dx=dx, init_T=init_T,
 95 |                          backend=backend)
 96 |         self.nnps_algorithm = NNPSCountingSortPeriodic \
 97 |             if use_count_sort else NNPSRadixSortPeriodic
 98 |         self.nnps = self.nnps_algorithm(self.x, self.y, self.z, 3., 0.01,
 99 |                                         self.xmax, self.ymax, self.zmax,
100 |                                         backend=self.backend)
101 |         self.init_forces = Elementwise(calculate_force, backend=self.backend)
102 |         self.step1 = Elementwise(step_method1, backend=self.backend)
103 |         self.step2 = Elementwise(step_method2, backend=self.backend)
104 | 
105 |     def solve(self, t, dt, log_output=False):
106 |         num_steps = int(t // dt)
107 |         self.nnps.build()
108 |         self.nnps.get_neighbors()
109 |         self.init_forces(self.x, self.y, self.z, self.xmax, self.ymax,
110 |                          self.zmax, self.fx, self.fy, self.fz,
111 |                          self.pe, self.nnps.nbr_starts,
112 |                          self.nnps.nbr_lengths, self.nnps.nbrs)
113 |         for i in range(num_steps):
114 |             self.step1(self.x, self.y, self.z, self.vx, self.vy, self.vz,
115 |                        self.fx, self.fy, self.fz,
116 |                        self.pe, self.xmin, self.xmax, self.ymin, self.ymax,
117 |                        self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts,
118 |                        self.nnps.nbr_lengths, self.nnps.nbrs)
119 |             self.nnps.build()
120 |             self.nnps.get_neighbors()
121 |             self.step2(self.x, self.y, self.z, self.vx, self.vy, self.vz,
122 |                        self.fx, self.fy, self.fz,
123 |                        self.pe, self.xmin, self.xmax, self.ymin, self.ymax,
124 |                        self.zmin, self.zmax, self.m, dt, self.nnps.nbr_starts,
125 |                        self.nnps.nbr_lengths, self.nnps.nbrs)
126 | 
127 |             if i % 100 == 0:
128 |                 self.post_step(i, log_output=log_output)
129 | 
130 | 
131 | if __name__ == '__main__':
132 |     from compyle.utils import ArgumentParser
133 |     p = ArgumentParser()
134 |     p.add_argument(
135 |         '--use-count-sort', action='store_true', dest='use_count_sort',
136 |         default=False, help='Use count sort instead of radix sort'
137 |     )
138 |     p.add_argument(
139 |         '--show', action='store_true', dest='show',
140 |         default=False, help='Show plot'
141 |     )
142 |     p.add_argument(
143 |         '--log-output', action='store_true', dest='log_output',
144 |         default=False, help='Log output'
145 |     )
146 | 
147 |     p.add_argument('-n', action='store', type=int, dest='n',
148 |                    default=100, help='Number of particles')
149 | 
150 |     p.add_argument('--tf', action='store', type=float, dest='t',
151 |                    default=40., help='Final time')
152 | 
153 |     p.add_argument('--dt', action='store', type=float, dest='dt',
154 |                    default=0.02, help='Time step')
155 | 
156 |     o = p.parse_args()
157 | 
158 |     solver = MDNNPSSolverPeriodic(
159 |         o.n,
160 |         backend=o.backend,
161 |         use_count_sort=o.use_count_sort)
162 | 
163 |     start = time.time()
164 |     solver.solve(o.t, o.dt, o.log_output)
165 |     end = time.time()
166 |     print("Time taken for N = %i is %g secs" % (o.n, (end - start)))
167 |     if o.log_output:
168 |         solver.write_log('nnps_periodic.log')
169 |     if o.show:
170 |         solver.pull()
171 |         solver.plot()
172 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/3D/nnps_kernels.py:
--------------------------------------------------------------------------------
  1 | from compyle.api import declare, annotate
  2 | from compyle.parallel import serial
  3 | from compyle.low_level import atomic_inc, cast
  4 | from math import floor
  5 | import numpy as np
  6 | 
  7 | 
  8 | @annotate
  9 | def find_cell_id(x, y, z, h, eps, c):
 10 |     c[0] = cast(floor((x + eps) / h), "int")
 11 |     c[1] = cast(floor((y + eps) / h), "int")
 12 |     c[2] = cast(floor((z + eps) / h), "int")
 13 | 
 14 | 
 15 | @annotate
 16 | def flatten(p, q, r, qmax, rmax):
 17 |     return (p * qmax + q) * rmax + r
 18 | 
 19 | 
 20 | @serial
 21 | @annotate
 22 | def count_bins(i, x, y, z, h, eps, qmax, rmax, keys, bin_counts,
 23 |                sort_offsets):
 24 |     c = declare('matrix(3, "int")')
 25 |     find_cell_id(x[i], y[i], z[i], h, eps, c)
 26 |     key = flatten(c[0], c[1], c[2], qmax, rmax)
 27 |     keys[i] = key
 28 |     idx = atomic_inc(bin_counts[key])
 29 |     sort_offsets[i] = idx
 30 | 
 31 | 
 32 | @annotate
 33 | def sort_indices(i, keys, sort_offsets, start_indices, sorted_indices):
 34 |     key = keys[i]
 35 |     offset = sort_offsets[i]
 36 |     start_idx = start_indices[key]
 37 |     sorted_indices[start_idx + offset] = i
 38 | 
 39 | 
 40 | @annotate
 41 | def input_start_indices(i, counts):
 42 |     return 0 if i == 0 else counts[i - 1]
 43 | 
 44 | 
 45 | @annotate
 46 | def output_start_indices(i, item, indices):
 47 |     indices[i] = item
 48 | 
 49 | 
 50 | @annotate
 51 | def fill_keys(i, x, y, z, h, eps, qmax, rmax, indices, keys):
 52 |     c = declare('matrix(3, "int")')
 53 |     find_cell_id(x[i], y[i], z[i], h, eps, c)
 54 |     key = flatten(c[0], c[1], c[2], qmax, rmax)
 55 |     keys[i] = key
 56 |     indices[i] = i
 57 | 
 58 | 
 59 | @annotate
 60 | def input_scan_keys(i, keys):
 61 |     return 1 if i == 0 or keys[i] != keys[i - 1] else 0
 62 | 
 63 | 
 64 | @annotate
 65 | def output_scan_keys(i, item, prev_item, keys, start_indices):
 66 |     key = keys[i]
 67 |     if item != prev_item:
 68 |         start_indices[key] = i
 69 | 
 70 | 
 71 | @annotate
 72 | def fill_bin_counts(i, keys, start_indices, bin_counts, num_particles):
 73 |     if i == num_particles - 1:
 74 |         last_key = keys[num_particles - 1]
 75 |         bin_counts[last_key] = num_particles - start_indices[last_key]
 76 |     if i == 0 or keys[i] == keys[i - 1]:
 77 |         return
 78 |     key = keys[i]
 79 |     prev_key = keys[i - 1]
 80 |     bin_counts[prev_key] = start_indices[key] - start_indices[prev_key]
 81 | 
 82 | 
 83 | @annotate
 84 | def find_neighbor_lengths_knl(i, x, y, z, h, eps, qmax, rmax, start_indices,
 85 |                               sorted_indices, bin_counts, nbr_lengths,
 86 |                               max_key):
 87 |     d = h * h
 88 |     q_c = declare('matrix(3, "int")')
 89 |     find_cell_id(x[i], y[i], z[i], h, eps, q_c)
 90 | 
 91 |     for p in range(-1, 2):
 92 |         for q in range(-1, 2):
 93 |             for r in range(-1, 2):
 94 |                 cx = q_c[0] + p
 95 |                 cy = q_c[1] + q
 96 |                 cz = q_c[2] + r
 97 | 
 98 |                 key = flatten(cx, cy, cz, qmax, rmax)
 99 | 
100 |                 if key >= max_key or key < 0:
101 |                     continue
102 | 
103 |                 start_idx = start_indices[key]
104 |                 np = bin_counts[key]
105 | 
106 |                 for k in range(np):
107 |                     j = sorted_indices[start_idx + k]
108 |                     xij = x[i] - x[j]
109 |                     yij = y[i] - y[j]
110 |                     zij = z[i] - z[j]
111 |                     rij2 = xij * xij + yij * yij + zij * zij
112 | 
113 |                     if rij2 < d:
114 |                         nbr_lengths[i] += 1
115 | 
116 | 
117 | @annotate
118 | def find_neighbors_knl(i, x, y, z, h, eps, qmax, rmax, start_indices, sorted_indices,
119 |                        bin_counts, nbr_starts, nbrs, max_key):
120 |     d = h * h
121 |     q_c = declare('matrix(3, "int")')
122 |     find_cell_id(x[i], y[i], z[i], h, eps, q_c)
123 |     length = 0
124 |     nbr_start_idx = nbr_starts[i]
125 | 
126 |     for p in range(-1, 2):
127 |         for q in range(-1, 2):
128 |             for r in range(-1, 2):
129 |                 cx = q_c[0] + p
130 |                 cy = q_c[1] + q
131 |                 cz = q_c[2] + r
132 | 
133 |                 key = flatten(cx, cy, cz, qmax, rmax)
134 | 
135 |                 if key >= max_key or key < 0:
136 |                     continue
137 | 
138 |                 start_idx = start_indices[key]
139 |                 np = bin_counts[key]
140 | 
141 |                 for k in range(np):
142 |                     j = sorted_indices[start_idx + k]
143 |                     xij = x[i] - x[j]
144 |                     yij = y[i] - y[j]
145 |                     zij = z[i] - z[j]
146 |                     rij2 = xij * xij + yij * yij + zij * zij
147 | 
148 |                     if rij2 < d:
149 |                         nbrs[nbr_start_idx + length] = j
150 |                         length += 1
151 | 
152 | 
153 | @annotate
154 | def find_neighbor_lengths_periodic_knl(i, x, y, z, h, eps, xmax, ymax, zmax,
155 |                                        pmax, qmax, rmax, start_indices,
156 |                                        sorted_indices, bin_counts, nbr_lengths,
157 |                                        max_key):
158 |     d = h * h
159 |     q_c = declare('matrix(3, "int")')
160 |     xij, yij, zij = declare('double', 3)
161 |     find_cell_id(x[i], y[i], z[i], h, eps, q_c)
162 | 
163 |     for p in range(-1, 2):
164 |         for q in range(-1, 2):
165 |             for r in range(-1, 2):
166 |                 cx = q_c[0] + p
167 |                 cy = q_c[1] + q
168 |                 cz = q_c[2] + r
169 | 
170 |                 cx_f = cast(cx, "float")
171 |                 cy_f = cast(cy, "float")
172 |                 cz_f = cast(cz, "float")
173 | 
174 |                 xoffset = cast(floor(cx_f / pmax), "int")
175 |                 yoffset = cast(floor(cy_f / qmax), "int")
176 |                 zoffset = cast(floor(cz_f / rmax), "int")
177 | 
178 |                 cx -= xoffset * pmax
179 |                 cy -= yoffset * qmax
180 |                 cz -= zoffset * rmax
181 | 
182 |                 key = flatten(cx, cy, cz, qmax, rmax)
183 | 
184 |                 if key >= max_key or key < 0:
185 |                     continue
186 | 
187 |                 start_idx = start_indices[key]
188 |                 np = bin_counts[key]
189 | 
190 |                 for k in range(np):
191 |                     j = sorted_indices[start_idx + k]
192 |                     xij = abs(x[i] - x[j])
193 |                     yij = abs(y[i] - y[j])
194 |                     zij = abs(z[i] - z[j])
195 |                     xij = min(xij, xmax - xij)
196 |                     yij = min(yij, ymax - yij)
197 |                     zij = min(zij, zmax - zij)
198 |                     rij2 = xij * xij + yij * yij + zij * zij
199 | 
200 |                     if rij2 < d:
201 |                         nbr_lengths[i] += 1
202 | 
203 | 
204 | @annotate
205 | def find_neighbors_periodic_knl(i, x, y, z, h, eps, xmax, ymax, zmax,
206 |                                 pmax, qmax, rmax, start_indices, sorted_indices,
207 |                                 bin_counts, nbr_starts, nbrs, max_key):
208 |     d = h * h
209 |     q_c = declare('matrix(3, "int")')
210 |     xij, yij, zij = declare('double', 3)
211 |     find_cell_id(x[i], y[i], z[i], h, eps, q_c)
212 |     length = 0
213 |     nbr_start_idx = nbr_starts[i]
214 | 
215 |     for p in range(-1, 2):
216 |         for q in range(-1, 2):
217 |             for r in range(-1, 2):
218 |                 cx = q_c[0] + p
219 |                 cy = q_c[1] + q
220 |                 cz = q_c[2] + r
221 | 
222 |                 cx_f = cast(cx, "float")
223 |                 cy_f = cast(cy, "float")
224 |                 cz_f = cast(cz, "float")
225 | 
226 |                 xoffset = cast(floor(cx_f / pmax), "int")
227 |                 yoffset = cast(floor(cy_f / qmax), "int")
228 |                 zoffset = cast(floor(cz_f / rmax), "int")
229 | 
230 |                 cx -= xoffset * pmax
231 |                 cy -= yoffset * qmax
232 |                 cz -= zoffset * rmax
233 | 
234 |                 key = flatten(cx, cy, cz, qmax, rmax)
235 | 
236 |                 if key >= max_key or key < 0:
237 |                     continue
238 | 
239 |                 start_idx = start_indices[key]
240 |                 np = bin_counts[key]
241 | 
242 |                 for k in range(np):
243 |                     j = sorted_indices[start_idx + k]
244 |                     xij = abs(x[i] - x[j])
245 |                     yij = abs(y[i] - y[j])
246 |                     zij = abs(z[i] - z[j])
247 |                     xij = min(xij, xmax - xij)
248 |                     yij = min(yij, ymax - yij)
249 |                     zij = min(zij, zmax - zij)
250 |                     rij2 = xij * xij + yij * yij + zij * zij
251 | 
252 |                     if rij2 < d:
253 |                         nbrs[nbr_start_idx + length] = j
254 |                         length += 1
255 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/3D/performance_comparison.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | from md_nnps_periodic import MDNNPSSolverPeriodic
 4 | 
 5 | from compyle.config import get_config
 6 | from hoomd_periodic import simulate
 7 | 
 8 | 
 9 | def solve(n, backend, tf=4., dt=0.02, use_count_sort=False):
10 |     if backend == 'hoomd':
11 |         return simulate(n, dt, tf)
12 |     else:
13 |         solver = MDNNPSSolverPeriodic(
14 |             n, dx=2., backend=backend, use_count_sort=use_count_sort)
15 |         start = time.time()
16 |         solver.solve(tf, dt)
17 |         end = time.time()
18 |         print("Time taken for backend = %s, N = %i is %g secs" %
19 |               (backend, n, (end - start)))
20 |         return end - start
21 | 
22 | 
23 | def compare(backends, n_list, niter=3, use_count_sort=False):
24 |     t_list = {b: [] for b in backends}
25 |     speedups = {b: [] for b in backends}
26 |     for backend in backends:
27 |         for n in n_list:
28 |             print("Running for N = %i" % n)
29 |             t = 1e9
30 |             for it in range(niter):
31 |                 t = min(t, solve(n, backend, use_count_sort=use_count_sort))
32 |             t_list[backend].append(t)
33 | 
34 |     if 'hoomd' in backends:
35 |         for backend in backends:
36 |             for i, n in enumerate(n_list):
37 |                 speedups[backend].append(
38 |                     t_list['hoomd'][i] / t_list[backend][i])
39 |     else:
40 |         speedups = None
41 | 
42 |     return speedups, t_list
43 | 
44 | 
45 | def plot(n_list, speedups, t_list, label):
46 |     backend_label_map = {'hoomd': 'HooMD',
47 |                          'opencl': 'OpenCL', 'cuda': 'CUDA'}
48 |     import matplotlib.pyplot as plt
49 |     plt.figure()
50 | 
51 |     if speedups:
52 |         for backend, arr in speedups.items():
53 |             if backend == "hoomd":
54 |                 continue
55 |             plt.semilogx(n_list, arr, 'x-', label=backend_label_map[backend])
56 | 
57 |         plt.xlabel("Number of particles")
58 |         plt.ylabel("Speedup")
59 |         plt.legend()
60 |         plt.grid(True)
61 |         plt.savefig("%s_speedup_%s.png" %
62 |                     (label, "_".join(speedups.keys())), dpi=300)
63 | 
64 |     plt.clf()
65 | 
66 |     for backend, arr in t_list.items():
67 |         plt.loglog(n_list, arr, 'x-', label=backend_label_map[backend])
68 | 
69 |     plt.xlabel("Number of particles")
70 |     plt.ylabel("Time (secs)")
71 |     plt.legend()
72 |     plt.grid(True)
73 |     plt.savefig("%s_time_%s.png" % (label, "_".join(t_list.keys())), dpi=300)
74 | 
75 | 
76 | if __name__ == "__main__":
77 |     from argparse import ArgumentParser
78 |     p = ArgumentParser()
79 | 
80 |     p.add_argument(
81 |         '--use-count-sort', action='store_true', dest='use_count_sort',
82 |         default=False, help='Use count sort instead of radix sort'
83 |     )
84 |     o = p.parse_args()
85 | 
86 |     n_list = [1000 * (2 ** i) for i in range(11)]
87 |     backends = ["cuda", "hoomd"]
88 |     print("Running for", n_list)
89 |     speedups, t_list = compare(backends, n_list,
90 |                                use_count_sort=o.use_count_sort)
91 |     plot(n_list, speedups, t_list, "hoomd")
92 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/README.rst:
--------------------------------------------------------------------------------
 1 | Molecular Dynamics Example
 2 | --------------------------
 3 | 
 4 | We have 3 implementations of a simple molecular dynamics simulation
 5 | of an N body problem in Lennard Jones potential. The first implementation
 6 | is a simple :math:`O(N^2)` implementation that can be found in
 7 | :code:`md_simple.py`. The second implementation is using nearest neighbor
 8 | searching to reduce the complexity to :math:`O(N)` and can be
 9 | found in :code:`md_nnps.py`.
10 | 
11 | We also have two different implementations of nearest neighbor search
12 | algorithms, one using a radix sort on GPU and numpy sort on CPU
13 | and the other using a native counting sort implementation. The counting
14 | sort version is about 30% faster. Both these implementations can be
15 | found in :code:`nnps.py`.
16 | 
17 | This example has been discussed at length in 
18 | `this <http://procbuild.scipy.org/download/prabhuramachandran-compyle>`_ 
19 | SciPy 2020 paper.
20 | Following commands can be used to reproduce the performance results
21 | shown in the paper.
22 | 
23 | +------------------+---------------------------------------------------------------+
24 | | Figure 2         | `python performance_comparison.py -c omp_comp --nnps simple`  |
25 | +------------------+---------------------------------------------------------------+
26 | | Figure 3         | `python performance_comparison.py -c gpu_comp --nnps simple`  |
27 | +------------------+---------------------------------------------------------------+
28 | | Figure 4 & 5     | `python performance_comparison.py -c gpu_comp`                |
29 | +------------------+---------------------------------------------------------------+
30 | | Figure 6 & 7     | `python performance_comparison.py -c comp_algo`               |
31 | +------------------+---------------------------------------------------------------+
32 | | Figure 8         | `cd 3D && python performance_comparison.py --use-count-sort`  |
33 | +------------------+---------------------------------------------------------------+
34 | 
35 | To generate energy plots for HooMD and Compyle implementations, run the script
36 | :code:`3D/compare_results.py`
37 | 
38 | Users can use the google colab notebook 
39 | `here <https://colab.research.google.com/drive/1SGRiArYXV1LEkZtUeg9j0qQ21MDqQR2U?usp=sharing>`_
40 | to play around with the example.
41 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/md_nnps.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from math import pi
  3 | import time
  4 | 
  5 | from compyle.config import get_config
  6 | from compyle.api import declare, annotate
  7 | from compyle.parallel import Elementwise, Reduction
  8 | from compyle.array import get_backend, wrap
  9 | import compyle.array as carr
 10 | 
 11 | from nnps import NNPSCountingSort, NNPSRadixSort
 12 | from md_simple import integrate_step1, integrate_step2, \
 13 |     boundary_condition, MDSolverBase
 14 | 
 15 | 
 16 | @annotate
 17 | def calculate_force(i, x, y, fx, fy, pe, nbr_starts, nbr_lengths, nbrs):
 18 |     start_idx = nbr_starts[i]
 19 |     length = nbr_lengths[i]
 20 |     for k in range(start_idx, start_idx + length):
 21 |         j = nbrs[k]
 22 |         if i == j:
 23 |             continue
 24 |         xij = x[i] - x[j]
 25 |         yij = y[i] - y[j]
 26 |         rij2 = xij * xij + yij * yij
 27 |         irij2 = 1.0 / rij2
 28 |         irij6 = irij2 * irij2 * irij2
 29 |         irij12 = irij6 * irij6
 30 |         pe[i] += (2 * (irij12 - irij6))
 31 |         f_base = 24 * irij2 * (2 * irij12 - irij6)
 32 | 
 33 |         fx[i] += f_base * xij
 34 |         fy[i] += f_base * yij
 35 | 
 36 | 
 37 | @annotate
 38 | def step_method1(i, x, y, vx, vy, fx, fy, pe, xmin, xmax,
 39 |                  ymin, ymax, m, dt, nbr_starts, nbr_lengths,
 40 |                  nbrs):
 41 |     integrate_step1(i, m, dt, x, y, vx, vy, fx, fy)
 42 |     boundary_condition(i, x, y, vx, vy, fx, fy, pe, xmin, xmax,
 43 |                        ymin, ymax)
 44 | 
 45 | 
 46 | @annotate
 47 | def step_method2(i, x, y, vx, vy, fx, fy, pe, xmin, xmax,
 48 |                  ymin, ymax, m, dt, nbr_starts, nbr_lengths,
 49 |                  nbrs):
 50 |     calculate_force(i, x, y, fx, fy, pe, nbr_starts, nbr_lengths, nbrs)
 51 |     integrate_step2(i, m, dt, x, y, vx, vy, fx, fy)
 52 | 
 53 | 
 54 | class MDNNPSSolver(MDSolverBase):
 55 |     def __init__(self, num_particles, x=None, y=None, vx=None, vy=None,
 56 |                  xmax=100., ymax=100., dx=1.5, init_T=0.,
 57 |                  backend=None, use_count_sort=False):
 58 |         super().__init__(num_particles, x=x, y=y, vx=vx, vy=vy,
 59 |                          xmax=xmax, ymax=ymax, dx=dx, init_T=init_T,
 60 |                          backend=backend)
 61 |         self.init_forces = Elementwise(calculate_force, backend=self.backend)
 62 |         self.step1 = Elementwise(step_method1, backend=self.backend)
 63 |         self.step2 = Elementwise(step_method2, backend=self.backend)
 64 |         self.nnps_algorithm = NNPSCountingSort \
 65 |             if use_count_sort else NNPSRadixSort
 66 |         self.nnps = self.nnps_algorithm(self.x, self.y, 3., self.xmax,
 67 |                                         self.ymax, backend=self.backend)
 68 | 
 69 |     def solve(self, t, dt):
 70 |         num_steps = int(t // dt)
 71 |         self.nnps.build()
 72 |         self.nnps.get_neighbors()
 73 |         self.init_forces(self.x, self.y, self.fx, self.fy,
 74 |                          self.pe, self.nnps.nbr_starts,
 75 |                          self.nnps.nbr_lengths, self.nnps.nbrs)
 76 |         for i in range(num_steps):
 77 |             self.step1(self.x, self.y, self.vx, self.vy, self.fx,
 78 |                        self.fy, self.pe, self.xmin, self.xmax, self.ymin,
 79 |                        self.ymax, self.m, dt, self.nnps.nbr_starts,
 80 |                        self.nnps.nbr_lengths, self.nnps.nbrs)
 81 |             self.nnps.build()
 82 |             self.nnps.get_neighbors()
 83 |             self.step2(self.x, self.y, self.vx, self.vy, self.fx,
 84 |                        self.fy, self.pe, self.xmin, self.xmax, self.ymin,
 85 |                        self.ymax, self.m, dt, self.nnps.nbr_starts,
 86 |                        self.nnps.nbr_lengths, self.nnps.nbrs)
 87 |             if i % 100 == 0:
 88 |                 self.post_step(i)
 89 | 
 90 | 
 91 | if __name__ == '__main__':
 92 |     from compyle.utils import ArgumentParser
 93 |     p = ArgumentParser()
 94 |     p.add_argument(
 95 |         '--use-count-sort', action='store_true', dest='use_count_sort',
 96 |         default=False, help='Use count sort instead of radix sort'
 97 |     )
 98 |     p.add_argument(
 99 |         '--show', action='store_true', dest='show',
100 |         default=False, help='Show plot at end of simulation'
101 |     )
102 | 
103 |     p.add_argument('-n', action='store', type=int, dest='n',
104 |                    default=100, help='Number of particles')
105 | 
106 |     p.add_argument('--tf', action='store', type=float, dest='t',
107 |                    default=40., help='Final time')
108 | 
109 |     p.add_argument('--dt', action='store', type=float, dest='dt',
110 |                    default=0.02, help='Time step')
111 | 
112 |     o = p.parse_args()
113 | 
114 |     solver = MDNNPSSolver(
115 |         o.n,
116 |         backend=o.backend,
117 |         use_count_sort=o.use_count_sort)
118 | 
119 |     start = time.time()
120 |     solver.solve(o.t, o.dt)
121 |     end = time.time()
122 |     print("Time taken for N = %i is %g secs" % (o.n, (end - start)))
123 |     if o.show:
124 |         solver.pull()
125 |         solver.plot()
126 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/md_simple.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from math import pi
  3 | import time
  4 | 
  5 | from compyle.config import get_config
  6 | from compyle.api import declare, annotate
  7 | from compyle.parallel import Elementwise, Reduction
  8 | from compyle.array import get_backend, wrap
  9 | 
 10 | import compyle.array as carr
 11 | 
 12 | 
 13 | @annotate
 14 | def calculate_energy(i, vx, vy, pe, num_particles):
 15 |     ke = 0.5 * (vx[i] * vx[i] + vy[i] * vy[i])
 16 |     return pe[i] + ke
 17 | 
 18 | 
 19 | @annotate
 20 | def calculate_force(i, x, y, fx, fy, pe, num_particles):
 21 |     force_cutoff = 3.
 22 |     force_cutoff2 = force_cutoff * force_cutoff
 23 |     for j in range(num_particles):
 24 |         if i == j:
 25 |             continue
 26 |         xij = x[i] - x[j]
 27 |         yij = y[i] - y[j]
 28 |         rij2 = xij * xij + yij * yij
 29 |         if rij2 > force_cutoff2:
 30 |             continue
 31 |         irij2 = 1.0 / rij2
 32 |         irij6 = irij2 * irij2 * irij2
 33 |         irij12 = irij6 * irij6
 34 |         pe[i] += (2 * (irij12 - irij6))
 35 |         f_base = 24 * irij2 * (2 * irij12 - irij6)
 36 | 
 37 |         fx[i] += f_base * xij
 38 |         fy[i] += f_base * yij
 39 | 
 40 | 
 41 | @annotate
 42 | def step_method1(i, x, y, vx, vy, fx, fy, pe, xmin, xmax,
 43 |                  ymin, ymax, m, dt, num_particles):
 44 |     integrate_step1(i, m, dt, x, y, vx, vy, fx, fy)
 45 |     boundary_condition(i, x, y, vx, vy, fx, fy, pe, xmin, xmax,
 46 |                        ymin, ymax)
 47 | 
 48 | 
 49 | @annotate
 50 | def step_method2(i, x, y, vx, vy, fx, fy, pe, xmin, xmax,
 51 |                  ymin, ymax, m, dt, num_particles):
 52 |     calculate_force(i, x, y, fx, fy, pe, num_particles)
 53 |     integrate_step2(i, m, dt, x, y, vx, vy, fx, fy)
 54 | 
 55 | 
 56 | @annotate
 57 | def integrate_step1(i, m, dt, x, y, vx, vy, fx, fy):
 58 |     axi = fx[i]
 59 |     ayi = fy[i]
 60 |     x[i] += vx[i] * dt + 0.5 * axi * dt * dt
 61 |     y[i] += vy[i] * dt + 0.5 * ayi * dt * dt
 62 |     vx[i] += 0.5 * axi * dt
 63 |     vy[i] += 0.5 * ayi * dt
 64 | 
 65 | 
 66 | @annotate
 67 | def integrate_step2(i, m, dt, x, y, vx, vy, fx, fy):
 68 |     axi = fx[i]
 69 |     ayi = fy[i]
 70 |     vx[i] += 0.5 * axi * dt
 71 |     vy[i] += 0.5 * ayi * dt
 72 | 
 73 | 
 74 | @annotate
 75 | def boundary_condition(i, x, y, vx, vy, fx, fy, pe, xmin, xmax, ymin, ymax):
 76 |     xwidth = xmax - xmin
 77 |     ywidth = ymax - ymin
 78 |     stiffness = 50.
 79 |     pe[i] = 0.
 80 |     if x[i] < 0.5:
 81 |         fx[i] = stiffness * (0.5 - x[i])
 82 |         pe[i] += 0.5 * stiffness * (0.5 - x[i]) * (0.5 - x[i])
 83 |     elif x[i] > xwidth - 0.5:
 84 |         fx[i] = stiffness * (xwidth - 0.5 - x[i])
 85 |         pe[i] += 0.5 * stiffness * (xwidth - 0.5 - x[i]) * (xwidth - 0.5 - x[i])
 86 |     else:
 87 |         fx[i] = 0.
 88 | 
 89 |     if y[i] < 0.5:
 90 |         fy[i] = stiffness * (0.5 - y[i])
 91 |         pe[i] += 0.5 * stiffness * (0.5 - y[i]) * (0.5 - y[i])
 92 |     elif y[i] > ywidth - 0.5:
 93 |         fy[i] = stiffness * (ywidth - 0.5 - y[i])
 94 |         pe[i] += 0.5 * stiffness * (ywidth - 0.5 - y[i]) * (ywidth - 0.5 - y[i])
 95 |     else:
 96 |         fy[i] = 0.
 97 | 
 98 | 
 99 | class MDSolverBase(object):
100 |     def __init__(self, num_particles, x=None, y=None, vx=None, vy=None,
101 |                  xmax=100., ymax=100., dx=1.5, init_T=0.,
102 |                  backend=None):
103 |         self.backend = get_backend(backend)
104 |         self.num_particles = num_particles
105 |         self.xmin, self.xmax = 0., xmax
106 |         self.ymin, self.ymax = 0., ymax
107 |         self.m = 1.
108 |         if x is None and y is None:
109 |             self.x, self.y = self.setup_positions(num_particles, dx)
110 |         if vx is None and vy is None:
111 |             self.vx, self.vy = self.setup_velocities(init_T, num_particles)
112 |         self.fx = carr.zeros_like(self.x, backend=self.backend)
113 |         self.fy = carr.zeros_like(self.x, backend=self.backend)
114 |         self.pe = carr.zeros_like(self.x, backend=self.backend)
115 |         self.energy_calc = Reduction("a+b", map_func=calculate_energy,
116 |                                      backend=self.backend)
117 | 
118 |     def setup_velocities(self, T, num_particles):
119 |         np.random.seed(123)
120 |         vx = np.random.uniform(0, 1., size=num_particles).astype(np.float64)
121 |         vy = np.random.uniform(0, 1., size=num_particles).astype(np.float64)
122 |         T_current = np.average(vx ** 2 + vy ** 2)
123 |         scaling_factor = (T / T_current) ** 0.5
124 |         vx = vx * scaling_factor
125 |         vy = vy * scaling_factor
126 |         return wrap(vx, vy, backend=self.backend)
127 | 
128 |     def setup_positions(self, num_particles, dx):
129 |         ndim = np.ceil(num_particles ** 0.5)
130 |         dim_length = ndim * dx
131 | 
132 |         self.xmax = dim_length * 3
133 |         self.ymax = dim_length * 3
134 | 
135 |         xmin_eff = ((self.xmax - self.xmin) - dim_length) / 2.
136 |         xmax_eff = ((self.xmax - self.xmin) + dim_length) / 2.
137 | 
138 |         x, y = np.mgrid[xmin_eff:xmax_eff:dx, xmin_eff:xmax_eff:dx]
139 |         x = x.ravel().astype(np.float64)[:num_particles]
140 |         y = y.ravel().astype(np.float64)[:num_particles]
141 |         return wrap(x, y, backend=self.backend)
142 | 
143 |     def post_step(self, step):
144 |         energy = self.energy_calc(self.vx, self.vy, self.pe,
145 |                                   self.num_particles)
146 |         print("Energy at time step =", step, "is", energy)
147 | 
148 |     def pull(self):
149 |         self.x.pull()
150 |         self.y.pull()
151 | 
152 |     def plot(self):
153 |         import matplotlib.pyplot as plt
154 |         plt.xlim(self.xmin, self.xmax)
155 |         plt.ylim(self.ymin, self.ymax)
156 |         plt.scatter(self.x.data, self.y.data, 4.2)
157 |         plt.show()
158 | 
159 | 
160 | class MDSolver(MDSolverBase):
161 |     def __init__(self, num_particles, x=None, y=None, vx=None, vy=None,
162 |                  xmax=100., ymax=100., dx=1.5, init_T=0.,
163 |                  backend=None):
164 |         super().__init__(num_particles, x=x, y=y, vx=vx, vy=vy,
165 |                          xmax=xmax, ymax=ymax, dx=dx, init_T=init_T,
166 |                          backend=backend)
167 |         self.init_forces = Elementwise(calculate_force, backend=self.backend)
168 |         self.step1 = Elementwise(step_method1, backend=self.backend)
169 |         self.step2 = Elementwise(step_method2, backend=self.backend)
170 | 
171 |     def solve(self, t, dt):
172 |         num_steps = int(t // dt)
173 |         self.init_forces(self.x, self.y, self.fx, self.fy, self.pe,
174 |                          self.num_particles)
175 |         for i in range(num_steps):
176 |             self.step1(self.x, self.y, self.vx, self.vy, self.fx, self.fy,
177 |                        self.pe, self.xmin, self.xmax, self.ymin, self.ymax,
178 |                        self.m, dt, self.num_particles)
179 |             self.step2(self.x, self.y, self.vx, self.vy, self.fx, self.fy,
180 |                        self.pe, self.xmin, self.xmax, self.ymin, self.ymax,
181 |                        self.m, dt, self.num_particles)
182 |             if i % 100 == 0:
183 |                 self.post_step(i)
184 | 
185 | 
186 | if __name__ == '__main__':
187 |     from compyle.utils import ArgumentParser
188 |     p = ArgumentParser()
189 |     p.add_argument(
190 |         '--show', action='store_true', dest='show',
191 |         default=False, help='Show plot at end of simulation'
192 |     )
193 | 
194 |     p.add_argument('-n', action='store', type=int, dest='n',
195 |                    default=100, help='Number of particles')
196 | 
197 |     p.add_argument('--tf', action='store', type=float, dest='t',
198 |                    default=40., help='Final time')
199 | 
200 |     p.add_argument('--dt', action='store', type=float, dest='dt',
201 |                    default=0.02, help='Time step')
202 | 
203 |     o = p.parse_args()
204 | 
205 |     solver = MDSolver(o.n, backend=o.backend)
206 | 
207 |     start = time.time()
208 |     solver.solve(o.t, o.dt)
209 |     end = time.time()
210 |     print("Time taken for N = %i is %g secs" % (o.n, (end - start)))
211 |     if o.show:
212 |         solver.pull()
213 |         solver.plot()
214 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/nnps.py:
--------------------------------------------------------------------------------
  1 | from nnps_kernels import *
  2 | from compyle.config import get_config
  3 | from compyle.api import declare, annotate
  4 | from compyle.parallel import serial, Elementwise, Reduction, Scan
  5 | from compyle.array import get_backend, wrap
  6 | from compyle.low_level import atomic_inc, cast
  7 | from math import floor
  8 | from time import time
  9 | 
 10 | import numpy as np
 11 | import compyle.array as carr
 12 | 
 13 | 
 14 | class NNPS(object):
 15 |     def __init__(self, x, y, h, xmax, ymax, backend=None):
 16 |         self.backend = backend
 17 |         self.num_particles = x.length
 18 |         self.x, self.y = x, y
 19 |         self.h = h
 20 | 
 21 |         cmax = np.array([floor(xmax / h), floor(ymax / h)], dtype=np.int32)
 22 |         self.max_key = 1 + flatten(cmax[0], cmax[1], 1 + cmax[1])
 23 |         self.qmax = 1 + cmax[1]
 24 | 
 25 |         # neighbor kernels
 26 |         self.find_neighbor_lengths = Elementwise(find_neighbor_lengths_knl,
 27 |                                                  backend=self.backend)
 28 |         self.find_neighbors = Elementwise(find_neighbors_knl,
 29 |                                           backend=self.backend)
 30 |         self.scan_start_indices = Scan(input=input_start_indices,
 31 |                                        output=output_start_indices,
 32 |                                        scan_expr="a+b", dtype=np.int32,
 33 |                                        backend=self.backend)
 34 |         self.init_arrays()
 35 | 
 36 |     def init_arrays(self):
 37 |         # sort arrays
 38 |         self.bin_counts = carr.zeros(self.max_key, dtype=np.int32,
 39 |                                      backend=self.backend)
 40 |         self.start_indices = carr.zeros(self.max_key, dtype=np.int32,
 41 |                                         backend=self.backend)
 42 |         self.keys = carr.zeros(self.num_particles, dtype=np.int32,
 43 |                                backend=self.backend)
 44 |         self.sorted_indices = carr.zeros(self.num_particles, dtype=np.int32,
 45 |                                          backend=self.backend)
 46 | 
 47 |         # neighbor arrays
 48 |         self.nbr_lengths = carr.zeros(self.num_particles, dtype=np.int32,
 49 |                                       backend=self.backend)
 50 |         self.nbr_starts = carr.zeros(self.num_particles, dtype=np.int32,
 51 |                                      backend=self.backend)
 52 |         self.nbrs = carr.zeros(2 * self.num_particles, dtype=np.int32,
 53 |                                backend=self.backend)
 54 | 
 55 |     def reset_arrays(self):
 56 |         # sort arrays
 57 |         self.bin_counts.fill(0)
 58 |         self.start_indices.fill(0)
 59 |         self.sorted_indices.fill(0)
 60 | 
 61 |         # neighbors array
 62 |         self.nbr_lengths.fill(0)
 63 |         self.nbr_starts.fill(0)
 64 | 
 65 |     def get_neighbors(self):
 66 |         self.find_neighbor_lengths(self.x, self.y, self.h, self.qmax,
 67 |                                    self.start_indices, self.sorted_indices,
 68 |                                    self.bin_counts, self.nbr_lengths,
 69 |                                    self.max_key)
 70 |         self.scan_start_indices(counts=self.nbr_lengths,
 71 |                                 indices=self.nbr_starts)
 72 |         self.total_neighbors = int(self.nbr_lengths[-1] + self.nbr_starts[-1])
 73 |         self.nbrs.resize(self.total_neighbors)
 74 |         self.find_neighbors(self.x, self.y, self.h, self.qmax,
 75 |                             self.start_indices, self.sorted_indices,
 76 |                             self.bin_counts, self.nbr_starts,
 77 |                             self.nbrs, self.max_key)
 78 | 
 79 | 
 80 | class NNPSCountingSort(NNPS):
 81 |     def __init__(self, x, y, h, xmax, ymax, backend=None):
 82 |         super().__init__(x, y, h, xmax, ymax, backend=backend)
 83 |         # sort kernels
 84 |         self.count_bins = Elementwise(count_bins, backend=self.backend)
 85 |         self.sort_indices = Elementwise(sort_indices, backend=self.backend)
 86 | 
 87 |     def init_arrays(self):
 88 |         super().init_arrays()
 89 |         self.sort_offsets = carr.zeros(self.num_particles, dtype=np.int32,
 90 |                                        backend=self.backend)
 91 | 
 92 |     def reset_arrays(self):
 93 |         super().reset_arrays()
 94 |         # sort arrays
 95 |         self.sort_offsets.fill(0)
 96 | 
 97 |     def build(self):
 98 |         self.reset_arrays()
 99 |         self.count_bins(self.x, self.y, self.h, self.qmax, self.keys,
100 |                         self.bin_counts, self.sort_offsets)
101 |         self.scan_start_indices(counts=self.bin_counts,
102 |                                 indices=self.start_indices)
103 |         self.sort_indices(self.keys, self.sort_offsets, self.start_indices,
104 |                           self.sorted_indices)
105 | 
106 | 
107 | class NNPSRadixSort(NNPS):
108 |     def __init__(self, x, y, h, xmax, ymax, backend=None):
109 |         super().__init__(x, y, h, xmax, ymax, backend=backend)
110 |         self.max_bits = np.ceil(np.log2(self.max_key))
111 | 
112 |         # sort kernels
113 |         self.fill_keys = Elementwise(fill_keys, backend=self.backend)
114 |         self.fill_bin_counts = Elementwise(fill_bin_counts,
115 |                                            backend=self.backend)
116 |         self.scan_keys = Scan(input=input_scan_keys,
117 |                               output=output_scan_keys,
118 |                               scan_expr="a+b", dtype=np.int32,
119 |                               backend=self.backend)
120 | 
121 |     def init_arrays(self):
122 |         super().init_arrays()
123 |         # sort arrays
124 |         self.sorted_keys = carr.zeros(self.num_particles, dtype=np.int32,
125 |                                       backend=self.backend)
126 |         self.indices = carr.zeros(self.num_particles, dtype=np.int32,
127 |                                   backend=self.backend)
128 | 
129 |     def reset_arrays(self):
130 |         super().reset_arrays()
131 |         self.sorted_keys.fill(0)
132 | 
133 |     def build(self):
134 |         self.reset_arrays()
135 |         self.fill_keys(self.x, self.y, self.h, self.qmax, self.indices,
136 |                        self.keys)
137 |         self.sorted_keys, self.sorted_indices = carr.sort_by_keys(
138 |             [self.keys, self.indices],
139 |             key_bits=self.max_bits, backend=self.backend)
140 |         self.scan_keys(keys=self.sorted_keys,
141 |                        start_indices=self.start_indices)
142 |         self.fill_bin_counts(self.sorted_keys, self.start_indices,
143 |                              self.bin_counts, self.num_particles)
144 | 
145 | 
146 | if __name__ == "__main__":
147 |     import sys
148 |     backend = sys.argv[1] if len(sys.argv) > 1 else 'cython'
149 |     np.random.seed(123)
150 |     num_particles = 20
151 |     x = np.random.uniform(0, 10., size=num_particles).astype(np.float32)
152 |     y = np.random.uniform(0, 10., size=num_particles).astype(np.float32)
153 |     x, y = wrap(x, y, backend=backend)
154 |     nnps = NNPSRadixSort(x, y, 3., 10., 10., backend=backend)
155 |     nnps.build()
156 |     nnps.get_neighbors()
157 |     print(nnps.start_indices)
158 |     print(nnps.bin_counts)
159 |     print(nnps.nbr_lengths)
160 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/nnps_kernels.py:
--------------------------------------------------------------------------------
  1 | from compyle.api import declare, annotate
  2 | from compyle.parallel import serial
  3 | from compyle.low_level import atomic_inc, cast
  4 | from math import floor
  5 | import numpy as np
  6 | 
  7 | 
  8 | @annotate
  9 | def find_cell_id(x, y, h, c):
 10 |     c[0] = cast(floor((x) / h), "int")
 11 |     c[1] = cast(floor((y) / h), "int")
 12 | 
 13 | 
 14 | @annotate
 15 | def flatten(p, q, qmax):
 16 |     return p * qmax + q
 17 | 
 18 | 
 19 | @serial
 20 | @annotate
 21 | def count_bins(i, x, y, h, cmax, keys, bin_counts,
 22 |                sort_offsets):
 23 |     c = declare('matrix(2, "int")')
 24 |     find_cell_id(x[i], y[i], h, c)
 25 |     key = flatten(c[0], c[1], cmax)
 26 |     keys[i] = key
 27 |     idx = atomic_inc(bin_counts[key])
 28 |     sort_offsets[i] = idx
 29 | 
 30 | 
 31 | @annotate
 32 | def sort_indices(i, keys, sort_offsets, start_indices, sorted_indices):
 33 |     key = keys[i]
 34 |     offset = sort_offsets[i]
 35 |     start_idx = start_indices[key]
 36 |     sorted_indices[start_idx + offset] = i
 37 | 
 38 | 
 39 | @annotate
 40 | def input_start_indices(i, counts):
 41 |     return 0 if i == 0 else counts[i - 1]
 42 | 
 43 | 
 44 | @annotate
 45 | def output_start_indices(i, item, indices):
 46 |     indices[i] = item
 47 | 
 48 | 
 49 | @annotate
 50 | def fill_keys(i, x, y, h, cmax, indices, keys):
 51 |     c = declare('matrix(2, "int")')
 52 |     find_cell_id(x[i], y[i], h, c)
 53 |     key = flatten(c[0], c[1], cmax)
 54 |     keys[i] = key
 55 |     indices[i] = i
 56 | 
 57 | 
 58 | @annotate
 59 | def input_scan_keys(i, keys):
 60 |     return 1 if i == 0 or keys[i] != keys[i - 1] else 0
 61 | 
 62 | 
 63 | @annotate
 64 | def output_scan_keys(i, item, prev_item, keys, start_indices):
 65 |     key = keys[i]
 66 |     if item != prev_item:
 67 |         start_indices[key] = i
 68 | 
 69 | 
 70 | @annotate
 71 | def fill_bin_counts(i, keys, start_indices, bin_counts, num_particles):
 72 |     if i == num_particles - 1:
 73 |         last_key = keys[num_particles - 1]
 74 |         bin_counts[last_key] = num_particles - start_indices[last_key]
 75 |     if i == 0 or keys[i] == keys[i - 1]:
 76 |         return
 77 |     key = keys[i]
 78 |     prev_key = keys[i - 1]
 79 |     bin_counts[prev_key] = start_indices[key] - start_indices[prev_key]
 80 | 
 81 | 
 82 | @annotate
 83 | def find_neighbor_lengths_knl(i, x, y, h, cmax, start_indices, sorted_indices,
 84 |                               bin_counts, nbr_lengths, max_key):
 85 |     d = h * h
 86 |     q_c = declare('matrix(2, "int")')
 87 |     find_cell_id(x[i], y[i], h, q_c)
 88 | 
 89 |     for p in range(-1, 2):
 90 |         for q in range(-1, 2):
 91 |             cx = q_c[0] + p
 92 |             cy = q_c[1] + q
 93 | 
 94 |             key = flatten(cx, cy, cmax)
 95 | 
 96 |             if key >= max_key or key < 0:
 97 |                 continue
 98 | 
 99 |             start_idx = start_indices[key]
100 |             np = bin_counts[key]
101 | 
102 |             for k in range(np):
103 |                 j = sorted_indices[start_idx + k]
104 |                 xij = x[i] - x[j]
105 |                 yij = y[i] - y[j]
106 |                 rij2 = xij * xij + yij * yij
107 | 
108 |                 if rij2 < d:
109 |                     nbr_lengths[i] += 1
110 | 
111 | 
112 | @annotate
113 | def find_neighbors_knl(i, x, y, h, cmax, start_indices, sorted_indices,
114 |                        bin_counts, nbr_starts, nbrs, max_key):
115 |     d = h * h
116 |     q_c = declare('matrix(2, "int")')
117 |     find_cell_id(x[i], y[i], h, q_c)
118 |     length = 0
119 |     nbr_start_idx = nbr_starts[i]
120 | 
121 |     for p in range(-1, 2):
122 |         for q in range(-1, 2):
123 |             cx = q_c[0] + p
124 |             cy = q_c[1] + q
125 | 
126 |             key = flatten(cx, cy, cmax)
127 | 
128 |             if key >= max_key or key < 0:
129 |                 continue
130 | 
131 |             start_idx = start_indices[key]
132 |             np = bin_counts[key]
133 | 
134 |             for k in range(np):
135 |                 j = sorted_indices[start_idx + k]
136 |                 xij = x[i] - x[j]
137 |                 yij = y[i] - y[j]
138 |                 rij2 = xij * xij + yij * yij
139 | 
140 |                 if rij2 < d:
141 |                     nbrs[nbr_start_idx + length] = j
142 |                     length += 1
143 | 


--------------------------------------------------------------------------------
/examples/molecular_dynamics/performance_comparison.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import time
  3 | import md_simple
  4 | import md_nnps
  5 | 
  6 | from compyle.config import get_config
  7 | 
  8 | 
  9 | def solve(n, backend, solver_algo, tf=0.5, dt=0.02, use_count_sort=False):
 10 |     solver = solver_algo(n, backend=backend.replace("_omp", ""))
 11 |     start = time.time()
 12 |     solver.solve(tf, dt)
 13 |     end = time.time()
 14 |     print("Time taken for backend = %s, N = %i is %g secs" %
 15 |           (backend, n, (end - start)))
 16 |     return end - start
 17 | 
 18 | 
 19 | def compare(backends, n_list, solver_algo, niter=3):
 20 |     t_list = {b: [] for b in backends}
 21 |     speedups = {b: [] for b in backends}
 22 |     for n in n_list:
 23 |         print("Running for N = %i" % n)
 24 |         for backend in backends:
 25 |             if "omp" in backend:
 26 |                 get_config().use_openmp = True
 27 |             t = 1e9
 28 |             for it in range(niter):
 29 |                 t = min(t, solve(n, backend, solver_algo))
 30 |             t_list[backend].append(t)
 31 |             if "omp" in backend:
 32 |                 get_config().use_openmp = False
 33 | 
 34 |     if 'cython' in backends:
 35 |         for backend in backends:
 36 |             for i, n in enumerate(n_list):
 37 |                 speedups[backend].append(
 38 |                     t_list["cython"][i] / t_list[backend][i])
 39 |     else:
 40 |         speedups = None
 41 | 
 42 |     return speedups, t_list
 43 | 
 44 | 
 45 | def compare_implementations(backend, n_list, niter=3):
 46 |     import matplotlib.pyplot as plt
 47 |     sp, nnps_tlist = compare([backend], n_list,
 48 |                              md_nnps.MDSolver, niter=niter)
 49 |     sp, simple_tlist = compare([backend], n_list,
 50 |                                md_simple.MDSolver, niter=niter)
 51 | 
 52 |     speedup = [simple_tlist[backend][i] / nnps_tlist[backend][i]
 53 |                for i in range(len(n_list))]
 54 | 
 55 |     plt.loglog(n_list, nnps_tlist[backend], 'x-', label="Linear")
 56 |     plt.loglog(n_list, simple_tlist[backend], 'x-', label="Simple")
 57 | 
 58 |     plt.xlabel("Number of particles")
 59 |     plt.ylabel("Time (secs)")
 60 |     plt.legend()
 61 |     plt.grid(True)
 62 |     plt.savefig("time_comp_impl.png", dpi=300)
 63 | 
 64 |     plt.clf()
 65 | 
 66 |     plt.loglog(n_list, speedup, 'x-')
 67 | 
 68 |     plt.xlabel("Number of particles")
 69 |     plt.ylabel("Speedup")
 70 |     plt.grid(True)
 71 |     plt.savefig("speedup_comp_impl.png", dpi=300)
 72 | 
 73 | 
 74 | def plot(n_list, speedups, t_list, label):
 75 |     backend_label_map = {'cython': 'Cython', 'cython_omp': 'OpenMP',
 76 |                          'opencl': 'OpenCL', 'cuda': 'CUDA'}
 77 |     import matplotlib.pyplot as plt
 78 |     plt.figure()
 79 | 
 80 |     if speedups:
 81 |         for backend, arr in speedups.items():
 82 |             if backend == "cython":
 83 |                 continue
 84 |             plt.semilogx(n_list, arr, 'x-', label=backend_label_map[backend])
 85 | 
 86 |         plt.xlabel("Number of particles")
 87 |         plt.ylabel("Speedup")
 88 |         plt.legend()
 89 |         plt.grid(True)
 90 |         plt.savefig("%s_speedup_%s.png" %
 91 |                     (label, "_".join(speedups.keys())), dpi=300)
 92 | 
 93 |     plt.clf()
 94 | 
 95 |     for backend, arr in t_list.items():
 96 |         plt.loglog(n_list, arr, 'x-', label=backend_label_map[backend])
 97 | 
 98 |     plt.xlabel("Number of particles")
 99 |     plt.ylabel("Time (secs)")
100 |     plt.legend()
101 |     plt.grid(True)
102 |     plt.savefig("%s_time_%s.png" % (label, "_".join(t_list.keys())), dpi=300)
103 | 
104 | 
105 | if __name__ == "__main__":
106 |     from argparse import ArgumentParser
107 |     p = ArgumentParser()
108 |     p.add_argument(
109 |         '-c', '--comparison', action='store', dest='comp', default='gpu_comp',
110 |         choices=['gpu_comp', 'omp_comp', 'comp_algo'],
111 |         help='Choose the comparison.'
112 |     )
113 |     p.add_argument(
114 |         '--nnps', action='store', dest='nnps', default='linear',
115 |         choices=['linear', 'simple'],
116 |         help='Choose algorithm.'
117 |     )
118 |     p.add_argument(
119 |         '--use-double', action='store_true', dest='use_double',
120 |         default=False,  help='Use double precision on the GPU.'
121 |     )
122 | 
123 |     o = p.parse_args()
124 |     get_config().use_double = o.use_double
125 |     solver_algo = (md_nnps.MDNNPSSolver if o.nnps == 'linear'
126 |                    else md_simple.MDSolver)
127 |     n_list = [10000 * (2 ** i) for i in range(10)] if o.nnps == 'linear' else \
128 |         [500 * (2 ** i) for i in range(8)]
129 | 
130 |     if o.comp == "gpu_comp":
131 |         backends = ["opencl", "cuda", "cython"]
132 |         print("Running for", n_list)
133 |         speedups, t_list = compare(backends, n_list, solver_algo)
134 |         plot(n_list, speedups, t_list, o.nnps)
135 |     elif o.comp == "omp_comp":
136 |         backends = ["cython_omp", "cython"]
137 |         print("Running for", n_list)
138 |         speedups, t_list = compare(backends, n_list, solver_algo)
139 |         plot(n_list, speedups, t_list, o.nnps)
140 |     elif o.comp == "comp_algo":
141 |         backend = "cython"
142 |         n_list = [500, 1000, 2000, 4000, 8000, 16000, 32000]
143 |         print("Running for", n_list)
144 |         compare_implementations(backend, n_list)
145 | 


--------------------------------------------------------------------------------
/examples/vm_elementwise.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from math import pi
 3 | import time
 4 | 
 5 | from compyle.api import declare, annotate
 6 | from compyle.parallel import Elementwise
 7 | from compyle.array import wrap
 8 | 
 9 | 
10 | @annotate(double='xi, yi, xj, yj, gamma', result='doublep')
11 | def point_vortex(xi, yi, xj, yj, gamma, result):
12 |     xij = xi - xj
13 |     yij = yi - yj
14 |     r2ij = xij*xij + yij*yij
15 |     if r2ij < 1e-14:
16 |         result[0] = 0.0
17 |         result[1] = 0.0
18 |     else:
19 |         tmp = gamma/(2.0*pi*r2ij)
20 |         result[0] = -tmp*yij
21 |         result[1] = tmp*xij
22 | 
23 | 
24 | @annotate(int='i, nv', gdoublep='x, y, gamma, u, v')
25 | def velocity(i, x, y, gamma, u, v, nv):
26 |     j = declare('int')
27 |     tmp = declare('matrix(2)')
28 |     vx = 0.0
29 |     vy = 0.0
30 |     xi = x[i]
31 |     yi = y[i]
32 |     for j in range(nv):
33 |         point_vortex(xi, yi, x[j], y[j], gamma[j], tmp)
34 |         vx += tmp[0]
35 |         vy += tmp[1]
36 |     u[i] = vx
37 |     v[i] = vy
38 | 
39 | 
40 | def make_vortices(nv, backend):
41 |     x = np.linspace(-1, 1, nv)
42 |     y = x.copy()
43 |     gamma = np.ones(nv)
44 |     u = np.zeros_like(x)
45 |     v = np.zeros_like(x)
46 |     x, y, gamma, u, v = wrap(x, y, gamma, u, v, backend=backend)
47 |     return x, y, gamma, u, v, nv
48 | 
49 | 
50 | def run(nv, backend):
51 |     e = Elementwise(velocity, backend=backend)
52 |     args = make_vortices(nv, backend)
53 |     t1 = time.time()
54 |     e(*args)
55 |     print(time.time() - t1)
56 |     u = args[-3]
57 |     u.pull()
58 |     return e, args
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     from compyle.utils import ArgumentParser
63 |     p = ArgumentParser()
64 |     p.add_argument('-n', action='store', type=int, dest='n',
65 |                    default=10000, help='Number of particles.')
66 |     o = p.parse_args()
67 |     run(o.n, o.backend)
68 | 


--------------------------------------------------------------------------------
/examples/vm_elementwise_jit.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from math import pi
 3 | import time
 4 | 
 5 | from compyle.api import declare, annotate
 6 | from compyle.parallel import Elementwise
 7 | from compyle.array import wrap
 8 | 
 9 | 
10 | @annotate
11 | def point_vortex(xi, yi, xj, yj, gamma, result):
12 |     xij = xi - xj
13 |     yij = yi - yj
14 |     r2ij = xij*xij + yij*yij
15 |     if r2ij < 1.0e-14:
16 |         result[0] = 0.0
17 |         result[1] = 0.0
18 |     else:
19 |         tmp = gamma/(2.0*pi*r2ij)
20 |         result[0] = -tmp*yij
21 |         result[1] = tmp*xij
22 | 
23 | 
24 | @annotate
25 | def velocity(i, x, y, gamma, u, v, nv):
26 |     tmp = declare('matrix(2)')
27 |     xi = x[i]
28 |     yi = y[i]
29 |     vx = 0.0
30 |     vy = 0.0
31 |     u[i] = 0.0
32 |     v[i] = 0.0
33 |     for j in range(nv):
34 |         point_vortex(xi, yi, x[j], y[j], gamma[j], tmp)
35 |         vx += tmp[0]
36 |         vy += tmp[1]
37 |     u[i] = vx
38 |     v[i] = vy
39 | 
40 | 
41 | def make_vortices(nv, backend):
42 |     x = np.linspace(-1, 1, nv)
43 |     y = x.copy()
44 |     gamma = np.ones(nv)
45 |     u = np.zeros_like(x)
46 |     v = np.zeros_like(x)
47 |     x, y, gamma, u, v = wrap(x, y, gamma, u, v, backend=backend)
48 |     return x, y, gamma, u, v, nv
49 | 
50 | 
51 | def run(nv, backend):
52 |     e = Elementwise(velocity, backend=backend)
53 |     args = make_vortices(nv, backend)
54 |     t1 = time.time()
55 |     e(*args)
56 |     print(time.time() - t1)
57 |     u = args[-3]
58 |     u.pull()
59 |     return e, args
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     from compyle.utils import ArgumentParser
64 |     p = ArgumentParser()
65 |     p.add_argument('-n', action='store', type=int, dest='n',
66 |                    default=10000, help='Number of particles.')
67 |     o = p.parse_args()
68 |     run(o.n, o.backend)
69 | 


--------------------------------------------------------------------------------
/examples/vm_kernel.py:
--------------------------------------------------------------------------------
  1 | """Shows the use of a raw opencl Kernel but written using pure Python. It
  2 | makes use of local memory allocated on the host.
  3 | 
  4 | Note that the local memory is allocated as a multiple of workgroup size times
  5 | the size of the data type automatically.
  6 | 
  7 | This is a raw opencl kernel so will not work on Cython!
  8 | 
  9 | """
 10 | import numpy as np
 11 | from math import pi
 12 | import time
 13 | 
 14 | from compyle.api import annotate, declare, wrap
 15 | from compyle.low_level import (Kernel, LocalMem, local_barrier,
 16 |                                LID_0, LDIM_0, GDIM_0)
 17 | 
 18 | 
 19 | @annotate(double='xi, yi, xj, yj, gamma', result='doublep')
 20 | def point_vortex(xi, yi, xj, yj, gamma, result):
 21 |     xij = xi - xj
 22 |     yij = yi - yj
 23 |     r2ij = xij*xij + yij*yij
 24 |     if r2ij < 1.0e-14:
 25 |         result[0] = 0.0
 26 |         result[1] = 0.0
 27 |     else:
 28 |         tmp = gamma/(2.0*pi*r2ij)
 29 |         result[0] = -tmp*yij
 30 |         result[1] = tmp*xij
 31 | 
 32 | 
 33 | @annotate(nv='int', gdoublep='x, y, gamma, u, v', ldoublep='xc, yc, gc')
 34 | def velocity(x, y, gamma, u, v, xc, yc, gc, nv):
 35 |     i, gid, nb = declare('int', 3)
 36 |     j, ti, nt, jb = declare('int', 4)
 37 |     ti = LID_0
 38 |     nt = LDIM_0
 39 |     gid = GID_0
 40 |     i = gid*nt + ti
 41 |     idx = declare('int')
 42 |     tmp = declare('matrix(2)')
 43 |     uj, vj = declare('double', 2)
 44 |     nb = GDIM_0
 45 | 
 46 |     if i < nv:
 47 |         xi = x[i]
 48 |         yi = y[i]
 49 |     uj = 0.0
 50 |     vj = 0.0
 51 |     for jb in range(nb):
 52 |         idx = jb*nt + ti
 53 |         if idx < nv:
 54 |             xc[ti] = x[idx]
 55 |             yc[ti] = y[idx]
 56 |             gc[ti] = gamma[idx]
 57 |         else:
 58 |             gc[ti] = 0.0
 59 |         local_barrier()
 60 | 
 61 |         if i < nv:
 62 |             for j in range(nt):
 63 |                 point_vortex(xi, yi, xc[j], yc[j], gc[j], tmp)
 64 |                 uj += tmp[0]
 65 |                 vj += tmp[1]
 66 | 
 67 |         local_barrier()
 68 | 
 69 |     if i < nv:
 70 |         u[i] = uj
 71 |         v[i] = vj
 72 | 
 73 | 
 74 | def make_vortices(nv, backend):
 75 |     x = np.linspace(-1, 1, nv)
 76 |     y = x.copy()
 77 |     gamma = np.ones(nv)
 78 |     u = np.zeros_like(x)
 79 |     v = np.zeros_like(x)
 80 |     x, y, gamma, u, v = wrap(x, y, gamma, u, v, backend=backend)
 81 |     xc, yc, gc = (LocalMem(1, backend), LocalMem(1, backend),
 82 |                   LocalMem(1, backend))
 83 |     return x, y, gamma, u, v, xc, yc, gc, nv
 84 | 
 85 | 
 86 | def run(nv, backend):
 87 |     e = Kernel(velocity, backend=backend)
 88 |     args = make_vortices(nv, backend)
 89 |     t1 = time.time()
 90 |     gs = ((nv + 128 - 1)//128)*128
 91 |     e(*args, global_size=(gs,))
 92 |     print(time.time() - t1)
 93 |     u = args[3]
 94 |     u.pull()
 95 |     print(u.data)
 96 |     return e, args
 97 | 
 98 | 
 99 | if __name__ == '__main__':
100 |     from compyle.utils import ArgumentParser
101 |     p = ArgumentParser()
102 |     p.add_argument('-n', action='store', type=int, dest='n',
103 |                    default=10000, help='Number of particles.')
104 |     o = p.parse_args()
105 |     assert o.backend in ['opencl', 'cuda'], ("Only OpenCL/CUDA backend is "
106 |                                              "supported.")
107 |     run(o.n, o.backend)
108 | 


--------------------------------------------------------------------------------
/examples/vm_numba.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from math import pi
 3 | import time
 4 | 
 5 | from numba import jit
 6 | 
 7 | 
 8 | @jit
 9 | def point_vortex(xi, yi, xj, yj, gamma, result):
10 |     xij = xi - xj
11 |     yij = yi - yj
12 |     r2ij = xij*xij + yij*yij
13 |     if r2ij < 1e-14:
14 |         result[0] = 0.0
15 |         result[1] = 0.0
16 |     else:
17 |         tmp = gamma/(2.0*pi*r2ij)
18 |         result[0] = -tmp*yij
19 |         result[1] = tmp*xij
20 | 
21 | 
22 | @jit
23 | def velocity(x, y, gamma, u, v, nv):
24 |     tmp = np.zeros(2)
25 |     for i in range(nv):
26 |         xi = x[i]
27 |         yi = y[i]
28 |         u[i] = 0.0
29 |         v[i] = 0.0
30 |         for j in range(nv):
31 |             point_vortex(xi, yi, x[j], y[j], gamma[j], tmp)
32 |             u[i] += tmp[0]
33 |             v[i] += tmp[1]
34 | 
35 | 
36 | def make_vortices(nv):
37 |     x = np.linspace(-1, 1, nv)
38 |     y = x.copy()
39 |     gamma = np.ones(nv)
40 |     u = np.zeros_like(x)
41 |     v = np.zeros_like(x)
42 |     return x, y, gamma, u, v, nv
43 | 
44 | 
45 | def run(nv):
46 |     args = make_vortices(nv)
47 |     t1 = time.time()
48 |     velocity(*args)
49 |     print(time.time() - t1)
50 |     u = args[-3]
51 |     print(u)
52 |     return velocity, args
53 | 
54 | 
55 | if __name__ == '__main__':
56 |     from argparse import ArgumentParser
57 |     p = ArgumentParser()
58 |     p.add_argument('-n', action='store',
59 |                    type=int, dest='n', default=10000)
60 |     o = p.parse_args()
61 |     run(o.n)
62 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "wheel>=0.29.0",
4 |     "setuptools>=42.0.0",
5 |     "numpy>=2.0,<3",
6 |     "Cython>=0.20",
7 |     "mako",
8 |     "pytools"
9 | ]


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | mako
2 | pytools
3 | cython
4 | numpy
5 | pytest
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from setuptools import setup, find_packages
 3 | 
 4 | try:
 5 |     from Cython.Distutils import Extension
 6 |     from Cython.Build import cythonize
 7 | except ImportError:
 8 |     from distutils.core import Extension
 9 | 
10 |     def cythonize(*args, **kw):
11 |         return args[0]
12 | 
13 | 
14 | def get_version():
15 |     import os
16 |     data = {}
17 |     fname = os.path.join('compyle', '__init__.py')
18 |     exec(compile(open(fname).read(), fname, 'exec'), data)
19 |     return data.get('__version__')
20 | 
21 | 
22 | install_requires = ['mako', 'pytools', 'cython', 'numpy']
23 | tests_require = ['pytest']
24 | if sys.version_info[0] < 3:
25 |     tests_require += ['mock>=1.0']
26 | docs_require = ['sphinx']
27 | cuda_require = ['pycuda', 'cupy']
28 | opencl_require = ['pyopencl']
29 | 
30 | classes = '''
31 | Development Status :: 4 - Beta
32 | Intended Audience :: Developers
33 | Intended Audience :: Science/Research
34 | Natural Language :: English
35 | Operating System :: MacOS :: MacOS X
36 | Operating System :: Microsoft :: Windows
37 | Operating System :: POSIX
38 | Operating System :: Unix
39 | Programming Language :: Python
40 | Programming Language :: Python :: 2.7
41 | Programming Language :: Python :: 3
42 | Topic :: Scientific/Engineering
43 | Topic :: Software Development :: Code Generators
44 | Topic :: Software Development :: Compilers
45 | Topic :: Software Development :: Libraries
46 | Topic :: Utilities
47 | '''
48 | classifiers = [x.strip() for x in classes.splitlines() if x]
49 | 
50 | ext_modules = [
51 |         Extension(
52 |             name="compyle.thrust.sort",
53 |             sources=["compyle/thrust/sort.pyx"],
54 |             language="c++"
55 |             ),
56 |         ]
57 | 
58 | setup(
59 |     name='compyle',
60 |     version=get_version(),
61 |     author='Prabhu Ramachandran',
62 |     author_email='prabhu@aero.iitb.ac.in',
63 |     description='Execute a subset of Python on HPC platforms',
64 |     long_description=open('README.rst').read(),
65 |     license="BSD-3-Clause",
66 |     url='https://github.com/pypr/compyle',
67 |     classifiers=classifiers,
68 |     packages=find_packages(),
69 |     ext_modules=cythonize(ext_modules, language="c++"),
70 |     install_requires=install_requires,
71 |     extras_require={
72 |         "docs": docs_require,
73 |         "tests": tests_require,
74 |         "dev": docs_require + tests_require,
75 |         "cuda": cuda_require,
76 |         "opencl": opencl_require,
77 |     },
78 | )
79 | 


--------------------------------------------------------------------------------