├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .mailmap ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── doc ├── Makefile ├── pyop2.tex └── sphinx │ ├── Makefile │ └── source │ ├── architecture.rst │ ├── backends.rst │ ├── caching.rst │ ├── concepts.rst │ ├── conf.py │ ├── images │ ├── assembly.svg │ ├── csr.svg │ ├── direct_arg.svg │ ├── indirect_arg.svg │ ├── indirect_arg_flattened.svg │ ├── iteration_spaces.svg │ ├── mixed_assembly.svg │ ├── mixed_sparsity.svg │ ├── mixed_sparsity2.svg │ ├── mpi_matrix.svg │ ├── pyop2_architecture.svg │ ├── pyop2_colouring.svg │ ├── pyop2_device_data_state.svg │ └── pyop2_mpi_mesh.svg │ ├── index.rst │ ├── installation.rst │ ├── ir.rst │ ├── kernels.rst │ ├── linear_algebra.rst │ ├── mixed.rst │ ├── mpi.rst │ ├── plan.rst │ ├── profiling.rst │ └── user.rst ├── pyop2 ├── __init__.py ├── _version.py ├── caching.py ├── codegen │ ├── __init__.py │ ├── builder.py │ ├── c │ │ ├── inverse.c │ │ └── solve.c │ ├── loopycompat.py │ ├── node.py │ ├── optimise.py │ ├── rep2loopy.py │ └── representation.py ├── compilation.py ├── configuration.py ├── datatypes.py ├── exceptions.py ├── global_kernel.py ├── local_kernel.py ├── logger.py ├── mpi-compat.h ├── mpi.py ├── op2.py ├── parloop.py ├── profiling.py ├── sparsity.pyx ├── types │ ├── __init__.py │ ├── access.py │ ├── dat.py │ ├── data_carrier.py │ ├── dataset.py │ ├── glob.py │ ├── halo.py │ ├── map.py │ ├── mat.py │ └── set.py ├── utils.py └── version.py ├── requirements-ext.txt ├── requirements-git.txt ├── requirements-minimal.txt ├── requirements.txt ├── scripts ├── pyop2-clean └── spydump ├── setup.cfg ├── setup.py ├── test └── unit │ ├── test_api.py │ ├── test_caching.py │ ├── test_callables.py │ ├── test_configuration.py │ ├── test_dats.py │ ├── test_direct_loop.py │ ├── test_extrusion.py │ ├── test_global_reduction.py │ ├── test_globals.py │ ├── test_indirect_loop.py │ ├── test_iteration_space_dats.py │ ├── 
test_linalg.py │ ├── test_linalg_complex.py │ ├── test_matrices.py │ ├── test_petsc.py │ ├── test_subset.py │ └── test_vector_map.py └── versioneer.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set default behaviour, in case users don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Whitespace 5 | * whitespace=tab-in-indent,space-before-tab,trailing-space,tabwidth=2 6 | *.{py,pyx,pxd,pxi} whitespace=tab-in-indent,space-before-tab,trailing-space,tabwidth=4 7 | Makefile whitespace=space-before-tab,trailing-space,tabwidth=2 8 | pyop2/_version.py export-subst 9 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | # Trigger the workflow on push or pull request, 4 | # but only for the master branch 5 | on: 6 | push: 7 | branches: 8 | - master 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | jobs: 14 | test: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | # Don't immediately kill all if one Python version fails 18 | fail-fast: false 19 | matrix: 20 | python-version: ['3.9', '3.10', '3.11', '3.12'] 21 | env: 22 | CC: mpicc 23 | PETSC_DIR: ${{ github.workspace }}/petsc 24 | PETSC_ARCH: default 25 | PETSC_CONFIGURE_OPTIONS: --with-debugging=1 --with-shared-libraries=1 --with-c2html=0 --with-fortran-bindings=0 26 | RDMAV_FORK_SAFE: 1 27 | PYOP2_CI_TESTS: 1 28 | timeout-minutes: 60 29 | 30 | steps: 31 | - name: Install system dependencies 32 | shell: bash 33 | run: | 34 | sudo apt update 35 | sudo apt install build-essential mpich libmpich-dev \ 36 | libblas-dev liblapack-dev gfortran 37 | 38 | - name: Set correct Python version 39 | uses: actions/setup-python@v2 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | 43 | - name: Clone PETSc 44 | uses: actions/checkout@v2 45 | with: 46 | repository: firedrakeproject/petsc 47 | path: ${{ 
env.PETSC_DIR }} 48 | 49 | - name: Build and install PETSc 50 | shell: bash 51 | working-directory: ${{ env.PETSC_DIR }} 52 | run: | 53 | ./configure ${PETSC_CONFIGURE_OPTIONS} 54 | make 55 | 56 | - name: Build and install petsc4py 57 | shell: bash 58 | working-directory: ${{ env.PETSC_DIR }}/src/binding/petsc4py 59 | run: | 60 | python -m pip install --upgrade pip 61 | python -m pip install --upgrade wheel cython numpy 62 | python -m pip install --no-deps . 63 | 64 | - name: Checkout PyOP2 65 | uses: actions/checkout@v2 66 | with: 67 | path: PyOP2 68 | 69 | - name: Install PyOP2 dependencies 70 | shell: bash 71 | working-directory: PyOP2 72 | run: | 73 | # xargs is used to force installation of requirements in the order we specified. 74 | xargs -l1 python -m pip install < requirements-ext.txt 75 | xargs -l1 python -m pip install < requirements-git.txt 76 | python -m pip install pulp 77 | python -m pip install -U flake8 78 | python -m pip install -U pytest-timeout 79 | 80 | - name: Install PyOP2 (Python <3.12) 81 | if: ${{ matrix.python-version != '3.12' }} 82 | shell: bash 83 | working-directory: PyOP2 84 | run: python -m pip install . 
85 | 86 | # Not sure if this is a bug in setuptools or something PyOP2 is doing wrong 87 | - name: Install PyOP2 (Python == 3.12) 88 | if: ${{ matrix.python-version == '3.12' }} 89 | shell: bash 90 | working-directory: PyOP2 91 | run: | 92 | python -m pip install -U setuptools 93 | python setup.py install 94 | 95 | - name: Run linting 96 | shell: bash 97 | working-directory: PyOP2 98 | run: make lint 99 | 100 | - name: Run tests 101 | shell: bash 102 | working-directory: PyOP2 103 | run: | 104 | # Running parallel test cases separately works around a bug in pytest-mpi 105 | pytest -k "not parallel" --tb=native --timeout=480 --timeout-method=thread -o faulthandler_timeout=540 -v test 106 | mpiexec -n 3 pytest -k "parallel[3]" --tb=native --timeout=480 --timeout-method=thread -o faulthandler_timeout=540 -v test 107 | timeout-minutes: 10 108 | 109 | - name: Build documentation 110 | if: ${{ matrix.python-version == '3.10' }} 111 | shell: bash 112 | working-directory: PyOP2 113 | run: | 114 | python -m pip install sphinx 115 | make -C doc/sphinx html 116 | - name: Upload to github pages 117 | if: ${{ github.ref == 'refs/heads/master' && github.event_name == 'push' && matrix.python-version == '3.10' }} 118 | uses: crazy-max/ghaction-github-pages@v2.2.0 119 | with: 120 | build_dir: PyOP2/doc/sphinx/build/html 121 | jekyll: false 122 | env: 123 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 124 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build 2 | build 3 | dist 4 | MANIFEST 5 | PyOP2.egg-info 6 | *.py[cdo] 7 | 8 | # Extension modules 9 | sparsity.so 10 | sparsity.c 11 | sparsity.cpython*.so 12 | # Docs 13 | pyop2.coffee.rst 14 | pyop2.rst 15 | pyop2.pdf 16 | pyop2.aux 17 | pyop2.log 18 | 19 | # Testing 20 | .pytest-incremental 21 | .tox 22 | .vagrant 23 | 24 | # Meshes 25 | *.edge 26 | *.ele 27 | *.msh 28 | *.node 29 | *.geo 30 | 
-------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | Gheorghe-Teodor Bercea 2 | George Boutsioukis 3 | David A Ham 4 | David A Ham 5 | Miklós Homolya 6 | Nicolas Loriant 7 | Nicolas Loriant 8 | Nicolas Loriant 9 | Nicolas Loriant 10 | Nicolas Loriant 11 | Fabio Luporini 12 | Graham Markall 13 | Graham Markall 14 | Andrew McRae 15 | Andrew McRae 16 | Lawrence Mitchell 17 | Lawrence Mitchell 18 | Kaho Sato 19 | Reuben W. Nixon-Hill 20 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | PyOP2 has received contributions from the following: 2 | 3 | Institutions 4 | ------------ 5 | 6 | Imperial College London 7 | The University of Edinburgh 8 | 9 | Individuals 10 | ----------- 11 | 12 | Gheorghe-Teodor Bercea 13 | Simon Funke 14 | Ben Grabham 15 | David A Ham 16 | Nicolas Loriant 17 | Fabio Luporini 18 | Graham Markall 19 | Lawrence Mitchell 20 | Florian Rathgeber 21 | Francis Russell 22 | Kaho Sato 23 | Reuben W. Nixon-Hill 24 | Nacime Bouziani 25 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PyOP2 2 | 3 | We value third-party contributions. To keep things simple for you and us, 4 | please adhere to the following contributing guidelines. 5 | 6 | ## Getting Started 7 | 8 | * You will need a [GitHub account](https://github.com/signup/free). 9 | * Submit a [ticket for your issue][0], assuming one does not already exist. 10 | * Clearly describe the issue including steps to reproduce when it is a bug. 11 | * Make sure you specify the version that you know has the issue. 12 | * Bonus points for submitting a failing test along with the ticket. 
13 | * If you don't have push access, fork the repository on GitHub. 14 | 15 | ## Making Changes 16 | 17 | * Create a topic branch for your feature or bug fix. 18 | * Make commits of logical units. 19 | * Make sure your commits adhere to the coding guidelines below. 20 | * Make sure your commit messages are in the [proper format][1]: The first line 21 | of the message should have 50 characters or less, separated by a blank line 22 | from the (optional) body. The body should be wrapped at 70 characters and 23 | paragraphs separated by blank lines. Bulleted lists are also fine. 24 | * Make sure you have added the necessary tests for your changes. 25 | * Run _all_ the tests to assure nothing else was accidentally broken. 26 | 27 | ## Coding guidelines 28 | 29 | [PEP 0008][2] is enforced, with the exception of [E501][3] and [E226][3]: 30 | * Indent by 4 spaces, tabs are *strictly forbidden*. 31 | * Lines should not exceed 79 characters where possible without severely 32 | impacting legibility. If breaking a line would make the code much less 33 | readable it's fine to overrun by a little bit. 34 | * No trailing whitespace at EOL or trailing blank lines at EOF. 35 | 36 | ## Checking your commit conforms to coding guidelines 37 | 38 | Install a Git pre-commit hook automatically checking for tab and whitespace 39 | errors before committing and also calls `flake8` on your changed files. In the 40 | `.git/hooks` directory of your local Git repository, run the following: 41 | 42 | ``` 43 | git config --local core.whitespace "space-before-tab, tab-in-indent, trailing-space, tabwidth=4" 44 | wget https://gist.github.com/kynan/d233073b66e860c41484/raw/pre-commit 45 | chmod +x pre-commit 46 | ``` 47 | 48 | Make sure the `pre-commit.sample` hook is still in place, since it is required. 49 | 50 | ## Submitting Changes 51 | 52 | * We can only accept your contribution if you have signed the Contributor 53 | License Agreement (CLA). 
54 | * Push your changes to a topic branch in your fork of the repository. 55 | * Submit a pull request to the repository in the OP2 organization. 56 | 57 | [0]: https://github.com/OP2/PyOP2/issues 58 | [1]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html 59 | [2]: http://www.python.org/dev/peps/pep-0008/ 60 | [3]: http://pep8.readthedocs.org/en/latest/intro.html#error-codes 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Imperial College London and others. Please see the 2 | AUTHORS file in the main source directory for a full list of copyright 3 | holders. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * The name of Imperial College London or that of other 14 | contributors may not be used to endorse or promote products 15 | derived from this software without specific prior written 16 | permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 19 | ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 25 | OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 27 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 28 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 | DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pyop2 *.c 2 | include versioneer.py 3 | include pyop2/_version.py 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTEST = py.test 2 | 3 | TEST_BASE_DIR = test 4 | 5 | UNIT_TEST_DIR = $(TEST_BASE_DIR)/unit 6 | 7 | SPHINX_DIR = doc/sphinx 8 | SPHINX_BUILD_DIR = $(SPHINX_DIR)/build 9 | SPHINX_TARGET = html 10 | SPHINX_TARGET_DIR = $(SPHINX_BUILD_DIR)/$(SPHINX_TARGET) 11 | SPHINXOPTS = -a 12 | 13 | PORT = 8000 14 | 15 | MESHES_DIR = demo/meshes 16 | 17 | GIT_REV = $(shell git rev-parse --verify --short HEAD) 18 | 19 | all: ext 20 | 21 | .PHONY : help test lint unit doc update_docs ext ext_clean meshes 22 | 23 | help: 24 | @echo "make COMMAND with COMMAND one of:" 25 | @echo " test : run lint and unit tests" 26 | @echo " lint : run flake8 code linter" 27 | @echo " unit : run unit tests" 28 | @echo " unit_BACKEND : run unit tests for BACKEND" 29 | @echo " doc : build sphinx documentation" 30 | @echo " serve : launch local web server to serve up documentation" 31 | @echo " update_docs : build sphinx documentation and push to GitHub" 32 | @echo " ext : rebuild Cython extension" 33 | @echo " 
ext_clean : delete generated extension" 34 | @echo " meshes : download demo meshes" 35 | @echo 36 | @echo "Available OpenCL contexts: $(OPENCL_CTXS)" 37 | 38 | test: lint unit 39 | 40 | lint: 41 | @flake8 42 | 43 | unit: 44 | cd $(TEST_BASE_DIR); $(PYTEST) unit 45 | 46 | doc: 47 | make -C $(SPHINX_DIR) $(SPHINX_TARGET) SPHINXOPTS=$(SPHINXOPTS) 48 | 49 | serve: 50 | make -C $(SPHINX_DIR) livehtml 51 | 52 | update_docs: 53 | if [ ! -d $(SPHINX_TARGET_DIR)/.git ]; then \ 54 | mkdir -p $(SPHINX_BUILD_DIR); \ 55 | cd $(SPHINX_BUILD_DIR); git clone `git config --get remote.origin.url` $(SPHINX_TARGET); \ 56 | fi 57 | cd $(SPHINX_TARGET_DIR); git fetch -p; git checkout -f gh-pages; git reset --hard origin/gh-pages 58 | make -C $(SPHINX_DIR) $(SPHINX_TARGET) SPHINXOPTS=$(SPHINXOPTS) 59 | cd $(SPHINX_TARGET_DIR); git add .; git commit -am "Update documentation to revision $(GIT_REV)"; git push origin gh-pages 60 | 61 | ext: ext_clean 62 | python setup.py build_ext -i 63 | 64 | ext_clean: 65 | rm -rf build pyop2/compute_ind.c pyop2/compute_ind.so pyop2/sparsity.c pyop2/sparsity.so 66 | 67 | meshes: 68 | make -C $(MESHES_DIR) meshes 69 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://travis-ci.org/OP2/PyOP2.png?branch=master 2 | :target: https://travis-ci.org/OP2/PyOP2 3 | :alt: build status 4 | 5 | .. contents:: 6 | 7 | This repository is archived, PyOP2 can now be found inside the `Firedrake repository `_ 8 | ======== 9 | 10 | Installing PyOP2 11 | ================ 12 | 13 | PyOP2 requires Python 3.6 or later. 14 | 15 | The main testing platform for PyOP2 is Ubuntu 18.04 64-bit with Python 16 | 3.6. Later Ubuntu versions should also work. Some users successfully 17 | use PyOP2 on Mac OS X. 
18 | 19 | Installation of the dependencies is somewhat involved, and therefore 20 | the recommended way to obtain PyOP2 is by using the `Firedrake 21 | installation script 22 | `__. This will give 23 | you a Python 3 venv that contains a working PyOP2 installation. 24 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | pdflatex pyop2.tex 3 | -------------------------------------------------------------------------------- /doc/pyop2.tex: -------------------------------------------------------------------------------- 1 | \documentclass[a4paper]{article} 2 | 3 | \usepackage{fullpage} 4 | 5 | \author{Graham Markall} 6 | \title{PyOP2 Draft Proposal} 7 | 8 | 9 | \begin{document} 10 | 11 | \maketitle 12 | 13 | \section{Motivation} 14 | 15 | This is part of an attempt at defining an implementation of OP2 that generates code at runtime (later referred to as PyOP2, for reasons which will be explained later). Coarsely, the compile-time translator iterates over \verb|op_par_loop| calls in the source code and performs the following operations: 16 | 17 | \begin{itemize} 18 | \item Generates a host stub for the kernel that is called. 19 | \item Generates a wrapper around the OP2 kernel, that, for example, stages data into and out of shared memory. 20 | \item Inserts a call to the original OP2 kernel inline in the generated wrapper, but leaves the kernel untouched. 21 | \end{itemize} 22 | 23 | \noindent The OP2 runtime manages: 24 | 25 | \begin{itemize} 26 | \item Transfer of data to/from the device. 27 | \item Planning parallel execution. 28 | \item Invoking the host stubs for kernels. 29 | \end{itemize} 30 | 31 | The question of which parts of the ROSE-based translator should be used arises. 
The position outlined in this document is that: 32 | 33 | \begin{itemize} 34 | \item The code that performs the generation of the host stub should be replaced by support in the runtime that calls the plan function and executes the kernel for each colour according to the plan. 35 | \item The plan function from OP2 should be re-used as-is. 36 | \item Since this leaves effectively no source-to-source transformation to perform (only inserting an essentially unmodified kernel into generated code) it should be possible to avoid the use of ROSE altogether. Should transformation need to be performed on OP2 kernels in future, this functionality may be added, either by integrating ROSE or using a simpler framework, since the operations performed in a kernel are limited to a fairly restricted subset of C/CUDA. 37 | \item In order to speed development, maintainability and integration with MCFC and Fluidity, a sensible choice of language for the re-implementation is Python (hence PyOP2). 38 | \end{itemize} 39 | 40 | The remainder of this document describes the PyOP2 API, and how this API may be implemented. One may also refer to the implementation folder in the same repository as this document, for a skeleton API implementation and a complete (though non-functioning without an API implementation) version of the Airfoil code written using PyOP2. 41 | 42 | \section{API} 43 | 44 | \subsection{Declaring data} 45 | 46 | Each data item is an instance of an object of one of the types \verb|Set|, \verb|Dat|, \verb|Mat|, \verb|Map|, \verb|Global| or \verb|Const|. Each of these objects may be constructed as follows: 47 | 48 | \begin{description} 49 | \item[\texttt{Set(size, name)}] Construct a set with \verb|size| elements named \verb|name|. The name is for debugging purposes. 50 | \item[\texttt{Dat(set, dim, type, data, name)}] Construct a dat that holds a data item of type \verb|type| and dimension \verb|dim| for each element of the set \verb|set|. 
The data specifies the data to initialise the dat with, and may be a list or tuple. The name is for debugging purposes. 51 | \item[\texttt{Mat(row\_set, col\_set, dim, type, name)}] Construct a matrix which has entries that are the product of the two sets. The elements are of dimension \verb|dim| and type \verb|type|. The name is for debugging purposes. 52 | \item[\texttt{Map(from, to, dim, values, name)}] Construct a mapping from one set to another. The \verb|dim| of the map indicates how many different relations between the two sets the map holds. \verb|values| is used to initialise the mapping, and may be a list or tuple. The name is used for debugging. 53 | \item[\texttt{Global(name, val)}] Constructs a global value. The name is used for debugging purposes. \verb|val| is used to specify an initial value and may be a scalar, a list or a tuple. 54 | \item[\texttt{Const(dim, type, value, name)}] Construct a constant value of dimension \verb|dim|, type \verb|type|, and value \verb|value|. The name is used for debugging purposes. 55 | \end{description} 56 | 57 | \subsection{Declaring kernels} 58 | 59 | To construct a kernel object with name \verb|name|, that implements the code string \verb|code|: 60 | 61 | \begin{verbatim} 62 | Kernel(name, code) 63 | \end{verbatim} 64 | 65 | The name is used only for debugging purposes. The code is an OP2 kernel, with the same semantics as are used in the current implementations of OP2. 66 | 67 | \subsection{Invoking a parallel loop} 68 | 69 | A parallel loop object is constructed with the following syntax: 70 | 71 | \begin{verbatim} 72 | ParLoop(kernel, iteration_space, *args) 73 | \end{verbatim} 74 | 75 | The arguments to the kernel are as follows: 76 | 77 | \begin{description} 78 | \item[\texttt{kernel}] is a \verb|Kernel| object. 79 | \item[\texttt{iteration\_space}] is an \verb|IterationSpace| object or a \verb|Set| object. 80 | \item[\texttt{args}] is any number of \verb|Arg| objects. 
81 | \end{description} 82 | 83 | At the time of construction, the \verb|ParLoop| object proceeds with compiling the kernel if it is in the uncompiled state, and then checks if a plan has already been constructed for the given iteration space and access descriptors. If there is no suitable plan, then the planner is called. Once a plan has been obtained, the ParLoop object calls the kernel for each colour in the plan. 84 | 85 | The \verb|IterationSpace| object is used to declare an iteration space that consists of a set as well as extra indices over a local matrix or vector. For example, one may pass \verb|IterationSpace(elements, 3, 3)| when assembling a matrix over elements, or \verb|IterationSpace(elements, 3)| when assembling a vector. 86 | 87 | The \verb|Arg| class should not be used directly, but instead one of the subclasses of \verb|Arg| should be used: 88 | 89 | \begin{description} 90 | \item[\texttt{ArgDat(dat, index, map, access)}] is used to pass a \verb|Dat| argument. The \verb|index| parameter selects which of the relations in the \verb|map| should be used to access the data indirectly. If the runtime system is to gather together all the values of the dat that are pointed to by all the different relations in the mapping, then \verb|idx_all| may be passed as the \verb|index| argument. If the dataset is to be accessed directly, then \verb|None| should be passed as int \verb|index| and \verb|map| parameters. \verb|access| is one of \verb|read|, \verb|write|, \verb|inc| or \verb|rw|, with similar meaning to in the current OP2 implementation. 91 | \item[\texttt{ArgMat(mat, row\_idx, row\_map, col\_idx, col\_map, access)}] is used to pass a \verb|Mat| argument. The index and map arguments are used similarly into the \verb|ArgDat|, with the exception that the \verb|row_map| is used to index into the rows of the matrix and the \verb|col_map| is used to index into the columns of the matrix. The \verb|access| parameter works as for the \verb|ArgDat| case. 
92 | \item[\texttt{ArgGbl(var, access)}] is for passing a \verb|Global| argument. \verb|var| is an instance of a \verb|Global|, and \verb|access| specifies the access method in the same way as for the previous two cases. 93 | \end{description} 94 | 95 | \section{Implementation considerations and issues} 96 | 97 | This is a list of notes for now: 98 | 99 | \begin{itemize} 100 | \item All classes must be designed so that their representation uniquely describes an object with its particular state, in order for caching of compiled code to work. 101 | \item There are several possibilities for implementing compilation and dynamic linking of code: 102 | \begin{itemize} 103 | \item Instant, from the FEniCS Project for compilation, caching and linking of CPU code 104 | \item PyCUDA/PyOpenCL from Andreas Kl\"ockner for GPU/accelerator code 105 | \item CodePy, also from Andreas Kl\"ockner for C/C++ code compilation and dynamic linking into the Python interpreter. 106 | \end{itemize} 107 | \item The possibilities for an interface allowing different OP2 backends to be implemented include: 108 | \begin{itemize} 109 | \item Each backend overrides the classes in \verb|op2.py| so that they implement the functionality required to run on their target. 110 | \item We define a ``backend API'' that is used to implement a backend. The implementation of classes in \verb|op2.py| don't change, but instead it contains code to drive the backend. This appears more preferable since I believe it will allow a cleaner separation between the user-facing API and the backend implementation. 111 | \end{itemize} 112 | \end{itemize} 113 | 114 | \end{document} 115 | -------------------------------------------------------------------------------- /doc/sphinx/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | APIDOCOPTS = -f 6 | SPHINXOPTS = 7 | SPHINXBUILD = sphinx-build 8 | PAPER = 9 | BUILDDIR = build 10 | 11 | # Internal variables. 12 | PAPEROPT_a4 = -D latex_paper_size=a4 13 | PAPEROPT_letter = -D latex_paper_size=letter 14 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 15 | # the i18n builder cannot share the environment and doctrees with the others 16 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 17 | 18 | .PHONY: help clean livehtml html dirhtml singlehtml pickle json htmlhelp qthelp \ 19 | devhelp epub latex latexpdf text man changes linkcheck doctest gettext apidoc 20 | 21 | help: 22 | @echo "Please use \`make ' where is one of" 23 | @echo " html to make standalone HTML files" 24 | @echo " dirhtml to make HTML files named index.html in directories" 25 | @echo " singlehtml to make a single large HTML file" 26 | @echo " pickle to make pickle files" 27 | @echo " json to make JSON files" 28 | @echo " htmlhelp to make HTML files and a HTML help project" 29 | @echo " qthelp to make HTML files and a qthelp project" 30 | @echo " devhelp to make HTML files and a Devhelp project" 31 | @echo " epub to make an epub" 32 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 33 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " linkcheck to check all external links for integrity" 41 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 42 | 43 | apidoc: 44 | sphinx-apidoc ../../pyop2 -o source/ -T $(APIDOCOPTS) 45 | 46 | clean: 47 | -rm -rf $(BUILDDIR)/* 48 | 49 | buildhtml: 50 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) 
$(BUILDDIR)/html 51 | 52 | html: apidoc buildhtml 53 | @echo 54 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 55 | 56 | dirhtml: apidoc 57 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 58 | @echo 59 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 60 | 61 | singlehtml: apidoc 62 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 63 | @echo 64 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 65 | 66 | pickle: apidoc 67 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 68 | @echo 69 | @echo "Build finished; now you can process the pickle files." 70 | 71 | json: apidoc 72 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 73 | @echo 74 | @echo "Build finished; now you can process the JSON files." 75 | 76 | htmlhelp: apidoc 77 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 78 | @echo 79 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 80 | ".hhp project file in $(BUILDDIR)/htmlhelp." 81 | 82 | qthelp: apidoc 83 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 84 | @echo 85 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 86 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 87 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyOP2.qhcp" 88 | @echo "To view the help file:" 89 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyOP2.qhc" 90 | 91 | devhelp: apidoc 92 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 93 | @echo 94 | @echo "Build finished." 95 | @echo "To view the help file:" 96 | @echo "# mkdir -p $$HOME/.local/share/devhelp/PyOP2" 97 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyOP2" 98 | @echo "# devhelp" 99 | 100 | epub: apidoc 101 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 102 | @echo 103 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
104 | 105 | latex: apidoc 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo 108 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 109 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 110 | "(use \`make latexpdf' here to do that automatically)." 111 | 112 | latexpdf: apidoc 113 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 114 | @echo "Running LaTeX files through pdflatex..." 115 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 116 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 117 | 118 | text: apidoc 119 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 120 | @echo 121 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 122 | 123 | man: apidoc 124 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 125 | @echo 126 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 127 | 128 | texinfo: apidoc 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo 131 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 132 | @echo "Run \`make' in that directory to run these through makeinfo" \ 133 | "(use \`make info' here to do that automatically)." 134 | 135 | info: apidoc 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo "Running Texinfo files through makeinfo..." 138 | make -C $(BUILDDIR)/texinfo info 139 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 140 | 141 | gettext: apidoc 142 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 143 | @echo 144 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 145 | 146 | changes: apidoc 147 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 148 | @echo 149 | @echo "The overview file is in $(BUILDDIR)/changes." 
150 | 151 | linkcheck: apidoc 152 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 153 | @echo 154 | @echo "Link check complete; look for any errors in the above output " \ 155 | "or in $(BUILDDIR)/linkcheck/output.txt." 156 | 157 | doctest: apidoc 158 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 159 | @echo "Testing of doctests in the sources finished, look at the " \ 160 | "results in $(BUILDDIR)/doctest/output.txt." 161 | -------------------------------------------------------------------------------- /doc/sphinx/source/architecture.rst: -------------------------------------------------------------------------------- 1 | .. _architecture: 2 | 3 | PyOP2 Architecture 4 | ================== 5 | 6 | As described in :ref:`concepts`, PyOP2 exposes an API that allows users to 7 | declare the topology of unstructured meshes in the form of :class:`Sets 8 | ` and :class:`Maps ` and data in the form of 9 | :class:`Dats `, :class:`Mats `, :class:`Globals 10 | ` and :class:`Consts `. Computations on this data 11 | are described by :class:`Kernels ` described in :ref:`kernels` 12 | and executed by :func:`parallel loops `. 13 | 14 | The API is the frontend to the PyOP2 runtime compilation architecture, which 15 | supports the generation and just-in-time (JIT) compilation of low-level code 16 | for a range of backends described in :doc:`backends` and the efficient 17 | scheduling of parallel computations. A schematic overview of the PyOP2 18 | architecture is given below: 19 | 20 | .. figure:: images/pyop2_architecture.svg 21 | :align: center 22 | 23 | Schematic overview of the PyOP2 architecture 24 | 25 | From an outside perspective, PyOP2 is a conventional Python library, with 26 | performance critical library functions implemented in Cython_. A user's 27 | application code makes calls to the PyOP2 API, most of which are conventional 28 | library calls. 
The exceptions are :func:`~pyop2.par_loop` calls, which 29 | encapsulate PyOP2's runtime core functionality performing backend-specific 30 | code generation. Executing a parallel loop comprises the following steps: 31 | 32 | 1. Compute a parallel execution plan, including information for efficient 33 | staging of data and partitioning and colouring of the iteration set for 34 | conflict-free parallel execution. This process is described in :doc:`plan` 35 | and does not apply to the sequential backend. 36 | 2. Generate backend-specific code for executing the computation for a given 37 | set of :func:`~pyop2.par_loop` arguments as detailed in :doc:`backends` 38 | according to the execution plan computed in the previous step. 39 | 3. Pass the generated code to a backend-specific toolchain for just-in-time 40 | compilation, producing a shared library callable as a Python module which 41 | is dynamically loaded. This module is cached on disk to save recompilation 42 | when the same :func:`~pyop2.par_loop` is called again for the same backend. 43 | 4. Build the backend-specific list of arguments to be passed to the generated 44 | code, which may initiate host to device data transfer for the CUDA and 45 | OpenCL backends. 46 | 5. Call into the generated module to perform the actual computation. For 47 | distributed parallel computations this involves separate calls for the 48 | regions owned by the current processor and the halo as described in 49 | :doc:`mpi`. 50 | 6. Perform any necessary reductions for :class:`Globals `. 51 | 7. Call the backend-specific matrix assembly procedure on any 52 | :class:`~pyop2.Mat` arguments. 53 | 54 | .. _backend-support: 55 | 56 | Multiple Backend Support 57 | ------------------------ 58 | 59 | The backend is selected by passing the keyword argument ``backend`` to the 60 | :func:`~pyop2.init` function. If omitted, the ``sequential`` backend is 61 | selected by default.
This choice can be overridden by exporting the 62 | environment variable ``PYOP2_BACKEND``, which allows switching backends 63 | without having to touch the code. Once chosen, the backend cannot be changed 64 | for the duration of the running Python interpreter session. 65 | 66 | PyOP2 provides a single API to the user, regardless of which backend the 67 | computations are running on. All classes and functions that form the public 68 | API defined in :mod:`pyop2.op2` are interfaces, whose concrete implementations 69 | are initialised according to the chosen backend. A metaclass takes care of 70 | instantiating a backend-specific version of the requested class and setting 71 | the corresponding docstrings such that this process is entirely transparent to 72 | the user. The implementation of the PyOP2 backends is completely orthogonal to 73 | the backend selection process and free to use established practices of 74 | object-oriented design. 75 | 76 | .. _Cython: http://cython.org 77 | -------------------------------------------------------------------------------- /doc/sphinx/source/caching.rst: -------------------------------------------------------------------------------- 1 | .. _caching: 2 | 3 | Caching in PyOP2 4 | ================ 5 | 6 | PyOP2 makes heavy use of caches to ensure performance is not adversely 7 | affected by too many runtime computations. The caching in PyOP2 takes 8 | a number of forms: 9 | 10 | 1. Disk-based caching of generated code 11 | 12 | Since compiling a generated code module may be an expensive 13 | operation, PyOP2 caches the generated code on disk such that 14 | subsequent runs of the same simulation will not have to pay a 15 | compilation cost. 16 | 17 | 2. 
In memory caching of generated code function pointers 18 | 19 | Once code has been generated and loaded into the running PyOP2 20 | process, we cache the resulting callable function pointer for the 21 | lifetime of the process, such that subsequent calls to the same 22 | generated code are fast. 23 | 24 | 3. In memory caching of expensive to build objects 25 | 26 | Some PyOP2 objects, in particular :class:`~pyop2.Sparsity` objects, 27 | can be expensive to construct. Since a sparsity does not change if 28 | it is built again with the same arguments, we only construct the 29 | sparsity once for each unique set of arguments. 30 | 31 | The caching strategies for PyOP2 follow from two axioms: 32 | 33 | 1. For PyOP2 :class:`~pyop2.Set`\s and :class:`~pyop2.Map`\s, equality 34 | is identity 35 | 2. Caches of generated code should depend on metadata, but not data 36 | 37 | The first axiom implies that two :class:`~pyop2.Set`\s or 38 | :class:`~pyop2.Map`\s compare equal if and only if they are the same 39 | object. The second implies that generated code must be *independent* 40 | of the absolute size of the data the :func:`~pyop2.par_loop` that 41 | generated it executed over. For example, the size of the iteration 42 | set should not be part of the key, but the arity of any maps and size 43 | and type of every data item should be. 44 | 45 | One consequence of these rules is that there are effectively two 46 | separate types of cache in PyOP2, object and class caches, 47 | distinguished by where the cache itself lives. 48 | 49 | Class caches 50 | ------------ 51 | 52 | These are used to cache objects that depend on metadata, but not 53 | object instances, such as generated code. They are implemented by 54 | the cacheable class inheriting from :class:`~.Cached`. 55 | 56 | ..
note:: 57 | 58 | There is currently no eviction strategy for class caches, should 59 | they grow too large, for example by executing many different parallel 60 | loops, an out of memory error can occur 61 | 62 | Object caches 63 | ------------- 64 | 65 | These are used to cache objects that are built on top of 66 | :class:`~pyop2.Set`\s and :class:`~pyop2.Map`\s. They are implemented by the 67 | cacheable class inheriting from :class:`~.ObjectCached` and the 68 | caching instance defining a ``_cache`` attribute. 69 | 70 | The motivation for these caches is that the cache key for objects such as 71 | sparsities relies on an identical sparsity being built if the 72 | arguments are identical. So that users of the API do not have to 73 | worry too much about carrying around "temporary" objects forever such 74 | that they will hit caches, PyOP2 builds up a hierarchy of caches of 75 | transient objects on top of the immutable sets and maps. 76 | 77 | So, for example, the user can build and throw away 78 | :class:`~pyop2.DataSet`\s as normal in their code. Internally, however, 79 | these instances are cached on the set they are built on top of. Thus, 80 | in the following snippet, we have that ``ds`` and ``ds2`` are the same 81 | object: 82 | 83 | .. code-block:: python 84 | 85 | s = op2.Set(1) 86 | ds = op2.DataSet(s, 10) 87 | ds2 = op2.DataSet(s, 10) 88 | assert ds is ds2 89 | 90 | The setup of these caches is such that the lifetime of objects in the 91 | cache is tied to the lifetime of both the caching and the cached 92 | object. In the above example, as long as the user program holds a 93 | reference to one of ``s``, ``ds`` or ``ds2`` all three objects will 94 | remain live. As soon as all references are lost, all three become 95 | candidates for garbage collection. 96 | 97 | ..
note:: 98 | 99 | The cache eviction strategy for these caches relies on the Python 100 | garbage collector, and hence on the user not holding onto 101 | references to some of either the cached or the caching objects for 102 | too long. Should the objects on which the caches live persist, an 103 | out of memory error may occur. 104 | 105 | Debugging cache leaks 106 | --------------------- 107 | 108 | To debug potential problems with the cache, PyOP2 can be instructed to 109 | print the size of both object and class caches at program exit. This 110 | can be done by setting the environment variable 111 | ``PYOP2_PRINT_CACHE_SIZE`` to 1 before running a PyOP2 program, or 112 | passing the ``print_cache_size`` to :func:`~pyop2.init`. 113 | -------------------------------------------------------------------------------- /doc/sphinx/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # PyOP2 documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Aug 14 10:10:00 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys 15 | import os 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | #sys.path.insert(0, os.path.abspath('.')) 21 | sys.path.insert(0, os.path.abspath('../../..')) 22 | 23 | # -- General configuration ----------------------------------------------------- 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 
26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be extensions 29 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 30 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.imgmath'] 31 | autodoc_default_flags = ['members', 'undoc-members'] 32 | # Both the class’ and the __init__ method’s docstring are concatenated and 33 | # inserted into the class definition 34 | autoclass_content = 'both' 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix of source filenames. 40 | source_suffix = '.rst' 41 | 42 | # The encoding of source files. 43 | #source_encoding = 'utf-8-sig' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = u'PyOP2' 50 | copyright = u'2012-2013, Imperial College et al' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = '2020.0' 58 | # The full version, including alpha/beta/rc tags. 59 | release = version 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = [] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all documents. 
76 | #default_role = None 77 | 78 | # If true, '()' will be appended to :func: etc. cross-reference text. 79 | #add_function_parentheses = True 80 | 81 | # If true, the current module name will be prepended to all description 82 | # unit titles (such as .. function::). 83 | #add_module_names = True 84 | 85 | # If true, sectionauthor and moduleauthor directives will be shown in the 86 | # output. They are ignored by default. 87 | #show_authors = False 88 | 89 | # The name of the Pygments (syntax highlighting) style to use. 90 | pygments_style = 'sphinx' 91 | 92 | # A list of ignored prefixes for module index sorting. 93 | #modindex_common_prefix = [] 94 | 95 | autodoc_member_order = "bysource" 96 | 97 | # -- Options for HTML output --------------------------------------------------- 98 | 99 | # The theme to use for HTML and HTML Help pages. See the documentation for 100 | # a list of builtin themes. 101 | html_theme = 'default' 102 | 103 | # Theme options are theme-specific and customize the look and feel of a theme 104 | # further. For a list of options available for each theme, see the 105 | # documentation. 106 | #html_theme_options = {} 107 | 108 | # Add any paths that contain custom themes here, relative to this directory. 109 | #html_theme_path = [] 110 | 111 | # The name for this set of Sphinx documents. If None, it defaults to 112 | # " v documentation". 113 | #html_title = None 114 | 115 | # A shorter title for the navigation bar. Default is the same as html_title. 116 | #html_short_title = None 117 | 118 | # The name of an image file (relative to this directory) to place at the top 119 | # of the sidebar. 120 | #html_logo = None 121 | 122 | # The name of an image file (within the static path) to use as favicon of the 123 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 124 | # pixels large. 
125 | #html_favicon = None 126 | 127 | # Add any paths that contain custom static files (such as style sheets) here, 128 | # relative to this directory. They are copied after the builtin static files, 129 | # so a file named "default.css" will overwrite the builtin "default.css". 130 | html_static_path = ['_static'] 131 | 132 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 133 | # using the given strftime format. 134 | #html_last_updated_fmt = '%b %d, %Y' 135 | 136 | # If true, SmartyPants will be used to convert quotes and dashes to 137 | # typographically correct entities. 138 | #html_use_smartypants = True 139 | 140 | # Custom sidebar templates, maps document names to template names. 141 | #html_sidebars = {} 142 | 143 | # Additional templates that should be rendered to pages, maps page names to 144 | # template names. 145 | #html_additional_pages = {} 146 | 147 | # If false, no module index is generated. 148 | #html_domain_indices = True 149 | 150 | # If false, no index is generated. 151 | #html_use_index = True 152 | 153 | # If true, the index is split into individual pages for each letter. 154 | #html_split_index = False 155 | 156 | # If true, links to the reST sources are added to the pages. 157 | #html_show_sourcelink = True 158 | 159 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 160 | #html_show_sphinx = True 161 | 162 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 163 | #html_show_copyright = True 164 | 165 | # If true, an OpenSearch description file will be output, and all pages will 166 | # contain a tag referring to it. The value of this option must be the 167 | # base URL from which the finished HTML is served. 168 | #html_use_opensearch = '' 169 | 170 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 171 | #html_file_suffix = None 172 | 173 | # Output file base name for HTML help builder. 
174 | htmlhelp_basename = 'PyOP2doc' 175 | 176 | 177 | # -- Options for LaTeX output -------------------------------------------------- 178 | 179 | latex_elements = { 180 | # The paper size ('letterpaper' or 'a4paper'). 181 | #'papersize': 'letterpaper', 182 | 183 | # The font size ('10pt', '11pt' or '12pt'). 184 | #'pointsize': '10pt', 185 | 186 | # Additional stuff for the LaTeX preamble. 187 | #'preamble': '', 188 | } 189 | 190 | # Grouping the document tree into LaTeX files. List of tuples 191 | # (source start file, target name, title, author, documentclass [howto/manual]). 192 | latex_documents = [ 193 | ('index', 'PyOP2.tex', u'PyOP2 Documentation', 194 | u'Imperial College et al', 'manual'), 195 | ] 196 | 197 | # The name of an image file (relative to this directory) to place at the top of 198 | # the title page. 199 | #latex_logo = None 200 | 201 | # For "manual" documents, if this is true, then toplevel headings are parts, 202 | # not chapters. 203 | #latex_use_parts = False 204 | 205 | # If true, show page references after internal links. 206 | #latex_show_pagerefs = False 207 | 208 | # If true, show URL addresses after external links. 209 | #latex_show_urls = False 210 | 211 | # Documents to append as an appendix to all manuals. 212 | #latex_appendices = [] 213 | 214 | # If false, no module index is generated. 215 | #latex_domain_indices = True 216 | 217 | 218 | # -- Options for manual page output -------------------------------------------- 219 | 220 | # One entry per manual page. List of tuples 221 | # (source start file, name, description, authors, manual section). 222 | man_pages = [ 223 | ('index', 'pyop2', u'PyOP2 Documentation', 224 | [u'Imperial College et al'], 1) 225 | ] 226 | 227 | # If true, show URL addresses after external links. 228 | #man_show_urls = False 229 | 230 | 231 | # -- Options for Texinfo output ------------------------------------------------ 232 | 233 | # Grouping the document tree into Texinfo files. 
List of tuples 234 | # (source start file, target name, title, author, 235 | # dir menu entry, description, category) 236 | texinfo_documents = [ 237 | ('index', 'PyOP2', u'PyOP2 Documentation', 238 | u'Imperial College et al', 'PyOP2', 'One line description of project.', 239 | 'Miscellaneous'), 240 | ] 241 | 242 | # Documents to append as an appendix to all manuals. 243 | #texinfo_appendices = [] 244 | 245 | # If false, no module index is generated. 246 | #texinfo_domain_indices = True 247 | 248 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 249 | #texinfo_show_urls = 'footnote' 250 | -------------------------------------------------------------------------------- /doc/sphinx/source/index.rst: -------------------------------------------------------------------------------- 1 | .. PyOP2 documentation master file, created by 2 | sphinx-quickstart on Tue Aug 14 10:10:00 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to PyOP2's documentation! 7 | ================================= 8 | 9 | .. warning:: 10 | The prose documentation contained here is significantly out-of-date and thus 11 | contains many inaccuracies. It is, nevertheless, quite a useful resource for 12 | people new to PyOP2. Please read with care. 13 | 14 | The API documentation, however, is updated regularly and can be considered 15 | accurate. 16 | 17 | Contents: 18 | 19 | .. 
toctree:: 20 | :maxdepth: 2 21 | 22 | installation 23 | concepts 24 | kernels 25 | ir 26 | architecture 27 | backends 28 | linear_algebra 29 | plan 30 | mixed 31 | mpi 32 | caching 33 | profiling 34 | user 35 | pyop2 36 | 37 | 38 | Indices and tables 39 | ================== 40 | 41 | * :ref:`genindex` 42 | * :ref:`modindex` 43 | * :ref:`search` 44 | 45 | -------------------------------------------------------------------------------- /doc/sphinx/source/installation.rst: -------------------------------------------------------------------------------- 1 | ../../../README.rst -------------------------------------------------------------------------------- /doc/sphinx/source/kernels.rst: -------------------------------------------------------------------------------- 1 | .. _kernels: 2 | 3 | PyOP2 Kernels 4 | ============= 5 | 6 | Kernels in PyOP2 define the local operations that are to be performed for each 7 | element of the iteration set the kernel is executed over. There must be a one 8 | to one match between the arguments declared in the kernel signature and the 9 | actual arguments passed to the parallel loop executing this kernel. As 10 | described in :doc:`concepts`, data is accessed directly on the iteration set 11 | or via mappings passed in the :func:`~pyop2.par_loop` call. 12 | 13 | The kernel only sees data corresponding to the current element of the 14 | iteration set it is invoked for. Any data read by the kernel i.e. accessed as 15 | :data:`~pyop2.READ`, :data:`~pyop2.RW` or :data:`~pyop2.INC` is automatically 16 | gathered via the mapping relationship in the *staging in* phase and the kernel 17 | is passed pointers to the staging memory. Similarly, after the kernel has been 18 | invoked, any modified data i.e. accessed as :data:`~pyop2.WRITE`, 19 | :data:`~pyop2.RW` or :data:`~pyop2.INC` is scattered back out via the 20 | :class:`~pyop2.Map` in the *staging out* phase. 
It is only safe for a kernel 21 | to manipulate data in the way declared via the access descriptor in the 22 | parallel loop call. Any modifications to an argument accessed read-only would 23 | not be written back since the staging out phase is skipped for this argument. 24 | Similarly, the result of reading an argument declared as write-only is 25 | undefined since the data has not been staged in. 26 | 27 | .. _kernel-api: 28 | 29 | Kernel API 30 | ---------- 31 | 32 | Consider a :func:`~pyop2.par_loop` computing the midpoint of a triangle given 33 | the three vertex coordinates. Note that we make use of a convenience in the 34 | PyOP2 syntax, which allows declaring an anonymous :class:`~pyop2.DataSet` of a 35 | dimension greater than one by using the ``**`` operator. We omit the actual data in 36 | the declaration of the :class:`~pyop2.Map` ``cell2vertex`` and 37 | :class:`~pyop2.Dat` ``coordinates``. :: 38 | 39 | vertices = op2.Set(num_vertices) 40 | cells = op2.Set(num_cells) 41 | 42 | cell2vertex = op2.Map(cells, vertices, 3, [...]) 43 | 44 | coordinates = op2.Dat(vertices ** 2, [...], dtype=float) 45 | midpoints = op2.Dat(cells ** 2, dtype=float) 46 | 47 | op2.par_loop(midpoint, cells, 48 | midpoints(op2.WRITE), 49 | coordinates(op2.READ, cell2vertex)) 50 | 51 | Kernels are implemented in a restricted subset of C99 and are declared by 52 | passing a *C code string* and the *kernel function name*, which must match the 53 | name in the C kernel signature, to the :class:`~pyop2.Kernel` constructor: :: 54 | 55 | midpoint = op2.Kernel(""" 56 | void midpoint(double p[2], double *coords[2]) { 57 | p[0] = (coords[0][0] + coords[1][0] + coords[2][0]) / 3.0; 58 | p[1] = (coords[0][1] + coords[1][1] + coords[2][1]) / 3.0; 59 | }""", "midpoint") 60 | 61 | Since kernels cannot return any value, the return type is always ``void``.
The 62 | kernel argument ``p`` corresponds to the third :func:`~pyop2.par_loop` 63 | argument ``midpoints`` and ``coords`` to the fourth argument ``coordinates`` 64 | respectively. Argument names need not agree, the matching is by position. 65 | 66 | Data types of kernel arguments must match the type of data passed to the 67 | parallel loop. The Python types :class:`float` and :class:`numpy.float64` 68 | correspond to a C :class:`double`, :class:`numpy.float32` to a C 69 | :class:`float`, :class:`int` or :class:`numpy.int64` to a C :class:`long` and 70 | :class:`numpy.int32` to a C :class:`int`. 71 | 72 | Direct :func:`~pyop2.par_loop` arguments such as ``midpoints`` are passed to 73 | the kernel as a ``double *``, indirect arguments such as ``coordinates`` as a 74 | ``double **`` with the first indirection due to the map and the second 75 | indirection due to the data dimension. The kernel signature above uses arrays 76 | with explicit sizes to draw attention to the fact that these are known. We 77 | could have interchangeably used a kernel signature with plain pointers: 78 | 79 | .. code-block:: c 80 | 81 | void midpoint(double * p, double ** coords) 82 | 83 | Argument creation supports an optional flag ``flatten``, which is used 84 | for kernels which expect data to be laid out by component: :: 85 | 86 | midpoint = op2.Kernel(""" 87 | void midpoint(double p[2], double *coords[1]) { 88 | p[0] = (coords[0][0] + coords[1][0] + coords[2][0]) / 3.0; 89 | p[1] = (coords[3][0] + coords[4][0] + coords[5][0]) / 3.0; 90 | }""", "midpoint") 91 | 92 | op2.par_loop(midpoint, cells, 93 | midpoints(op2.WRITE), 94 | coordinates(op2.READ, cell2vertex, flatten=True)) 95 | 96 | .. _data-layout: 97 | 98 | Data layout 99 | ----------- 100 | 101 | Data for a :class:`~pyop2.Dat` declared on a :class:`~pyop2.Set` is 102 | stored contiguously for all elements of the set.
For each element, 103 | this is a contiguous chunk of data of a shape given by the 104 | :class:`~pyop2.DataSet` ``dim`` and the datatype of the 105 | :class:`~pyop2.Dat`. The size of this chunk is the product of the 106 | extents of the ``dim`` tuple times the size of the datatype. 107 | 108 | During execution of the :func:`~pyop2.par_loop`, the kernel is called 109 | for each element of the iteration set and passed data for each of its 110 | arguments corresponding to the current set element ``i`` only. 111 | 112 | For a directly accessed argument such as ``midpoints`` above, the 113 | kernel is passed a pointer to the beginning of the chunk of data for 114 | the element ``i`` the kernel is currently called for. In CUDA/OpenCL 115 | ``i`` is the global thread id since the kernel is launched in parallel 116 | for all elements. 117 | 118 | .. figure:: images/direct_arg.svg 119 | :align: center 120 | 121 | Data layout for a directly accessed :class:`~pyop2.Dat` argument with 122 | ``dim`` 2 123 | 124 | For an indirectly accessed argument such as ``coordinates`` above, 125 | PyOP2 gathers pointers to the data via the :class:`~pyop2.Map` 126 | ``cell2vertex`` used for the indirection. The kernel is passed a list 127 | of pointers of length corresponding to the *arity* of the 128 | :class:`~pyop2.Map`, in the example above 3. Each of these points to 129 | the data chunk for the element in the target :class:`~pyop2.Set` given 130 | by :class:`~pyop2.Map` entries ``(i, 0)``, ``(i, 1)`` and ``(i, 2)``. 131 | 132 | .. figure:: images/indirect_arg.svg 133 | :align: center 134 | 135 | Data layout for a :class:`~pyop2.Dat` argument with ``dim`` 2 indirectly 136 | accessed through a :class:`~pyop2.Map` of ``arity`` 3 137 | 138 | If the argument is created with the keyword argument ``flatten`` set 139 | to ``True``, a flattened vector of pointers is passed to the kernel. 
140 | This vector is of length ``dim * arity`` (where ``dim`` is the product 141 | of the extents of the ``dim`` tuple), which is 6 in the example above. 142 | Each entry points to a single data value of the :class:`~pyop2.Dat`. 143 | The ordering is by component of ``dim`` i.e. the first component of 144 | each data item for each element in the target set pointed to by the 145 | map followed by the second component etc. 146 | 147 | .. figure:: images/indirect_arg_flattened.svg 148 | :align: center 149 | 150 | Data layout for a flattened :class:`~pyop2.Dat` argument with ``dim`` 2 151 | indirectly accessed through a :class:`~pyop2.Map` of ``arity`` 3 152 | 153 | .. _local-iteration-spaces: 154 | 155 | Local iteration spaces 156 | ---------------------- 157 | 158 | PyOP2 supports complex kernels with large local working set sizes, which may 159 | not run very efficiently on architectures with a limited amount of registers 160 | and on-chip resources. In many cases the resource usage is proportional to the 161 | size of the *local iteration space* the kernel operates on. 162 | 163 | Consider a finite-element local assembly kernel for vector-valued basis 164 | functions of second order on triangles. There are kernels more complex and 165 | computing considerably larger local tensors commonly found in finite-element 166 | computations, in particular for higher-order basis functions, and this kernel 167 | only serves to illustrate the concept. For each element in the iteration set, 168 | this kernel computes a 12x12 local tensor: 169 | 170 | .. code-block:: c 171 | 172 | void kernel(double A[12][12], ...) { 173 | ... 174 | // loops over the local iteration space 175 | for (int j = 0; j < 12; j++) { 176 | for (int k = 0; k < 12; k++) { 177 | A[j][k] += ... 178 | } 179 | } 180 | } 181 | 182 | PyOP2 invokes this kernel for each element in the iteration set: 183 | 184 | .. code-block:: c 185 | 186 | for (int ele = 0; ele < nele; ++ele) { 187 | double A[12][12]; 188 | ... 
189 | kernel(A, ...); 190 | } 191 | 192 | To improve the efficiency of executing complex kernels on manycore 193 | platforms, their operation can be distributed among several threads 194 | which each compute a single point in this local iteration space to 195 | increase the level of parallelism and to lower the amount of resources 196 | required per thread. In the case of the kernel above we obtain: 197 | 198 | .. code-block:: c 199 | 200 | void mass(double A[1][1], ..., int j, int k) { 201 | ... 202 | A[0][0] += ... 203 | } 204 | 205 | Note how the doubly nested loop over basis function is hoisted out of the 206 | kernel, which receives its position in the local iteration space to compute as 207 | additional arguments ``j`` and ``k``. PyOP2 then calls the kernel for 208 | each element of the local iteration space for each set element: 209 | 210 | .. code-block:: c 211 | 212 | for (int ele = 0; ele < nele; ++ele) { 213 | double A[1][1]; 214 | ... 215 | for (int j = 0; j < 12; j++) { 216 | for (int k = 0; k < 12; k++) { 217 | kernel(A, ..., j, k); 218 | } 219 | } 220 | } 221 | 222 | On manycore platforms, the local iteration space does not translate into a 223 | loop nest, but rather into a larger number of threads being launched to 224 | compute each of its elements: 225 | 226 | .. figure:: images/iteration_spaces.svg 227 | :align: center 228 | 229 | Local iteration space for a kernel computing a 12x12 local tensor 230 | 231 | PyOP2 needs to be told to loop over this local iteration space by 232 | indexing the corresponding maps with an 233 | :class:`~pyop2.base.IterationIndex` :data:`~pyop2.i` in the 234 | :func:`~pyop2.par_loop` call. 235 | -------------------------------------------------------------------------------- /doc/sphinx/source/mixed.rst: -------------------------------------------------------------------------------- 1 | .. 
_mixed: 2 | 3 | Mixed Types 4 | =========== 5 | 6 | When solving linear systems of equations as they arise for instance in the 7 | finite-element method (FEM), one is often interested in *coupled* solutions of 8 | more than one quantity. In fluid dynamics, a common example is solving a 9 | coupled system of velocity and pressure as it occurs in some formulations of 10 | the Navier-Stokes equations. 11 | 12 | Mixed Set, DataSet, Map and Dat 13 | ------------------------------- 14 | 15 | PyOP2 provides the mixed types :class:`~pyop2.MixedSet` 16 | :class:`~pyop2.MixedDataSet`, :class:`~pyop2.MixedMap` and 17 | :class:`~pyop2.MixedDat` for a :class:`~pyop2.Set`, :class:`~pyop2.DataSet`, 18 | :class:`~pyop2.Map` and :class:`~pyop2.Dat` respectively. A mixed type is 19 | constructed from a list or other iterable of its base type and provides the 20 | same attributes and methods. Under most circumstances types and mixed types 21 | behave the same way and can be treated uniformly. Mixed types allow iteration 22 | over their constituent parts and for convenience the base types are also 23 | iterable, yielding themselves. 
24 | 25 | A :class:`~pyop2.MixedSet` is defined from a list of sets: :: 26 | 27 | s1, s2 = op2.Set(N), op2.Set(M) 28 | ms = op2.MixedSet([s1, s2]) 29 | 30 | There are a number of equivalent ways of defining a 31 | :class:`~pyop2.MixedDataSet`: :: 32 | 33 | mds = op2.MixedDataSet([s1, s2], (1, 2)) 34 | mds = op2.MixedDataSet([s1**1, s2**2]) 35 | mds = op2.MixedDataSet(ms, (1, 2)) 36 | mds = ms**(1, 2) 37 | 38 | A :class:`~pyop2.MixedDat` with no associated data is defined in one of the 39 | following ways: :: 40 | 41 | md = op2.MixedDat(mds) 42 | md = op2.MixedDat([s1**1, s2**2]) 43 | md = op2.MixedDat([op2.Dat(s1**1), op2.Dat(s2**2)]) 44 | 45 | Finally, a :class:`~pyop2.MixedMap` is defined from a list of maps, all of 46 | which must share the same source :class:`~pyop2.Set`: :: 47 | 48 | it = op2.Set(S) 49 | mm = op2.MixedMap([op2.Map(it, s1, 2), op2.Map(it, s2, 3)]) 50 | 51 | Block Sparsity and Mat 52 | ---------------------- 53 | 54 | When declaring a :class:`~pyop2.Sparsity` on pairs of mixed maps, the 55 | resulting sparsity pattern has a square block structure with as many block 56 | rows and columns as there are components in the :class:`~pyop2.MixedDataSet` 57 | forming its row and column space. In the most general case a 58 | :class:`~pyop2.Sparsity` is constructed as follows: :: 59 | 60 | it = op2.Set(...) # Iteration set 61 | sr0, sr1 = op2.Set(...), op2.Set(...) # Sets for row spaces 62 | sc0, sc1 = op2.Set(...), op2.Set(...) 
# Sets for column spaces 63 | # MixedMaps for the row and column spaces 64 | mr = op2.MixedMap([op2.Map(it, sr0, ...), op2.Map(it, sr1, ...)]) 65 | mc = op2.MixedMap([op2.Map(it, sc0, ...), op2.Map(it, sc1, ...)]) 66 | # MixedDataSets for the row and column spaces 67 | dsr = op2.MixedDataSet([sr0**1, sr1**1]) 68 | dsc = op2.MixedDataSet([sc0**1, sc1**1]) 69 | # Blocked sparsity 70 | sparsity = op2.Sparsity((dsr, dsc), [(mr, mc), ...]) 71 | 72 | The relationships of each component of the mixed maps and datasets to the 73 | blocks of the :class:`~pyop2.Sparsity` is shown in the following diagram: 74 | 75 | .. figure:: images/mixed_sparsity.svg 76 | :align: center 77 | 78 | The contribution of sets, maps and datasets to the blocked sparsity. 79 | 80 | Block sparsity patterns are computed separately for each block as described in 81 | :ref:`sparsity_pattern` and the same validity rules apply. A 82 | :class:`~pyop2.Mat` defined on a block :class:`~pyop2.Sparsity` has the same 83 | block structure, which is implemented using a PETSc_ MATNEST_. 84 | 85 | Mixed Assembly 86 | -------------- 87 | 88 | When assembling into a :class:`~pyop2.MixedDat` or a block 89 | :class:`~pyop2.Mat`, the :class:`~pyop2.Kernel` produces a local tensor of the 90 | same block structure, which is a combination of :ref:`local-iteration-spaces` 91 | of all its subblocks. This is entirely transparent to the kernel however, 92 | which sees the combined local iteration space. PyOP2 ensures that indirectly 93 | accessed data is gathered and scattered via the correct maps and packed 94 | together into a contiguous vector to be passed to the kernel. Contributions 95 | from the local tensor are assembled into the correct blocks of the 96 | :class:`~pyop2.MixedDat` or :class:`~pyop2.Mat`. 97 | 98 | Consider the following example :func:`~pyop2.par_loop` assembling a block 99 | :class:`~pyop2.Mat`: 100 | 101 | .. code-block:: python 102 | 103 | it, cells, nodes = op2.Set(...), op2.Set(...), op2.Set(...) 
104 | mds = op2.MixedDataSet([nodes, cells]) 105 | mmap = op2.MixedMap([op2.Map(it, nodes, 2, ...), op2.Map(it, cells, 1, ...)]) 106 | mat = op2.Mat(op2.Sparsity(mds, mmap)) 107 | d = op2.MixedDat(mds) 108 | 109 | op2.par_loop(kernel, it, 110 | mat(op2.INC, (mmap[op2.i[0]], mmap[op2.i[1]])), 111 | d(op2.read, mmap)) 112 | 113 | The ``kernel`` for this :func:`~pyop2.par_loop` assembles a 3x3 local tensor 114 | and is passed an input vector of length 3 for each iteration set element: 115 | 116 | .. code-block:: c 117 | 118 | void kernel(double v[3][3] , double **d ) { 119 | for (int i = 0; i<3; i++) 120 | for (int j = 0; j<3; j++) 121 | v[i][j] += d[i][0] * d[j][0]; 122 | } 123 | 124 | The top-left 2x2 block of the local tensor is assembled into the (0,0) block 125 | of the matrix, the top-right 2x1 block into (0,1), the bottom-left 1x2 block 126 | into (1,0) and finally the bottom-right 1x1 block into (1,1). Note that for 127 | the (0,0) block only the first component of the :class:`~pyop2.MixedDat` is 128 | read and for the (1,1) block only the second component. For the (0,1) and 129 | (1,0) blocks, both components of the :class:`~pyop2.MixedDat` are accessed. 130 | 131 | This diagram illustrates the assembly of the block :class:`~pyop2.Mat`: 132 | 133 | .. figure:: images/mixed_assembly.svg 134 | :align: center 135 | 136 | Assembling into the blocks of a global matrix :math:`A`: block 137 | :math:`A^{0,0}` uses maps :math:`\iota^{1,0}` and :math:`\iota^{2,0}`, 138 | :math:`A^{0,1}` uses :math:`\iota^{1,0}` and :math:`\iota^{2,1}`, 139 | :math:`A^{1,0}` uses :math:`\iota^{1,1}` and :math:`\iota^{2,0}` and finally 140 | :math:`A^{1,1}` uses :math:`\iota^{1,1}` and :math:`\iota^{2,1}` for the row 141 | and column spaces respectively. 142 | 143 | .. _PETSc: http://www.mcs.anl.gov/petsc/ 144 | .. 
_MATNEST: http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MATNEST.html 145 | -------------------------------------------------------------------------------- /doc/sphinx/source/mpi.rst: -------------------------------------------------------------------------------- 1 | .. _mpi: 2 | 3 | MPI 4 | === 5 | 6 | Distributed parallel computations with MPI in PyOP2 require the mesh to be 7 | partitioned among the processors. To be able to compute over entities on their 8 | boundaries, partitions need to access data owned by neighboring processors. 9 | This region, called the *halo*, needs to be kept up to date and is therefore 10 | exchanged between the processors as required. 11 | 12 | Local Numbering 13 | --------------- 14 | 15 | The partition of each :class:`~pyop2.Set` local to each process consists of 16 | entities *owned* by the process and the *halo*, which are entities owned by 17 | other processes but required to compute on the boundary of the owned entities. 18 | Each of these sections is again divided into two sections required to 19 | efficiently overlap communication and computation and avoid communication 20 | during matrix assembly as described below. Each locally stored 21 | :class:`~pyop2.Set` entity therefore belongs to one of four categories: 22 | 23 | * **Core**: Entities owned by this processor which can be processed without 24 | accessing halo data. 25 | * **Owned**: Entities owned by this processor which access halo data when 26 | processed. 27 | * **Exec halo**: Off-processor entities which are redundantly executed over 28 | because they touch owned entities. 29 | * **Non-exec halo**: Off-processor entities which are not processed, but read 30 | when computing the exec halo. 31 | 32 | The following diagram illustrates the four sections for a mesh distributed 33 | among two processors: 34 | 35 | ..
figure:: images/pyop2_mpi_mesh.svg 36 | :align: center 37 | 38 | A mesh distributed among two processors with the entities of each mesh 39 | partition divided into *core*, *owned*, *exec halo* and *non-exec halo*. 40 | Matching halo sections are highlighted in matching colours. The owned 41 | section of process 0 corresponds to the non-exec section of process 1. 42 | 43 | For data defined on the :class:`~pyop2.Set` to be stored contiguously per 44 | section, local :class:`~pyop2.Set` entities must be numbered such that core 45 | entities are first, followed by owned, exec halo and non-exec halo in that 46 | order. A good partitioning maximises the size of the core section and 47 | minimises the halo regions. We can therefore assume that the vast majority of 48 | local :class:`~pyop2.Set` entities are in the core section. 49 | 50 | Computation-communication Overlap 51 | --------------------------------- 52 | 53 | The ordering of :class:`~pyop2.Set` entities into four sections allows for a 54 | very efficient overlap of computation and communication. Core entities that do 55 | not access any halo data can be processed entirely without access to halo data 56 | immediately after the halo exchange has been initiated. Execution over the 57 | owned and exec halo regions requires up to date halo data and can only start 58 | once the halo exchange is completed. Depending on the latency and bandwidth 59 | of communication and the size of the core section relative to the halo, the 60 | halo exchange may complete before the computation on the core section.
61 | 62 | The entire process is given below: :: 63 | 64 | halo_exchange_begin() # Initiate halo exchange 65 | maybe_set_dat_dirty() # Mark Dats as modified 66 | compute_if_not_empty(itset.core_part) # Compute core region 67 | halo_exchange_end() # Wait for halo exchange 68 | compute_if_not_empty(itset.owned_part) # Compute owned region 69 | reduction_begin() # Initiate reductions 70 | if needs_exec_halo: # Any indirect Dat not READ? 71 | compute_if_not_empty(itset.exec_part) # Compute exec halo region 72 | reduction_end() # Wait for reductions 73 | maybe_set_halo_update_needed() # Mark halos as out of date 74 | assemble() # Finalise matrix assembly 75 | 76 | Any reductions depend on data from the core and owned sections and are 77 | initiated as soon as the owned section has been processed and execute 78 | concurrently with computation on the exec halo. Similar to 79 | `halo_exchange_begin` and `halo_exchange_end`, `reduction_begin` and 80 | `reduction_end` do no work at all if none of the :func:`~pyop2.par_loop` 81 | arguments requires a reduction. If the :func:`~pyop2.par_loop` assembles a 82 | :class:`~pyop2.Mat`, the matrix assembly is finalised at the end. 83 | 84 | By dividing entities into sections according to their relation to the halo, 85 | there is no need to check whether or not a given entity touches the halo or 86 | not during computations on each section. This avoids branching in kernels or 87 | wrapper code and allows launching separate kernels for GPU execution of each 88 | section. The :func:`~pyop2.par_loop` execution therefore has the above 89 | structure for all backends. 90 | 91 | Halo exchange 92 | ------------- 93 | 94 | Exchanging halo data is only required if the halo data is actually read, which 95 | is the case for :class:`~pyop2.Dat` arguments to a :func:`~pyop2.par_loop` 96 | used in :data:`pyop2.READ` or :data:`pyop2.RW` mode. PyOP2 keeps track 97 | whether or not the halo region may have been modified. 
This is the case for 98 | :class:`Dats ` used in :data:`pyop2.INC`, :data:`pyop2.WRITE` or 99 | :data:`pyop2.RW` mode or when a :class:`~pyop2.Solver` or a user requests 100 | access to the data. A halo exchange is triggered only for halos marked as out 101 | of date. 102 | 103 | Distributed Assembly 104 | -------------------- 105 | 106 | For an MPI distributed matrix or vector, assembling owned entities at the 107 | boundary can contribute to off-process degrees of freedom and vice versa. 108 | 109 | There are different ways of accounting for these off-process contributions. 110 | PETSc_ supports insertion and subsequent communication of off-process matrix 111 | and vector entries, however its implementation is not thread safe. Concurrent 112 | insertion into PETSc_ MPI matrices *is* thread safe if off-process insertions 113 | are not cached and concurrent writes to rows are avoided, which is done 114 | through colouring as described in :ref:`plan-colouring`. 115 | 116 | PyOP2 therefore disables PETSc_'s off-process insertion feature and instead 117 | redundantly computes over all off process entities that touch local dofs, 118 | which is the *exec halo* section described above. The price for this is 119 | maintaining a larger halo, since we also need halo data, the *non-exec halo* 120 | section, to perform the redundant computation. Halos grow by about a factor 121 | two, however in practice this is still small compared to the interior region 122 | of a partition and the main cost of halo exchange is the latency, which is 123 | independent of the exchanged data volume. 124 | 125 | .. _PETSc: http://www.mcs.anl.gov/petsc/ 126 | -------------------------------------------------------------------------------- /doc/sphinx/source/plan.rst: -------------------------------------------------------------------------------- 1 | .. 
_plan: 2 | 3 | Parallel Execution Plan 4 | ======================= 5 | 6 | For all PyOP2 backends with the exception of sequential, a parallel execution 7 | plan is computed for each :func:`~pyop2.par_loop`. It contains information 8 | guiding the code generator on how to partition, stage and colour the data for 9 | efficient parallel processing. 10 | 11 | .. _plan-partitioning: 12 | 13 | Partitioning 14 | ------------ 15 | 16 | The iteration set is split into a number of equally sized and contiguous 17 | mini-partitions such that the working set of each mini-partition fits into 18 | shared memory or last level cache. This is unrelated to the partitioning 19 | required for MPI as described in :ref:`mpi`. 20 | 21 | .. _plan-renumbering: 22 | 23 | Local Renumbering and Staging 24 | ----------------------------- 25 | 26 | While a mini-partition is a contiguous chunk of the iteration set, the 27 | indirectly accessed data it references is not necessarily contiguous. For each 28 | mini-partition and unique :class:`~pyop2.Dat`-:class:`~pyop2.Map` pair, a 29 | mapping from local indices within the partition to global indices is 30 | constructed as the sorted array of unique :class:`~pyop2.Map` indices accessed 31 | by this partition. At the same time, a global-to-local mapping is constructed 32 | as its inverse. 33 | 34 | Data for indirectly accessed :class:`~pyop2.Dat` arguments is staged in shared 35 | device memory as described in :ref:`backends`. For each partition, the 36 | local-to-global mapping indicates where data to be staged in is read from and 37 | the global-to-local mapping gives the location in shared memory data has been 38 | staged at. The amount of shared memory required is computed from the size of 39 | the local-to-global mapping. 40 | 41 | .. _plan-colouring: 42 | 43 | Colouring 44 | --------- 45 | 46 | A two-level colouring is used to avoid race conditions. 
Partitions are 47 | coloured such that partitions of the same colour can be executed concurrently 48 | and threads executing on a partition in parallel are coloured such that no two 49 | threads indirectly reference the same data. Only :func:`~pyop2.par_loop` 50 | arguments performing an indirect reduction or assembling a matrix require 51 | colouring. Matrices are coloured per row. 52 | 53 | For each element of a :class:`~pyop2.Set` indirectly accessed in a 54 | :func:`~pyop2.par_loop`, a bit vector is used to record which colours 55 | indirectly reference it. To colour each thread within a partition, the 56 | algorithm proceeds as follows: 57 | 58 | 1. Loop over all indirectly accessed arguments and collect the colours of all 59 | :class:`~pyop2.Set` elements referenced by the current thread in a bit mask. 60 | 2. Choose the next available colour as the colour of the current thread. 61 | 3. Loop over all :class:`~pyop2.Set` elements indirectly accessed by the 62 | current thread again and set the new colour in their colour mask. 63 | 64 | Since the bit mask is a 32-bit integer, up to 32 colours can be processed in a 65 | single pass, which is sufficient for most applications. If not all threads can 66 | be coloured with 32 distinct colours, the mask is reset and another pass is 67 | made, where each newly allocated colour is offset by 32. Should another pass 68 | be required, the offset is increased to 64 and so on until all threads are 69 | coloured. 70 | 71 | .. figure:: images/pyop2_colouring.svg 72 | :align: center 73 | 74 | Thread colouring within a mini-partition for a :class:`~pyop2.Dat` on 75 | vertices indirectly accessed in a computation over the edges. The edges are 76 | coloured such that no two edges touch the same vertex within the partition. 
77 | 78 | The colouring of mini-partitions is done in the same way, except that all 79 | :class:`~pyop2.Set` elements indirectly accessed by the entire partition are 80 | referenced, not only those accessed by a single thread. 81 | -------------------------------------------------------------------------------- /doc/sphinx/source/profiling.rst: -------------------------------------------------------------------------------- 1 | Profiling 2 | ========= 3 | 4 | Profiling PyOP2 programs 5 | ------------------------ 6 | 7 | Profiling a PyOP2 program is as simple as profiling any other Python 8 | code. You can profile the jacobi demo in the PyOP2 ``demo`` folder as 9 | follows: :: 10 | 11 | python -m cProfile -o jacobi.dat jacobi.py 12 | 13 | This will run the entire program under cProfile_ and write the profiling 14 | data to ``jacobi.dat``. Omitting ``-o`` will print a summary to stdout, 15 | which is not very helpful in most cases. 16 | 17 | Creating a graph 18 | ................ 19 | 20 | There is a much more intuitive way of representing the profiling data 21 | using the excellent gprof2dot_ to generate a graph. Install from `PyPI 22 | `__ with :: 23 | 24 | sudo pip install gprof2dot 25 | 26 | Use as follows to create a PDF: :: 27 | 28 | gprof2dot -f pstats -n 1 jacobi.dat | dot -Tpdf -o jacobi.pdf 29 | 30 | ``-f pstats`` tells ``gprof2dot`` that it is dealing with Python 31 | cProfile_ data (and not actual *gprof* data) and ``-n 1`` ignores 32 | everything that makes up less than 1% of the total runtime - most likely 33 | you are not interested in that (the default is 0.5). 34 | 35 | Consolidating profiles from different runs 36 | .......................................... 
37 | 38 | To aggregate profiling data from different runs, save the following as 39 | ``concat.py``: :: 40 | 41 | """Usage: concat.py PATTERN FILE""" 42 | 43 | import sys 44 | from glob import glob 45 | from pstats import Stats 46 | 47 | if len(sys.argv) != 3: 48 | print(__doc__) 49 | sys.exit(1) 50 | files = glob(sys.argv[1]) 51 | s = Stats(files[0]) 52 | for f in files[1:]: s.add(f) 53 | s.dump_stats(sys.argv[2]) 54 | 55 | With profiles from different runs named ``.*.part``, use it 56 | as :: 57 | 58 | python concat.py '.*.part' .dat 59 | 60 | and then call ``gprof2dot`` as before. 61 | 62 | Using PyOP2's internal timers 63 | ----------------------------- 64 | 65 | PyOP2 automatically times the execution of certain regions: 66 | 67 | * Sparsity building 68 | * Plan construction 69 | * Parallel loop kernel execution 70 | * Halo exchange 71 | * Reductions 72 | * PETSc Krylov solver 73 | 74 | To output those timings, call :func:`~pyop2.profiling.summary` in your 75 | PyOP2 program or run with the environment variable 76 | ``PYOP2_PRINT_SUMMARY`` set to 1. 77 | 78 | To query e.g. the timer for parallel loop execution programmatically, 79 | use the :func:`~pyop2.profiling.timing` helper: :: 80 | 81 | from pyop2 import timing 82 | timing("ParLoop compute") # get total time 83 | timing("ParLoop compute", total=False) # get average time per call 84 | 85 | To add additional timers to your own code, you can use the 86 | :func:`~pyop2.profiling.timed_region` and 87 | :func:`~pyop2.profiling.timed_function` helpers: :: 88 | 89 | from pyop2.profiling import timed_region, timed_function 90 | 91 | with timed_region("my code"): 92 | # my code 93 | 94 | @timed_function("my function") 95 | def my_func(): 96 | # my func 97 | 98 | Line-by-line profiling 99 | ---------------------- 100 | 101 | To get a line-by-line profile of a given function, install Robert Kern's 102 | `line profiler`_ and: 103 | 104 | 1.
Import the :func:`~pyop2.profiling.profile` decorator: :: 105 | 106 | from pyop2.profiling import profile 107 | 108 | 2. Decorate the function to profile with ``@profile`` 109 | 3. Run your script with ``kernprof.py -l `` 110 | 4. Generate an annotated source file with :: 111 | 112 | python -m line_profiler 113 | 114 | Note that ``kernprof.py`` injects the ``@profile`` decorator into the 115 | Python builtins namespace. PyOP2 provides a passthrough version of this 116 | decorator which does nothing if ``profile`` is not found in 117 | ``__builtins__``. This means you can run your script regularly without 118 | having to remove the decorators again. 119 | 120 | The :func:`~pyop2.profiling.profile` decorator also works with the 121 | memory profiler (see below). PyOP2 therefore provides the 122 | :func:`~pyop2.profiling.lineprof` decorator which is only enabled when 123 | running with ``kernprof.py``. 124 | 125 | A number of PyOP2 internal functions are decorated such that running 126 | your PyOP2 application with ``kernprof.py`` will produce a line-by-line 127 | profile of the parallel loop computation (but not the generated code!). 128 | 129 | Memory profiling 130 | ---------------- 131 | 132 | To profile the memory usage of your application, install Fabian 133 | Pedregosa's `memory profiler`_ and: 134 | 135 | 1. Import the :func:`~pyop2.profiling.profile` decorator: :: 136 | 137 | from pyop2.profiling import profile 138 | 139 | 2. Decorate the function to profile with ``@profile``. 140 | 3. Run your script with :: 141 | 142 | python -m memory_profiler 143 | 144 | to get a line-by-line memory profile of your function. 145 | 4. Run your script with :: 146 | 147 | memprof run --python 148 | 149 | to record memory usage of your program over time. 150 | 5. Generate a plot of the memory profile with ``memprof plot``. 151 | 152 | Note that ``memprof`` and ``python -m memory_profiler`` inject the 153 | ``@profile`` decorator into the Python builtins namespace. 
PyOP2 154 | provides a passthrough version of this decorator which does nothing if 155 | ``profile`` is not found in ``__builtins__``. This means you can run 156 | your script regularly without having to remove the decorators again. 157 | 158 | The :func:`~pyop2.profiling.profile` decorator also works with the line 159 | profiler (see below). PyOP2 therefore provides the 160 | :func:`~pyop2.profiling.memprof` decorator which is only enabled when 161 | running with ``memprof``. 162 | 163 | A number of PyOP2 internal functions are decorated such that running 164 | your PyOP2 application with ``memprof run`` will produce a memory 165 | profile of the parallel loop computation (but not the generated code!). 166 | 167 | .. _cProfile: https://docs.python.org/2/library/profile.html#cProfile 168 | .. _gprof2dot: https://code.google.com/p/jrfonseca/wiki/Gprof2Dot 169 | .. _line profiler: https://pythonhosted.org/line_profiler/ 170 | .. _memory profiler: https://github.com/fabianp/memory_profiler 171 | -------------------------------------------------------------------------------- /doc/sphinx/source/user.rst: -------------------------------------------------------------------------------- 1 | pyop2 user documentation 2 | ======================== 3 | 4 | :mod:`pyop2` Package 5 | -------------------- 6 | 7 | .. automodule:: pyop2 8 | :members: 9 | :show-inheritance: 10 | :inherited-members: 11 | 12 | Initialization and finalization 13 | ............................... 14 | 15 | .. autofunction:: init 16 | .. autofunction:: exit 17 | 18 | Data structures 19 | ............... 20 | 21 | .. autoclass:: Set 22 | :inherited-members: 23 | .. autoclass:: ExtrudedSet 24 | :inherited-members: 25 | .. autoclass:: Subset 26 | :inherited-members: 27 | .. autoclass:: MixedSet 28 | :inherited-members: 29 | .. autoclass:: DataSet 30 | :inherited-members: 31 | .. autoclass:: MixedDataSet 32 | :inherited-members: 33 | .. autoclass:: Map 34 | :inherited-members: 35 | .. 
autoclass:: MixedMap 36 | :inherited-members: 37 | .. autoclass:: Sparsity 38 | :inherited-members: 39 | 40 | .. autoclass:: Const 41 | :inherited-members: 42 | .. autoclass:: Global 43 | :inherited-members: 44 | .. autoclass:: Dat 45 | :inherited-members: 46 | .. autoclass:: MixedDat 47 | :inherited-members: 48 | .. autoclass:: Mat 49 | :inherited-members: 50 | 51 | Parallel loops, kernels and linear solves 52 | ......................................... 53 | 54 | .. autofunction:: par_loop 55 | .. autofunction:: solve 56 | 57 | .. autoclass:: Kernel 58 | :inherited-members: 59 | .. autoclass:: Solver 60 | :inherited-members: 61 | 62 | .. autodata:: i 63 | .. autodata:: READ 64 | .. autodata:: WRITE 65 | .. autodata:: RW 66 | .. autodata:: INC 67 | .. autodata:: MIN 68 | .. autodata:: MAX 69 | -------------------------------------------------------------------------------- /pyop2/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyOP2 is a library for parallel computations on unstructured meshes. 3 | """ 4 | from pyop2.op2 import * # noqa 5 | from pyop2.version import __version_info__ # noqa: just expose 6 | 7 | from pyop2._version import get_versions 8 | __version__ = get_versions()['version'] 9 | del get_versions 10 | 11 | from . 
import _version 12 | __version__ = _version.get_versions()['version'] 13 | -------------------------------------------------------------------------------- /pyop2/codegen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OP2/PyOP2/4f6c72098ec9f75ef3956783a894a7b0dd6c2ba7/pyop2/codegen/__init__.py -------------------------------------------------------------------------------- /pyop2/codegen/c/inverse.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifndef PYOP2_WORK_ARRAYS 5 | #define PYOP2_WORK_ARRAYS 6 | #define BUF_SIZE 30 7 | static PetscBLASInt ipiv_buffer[BUF_SIZE]; 8 | static PetscScalar work_buffer[BUF_SIZE*BUF_SIZE]; 9 | #endif 10 | 11 | #ifndef PYOP2_INV_LOG_EVENTS 12 | #define PYOP2_INV_LOG_EVENTS 13 | PetscLogEvent ID_inv_memcpy = -1; 14 | PetscLogEvent ID_inv_getrf = -1; 15 | PetscLogEvent ID_inv_getri = -1; 16 | static PetscBool log_active_inv = 0; 17 | #endif 18 | 19 | void inverse(PetscScalar* __restrict__ Aout, const PetscScalar* __restrict__ A, PetscBLASInt N) 20 | { 21 | PetscLogIsActive(&log_active_inv); 22 | if (log_active_inv){PetscLogEventBegin(ID_inv_memcpy,0,0,0,0);} 23 | PetscBLASInt info; 24 | PetscBLASInt *ipiv = N <= BUF_SIZE ? ipiv_buffer : malloc(N*sizeof(*ipiv)); 25 | PetscScalar *Awork = N <= BUF_SIZE ? 
work_buffer : malloc(N*N*sizeof(*Awork)); 26 | memcpy(Aout, A, N*N*sizeof(PetscScalar)); 27 | if (log_active_inv){PetscLogEventEnd(ID_inv_memcpy,0,0,0,0);} 28 | 29 | if (log_active_inv){PetscLogEventBegin(ID_inv_getrf,0,0,0,0);} 30 | LAPACKgetrf_(&N, &N, Aout, &N, ipiv, &info); 31 | if (log_active_inv){PetscLogEventEnd(ID_inv_getrf,0,0,0,0);} 32 | 33 | if(info == 0){ 34 | if (log_active_inv){PetscLogEventBegin(ID_inv_getri,0,0,0,0);} 35 | LAPACKgetri_(&N, Aout, &N, ipiv, Awork, &N, &info); 36 | if (log_active_inv){PetscLogEventEnd(ID_inv_getri,0,0,0,0);} 37 | } 38 | 39 | if(info != 0){ 40 | fprintf(stderr, "Getri throws nonzero info."); 41 | abort(); 42 | } 43 | if ( N > BUF_SIZE ) { 44 | free(Awork); 45 | free(ipiv); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /pyop2/codegen/c/solve.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifndef PYOP2_WORK_ARRAYS 5 | #define PYOP2_WORK_ARRAYS 6 | #define BUF_SIZE 30 7 | static PetscBLASInt ipiv_buffer[BUF_SIZE]; 8 | static PetscScalar work_buffer[BUF_SIZE*BUF_SIZE]; 9 | #endif 10 | 11 | #ifndef PYOP2_SOLVE_LOG_EVENTS 12 | #define PYOP2_SOLVE_LOG_EVENTS 13 | PetscLogEvent ID_solve_memcpy = -1; 14 | PetscLogEvent ID_solve_getrf = -1; 15 | PetscLogEvent ID_solve_getrs = -1; 16 | static PetscBool log_active_solve = 0; 17 | #endif 18 | 19 | void solve(PetscScalar* __restrict__ out, const PetscScalar* __restrict__ A, const PetscScalar* __restrict__ B, PetscBLASInt N) 20 | { 21 | PetscLogIsActive(&log_active_solve); 22 | if (log_active_solve){PetscLogEventBegin(ID_solve_memcpy,0,0,0,0);} 23 | PetscBLASInt info; 24 | PetscBLASInt *ipiv = N <= BUF_SIZE ? ipiv_buffer : malloc(N*sizeof(*ipiv)); 25 | memcpy(out,B,N*sizeof(PetscScalar)); 26 | PetscScalar *Awork = N <= BUF_SIZE ? 
work_buffer : malloc(N*N*sizeof(*Awork)); 27 | memcpy(Awork,A,N*N*sizeof(PetscScalar)); 28 | if (log_active_solve){PetscLogEventEnd(ID_solve_memcpy,0,0,0,0);} 29 | 30 | PetscBLASInt NRHS = 1; 31 | const char T = 'T'; 32 | if (log_active_solve){PetscLogEventBegin(ID_solve_getrf,0,0,0,0);} 33 | LAPACKgetrf_(&N, &N, Awork, &N, ipiv, &info); 34 | if (log_active_solve){PetscLogEventEnd(ID_solve_getrf,0,0,0,0);} 35 | 36 | if(info == 0){ 37 | if (log_active_solve){PetscLogEventBegin(ID_solve_getrs,0,0,0,0);} 38 | LAPACKgetrs_(&T, &N, &NRHS, Awork, &N, ipiv, out, &N, &info); 39 | if (log_active_solve){PetscLogEventEnd(ID_solve_getrs,0,0,0,0);} 40 | } 41 | 42 | if(info != 0){ 43 | fprintf(stderr, "Gesv throws nonzero info."); 44 | abort(); 45 | } 46 | 47 | if ( N > BUF_SIZE ) { 48 | free(ipiv); 49 | free(Awork); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /pyop2/codegen/loopycompat.py: -------------------------------------------------------------------------------- 1 | # Everything in this file was formerly in loopy/transform/callable.py 2 | # but was removed in https://github.com/inducer/loopy/pull/327. It has 3 | # been kept here for compatibility but should be phased out. 4 | 5 | # Note that since this code is copypasted, the linter has been turned off. 6 | 7 | # flake8: noqa 8 | 9 | from loopy.kernel.instruction import CallInstruction, MultiAssignmentBase, \ 10 | CInstruction, _DataObliviousInstruction 11 | from loopy.symbolic import CombineMapper, IdentityMapper 12 | from loopy.symbolic import simplify_via_aff 13 | from loopy.kernel.function_interface import CallableKernel 14 | from loopy.translation_unit import TranslationUnit 15 | 16 | 17 | # Tools to match caller to callee args by (guessed) automatic reshaping 18 | # 19 | # (This is undocumented and not recommended, but it is currently needed 20 | # to support Firedrake.) 
21 | 22 | class DimChanger(IdentityMapper): 23 | """ 24 | Mapper to change the dimensions of an argument. 25 | .. attribute:: callee_arg_dict 26 | A mapping from the argument name (:class:`str`) to instances of 27 | :class:`loopy.kernel.array.ArrayBase`. 28 | .. attribute:: desired_shape 29 | A mapping from argument name (:class:`str`) to an instance of 30 | :class:`tuple`. 31 | """ 32 | def __init__(self, callee_arg_dict, desired_shape): 33 | self.callee_arg_dict = callee_arg_dict 34 | self.desired_shape = desired_shape 35 | super().__init__() 36 | 37 | def map_subscript(self, expr): 38 | if expr.aggregate.name not in self.callee_arg_dict: 39 | return super().map_subscript(expr) 40 | callee_arg_dim_tags = self.callee_arg_dict[expr.aggregate.name].dim_tags 41 | flattened_index = sum(dim_tag.stride*idx for dim_tag, idx in 42 | zip(callee_arg_dim_tags, expr.index_tuple)) 43 | new_indices = [] 44 | 45 | from operator import mul 46 | from functools import reduce 47 | stride = reduce(mul, self.desired_shape[expr.aggregate.name], 1) 48 | 49 | for length in self.desired_shape[expr.aggregate.name]: 50 | stride /= length 51 | ind = flattened_index // int(stride) 52 | flattened_index -= (int(stride) * ind) 53 | new_indices.append(simplify_via_aff(ind)) 54 | 55 | return expr.aggregate.index(tuple(new_indices)) 56 | 57 | 58 | def _match_caller_callee_argument_dimension_for_single_kernel( 59 | caller_knl, callee_knl): 60 | """ 61 | :returns: a copy of *caller_knl* with the instance of 62 | :class:`loopy.kernel.function_interface.CallableKernel` addressed by 63 | *callee_function_name* in the *caller_knl* aligned with the argument 64 | dimensions required by *caller_knl*.
65 | """ 66 | from loopy.kernel.array import ArrayBase 67 | from loopy.kernel.data import auto 68 | 69 | for insn in caller_knl.instructions: 70 | if not isinstance(insn, CallInstruction) or ( 71 | insn.expression.function.name != 72 | callee_knl.name): 73 | # Call to a callable kernel can only occur through a 74 | # CallInstruction. 75 | continue 76 | 77 | def _shape_1_if_empty(shape_caller, shape_callee): 78 | assert isinstance(shape_caller, tuple) 79 | if shape_caller == () and shape_caller!=shape_callee: 80 | return (1,) 81 | else: 82 | return shape_caller 83 | 84 | from loopy.kernel.function_interface import ( 85 | ArrayArgDescriptor, get_arg_descriptor_for_expression, 86 | get_kw_pos_association) 87 | _, pos_to_kw = get_kw_pos_association(callee_knl) 88 | arg_id_to_shape = {} 89 | for arg_id, arg in insn.arg_id_to_arg().items(): 90 | arg_id = pos_to_kw[arg_id] 91 | 92 | arg_descr = get_arg_descriptor_for_expression(caller_knl, arg) 93 | if isinstance(arg_descr, ArrayArgDescriptor): 94 | arg_id_to_shape[arg_id] = arg_descr.shape 95 | else: 96 | arg_id_to_shape[arg_id] = (1, ) 97 | 98 | dim_changer = DimChanger( 99 | callee_knl.arg_dict, 100 | arg_id_to_shape) 101 | 102 | new_callee_insns = [] 103 | for callee_insn in callee_knl.instructions: 104 | if isinstance(callee_insn, MultiAssignmentBase): 105 | new_callee_insns.append(callee_insn 106 | .with_transformed_expressions(dim_changer)) 107 | 108 | elif isinstance(callee_insn, (CInstruction, 109 | _DataObliviousInstruction)): 110 | # The layout of the args to a CInstructions is not going to be matched to the caller_kernel, 111 | # they are appended with unmatched args. 112 | # We only use Cinstructions exceptionally, e.g. for adding profile instructions, 113 | # without arguments that required to be matched, so this is ok. 114 | new_callee_insns.append(callee_insn) 115 | else: 116 | raise NotImplementedError("Unknown instruction %s." 
% 117 | type(insn)) 118 | 119 | new_args = [arg if not isinstance(arg, ArrayBase) 120 | else arg.copy(shape=arg_id_to_shape[arg.name], 121 | dim_tags=None, strides=auto, order="C") 122 | for arg in callee_knl.args] 123 | 124 | # subkernel with instructions adjusted according to the new dimensions 125 | new_callee_knl = callee_knl.copy(instructions=new_callee_insns, 126 | args=new_args) 127 | 128 | return new_callee_knl 129 | 130 | 131 | class _FunctionCalledChecker(CombineMapper): 132 | def __init__(self, func_name): 133 | self.func_name = func_name 134 | super().__init__() 135 | 136 | def combine(self, values): 137 | return any(values) 138 | 139 | def map_call(self, expr): 140 | if expr.function.name == self.func_name: 141 | return True 142 | return self.combine( 143 | tuple( 144 | self.rec(child) for child in expr.parameters) 145 | ) 146 | 147 | map_call_with_kwargs = map_call 148 | 149 | def map_constant(self, expr): 150 | return False 151 | 152 | def map_type_cast(self, expr): 153 | return self.rec(expr.child) 154 | 155 | def map_algebraic_leaf(self, expr): 156 | return False 157 | 158 | def map_kernel(self, kernel): 159 | return any(self.rec(insn.expression) for insn in kernel.instructions if 160 | isinstance(insn, MultiAssignmentBase)) 161 | 162 | 163 | def _match_caller_callee_argument_dimension_(program, callee_function_name): 164 | """ 165 | Returns a copy of *program* with the instance of 166 | :class:`loopy.kernel.function_interface.CallableKernel` addressed by 167 | *callee_function_name* in the *program* aligned with the argument 168 | dimensions required by *caller_knl*. 169 | .. note:: 170 | The callee kernel addressed by *callee_function_name*, should be 171 | called at only one location throughout the program, as multiple 172 | invocations would demand complex renaming logic which is not 173 | implemented yet. 
174 | """ 175 | assert isinstance(program, TranslationUnit) 176 | assert isinstance(callee_function_name, str) 177 | assert callee_function_name not in program.entrypoints 178 | assert callee_function_name in program.callables_table 179 | 180 | is_invoking_callee = _FunctionCalledChecker( 181 | callee_function_name).map_kernel 182 | 183 | caller_knl, = [in_knl_callable.subkernel for in_knl_callable in 184 | program.callables_table.values() if isinstance(in_knl_callable, 185 | CallableKernel) and 186 | is_invoking_callee(in_knl_callable.subkernel)] 187 | 188 | from pymbolic.primitives import Call 189 | assert len([insn for insn in caller_knl.instructions if (isinstance(insn, 190 | CallInstruction) and isinstance(insn.expression, Call) and 191 | insn.expression.function.name == callee_function_name)]) == 1 192 | new_callee_kernel = _match_caller_callee_argument_dimension_for_single_kernel( 193 | caller_knl, program[callee_function_name]) 194 | return program.with_kernel(new_callee_kernel) 195 | -------------------------------------------------------------------------------- /pyop2/codegen/node.py: -------------------------------------------------------------------------------- 1 | """Generic abstract node class and utility functions for creating 2 | expression DAG languages.""" 3 | 4 | import collections 5 | 6 | 7 | class Node(object): 8 | """Abstract node class. 9 | 10 | Nodes are not meant to be modified. 11 | 12 | A node can reference other nodes; they are called children. A node 13 | might contain data, or reference other objects which are not 14 | themselves nodes; they are not called children. 15 | 16 | Both the children (if any) and non-child data (if any) are 17 | required to create a node, or determine the equality of two 18 | nodes. For reconstruction, however, only the new children are 19 | necessary. 20 | """ 21 | 22 | __slots__ = ('hash_value',) 23 | 24 | # Non-child data as the first arguments of the constructor. 
class Node(object):
    """Abstract expression-DAG node.

    Instances are treated as immutable.  A node may reference other
    nodes (its *children*) and may also carry non-child data.  Both the
    children and the non-child data are needed to construct a node or
    to decide equality of two nodes; reconstruction, however, only
    needs replacement children.
    """

    __slots__ = ('hash_value',)

    # Names of non-child constructor arguments that precede the
    # children.  Derived classes may override.
    __front__ = ()

    # Names of non-child constructor arguments that follow the
    # children.  Derived classes may override.
    __back__ = ()

    def _cons_args(self, children):
        """Build the constructor argument tuple: front data, then
        *children*, then back data.  Internal utility."""
        front = tuple(getattr(self, attr) for attr in self.__front__)
        back = tuple(getattr(self, attr) for attr in self.__back__)
        return front + tuple(children) + back

    def __reduce__(self):
        # Pickle support: rebuild from the constructor arguments.
        return type(self), self._cons_args(self.children)

    def reconstruct(self, *args):
        """Return a new node whose children come from *args* and whose
        non-child data is copied from ``self``."""
        return type(self)(*self._cons_args(args))

    def __repr__(self):
        arg_text = ", ".join(repr(arg) for arg in self._cons_args(self.children))
        return "%s(%s)" % (type(self).__name__, arg_text)

    def __eq__(self, other):
        """Equality with quick paths: identity first, then a hash
        comparison, falling back to :meth:`is_equal`."""
        if self is other:
            return True
        if hash(self) != hash(other):
            return False
        return self.is_equal(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        """Hash value, computed once and cached on the instance."""
        try:
            return self.hash_value
        except AttributeError:
            self.hash_value = self.get_hash()
            return self.hash_value

    def is_equal(self, other):
        """Equality predicate.

        Override this in derived classes rather than :meth:`__eq__`
        or :meth:`__ne__`.
        """
        if type(self) != type(other):
            return False
        return (self._cons_args(self.children)
                == other._cons_args(other.children))

    def get_hash(self):
        """Hash recipe.

        Override this in derived classes rather than :meth:`__hash__`.
        """
        return hash((type(self),) + self._cons_args(self.children))


def pre_traversal(expression_dags):
    """Pre-order traversal of the nodes of expression DAGs, visiting
    each node exactly once."""
    visited = set()
    stack = []
    # Duplicated roots are visited only once; keep the original root
    # ordering for deterministic code generation.
    for root in expression_dags:
        if root not in visited:
            visited.add(root)
            stack.append(root)

    while stack:
        node = stack.pop()
        yield node
        for child in reversed(node.children):
            if child not in visited:
                visited.add(child)
                stack.append(child)


def post_traversal(expression_dags):
    """Post-order traversal of the nodes of expression DAGs, visiting
    each node exactly once."""
    visited = set()
    stack = []
    # Duplicated roots are visited only once; keep the original root
    # ordering for deterministic code generation.
    for root in expression_dags:
        if root not in visited:
            visited.add(root)
            stack.append((root, list(root.children)))

    while stack:
        node, pending = stack[-1]
        for position, child in enumerate(pending):
            if child is not None and child not in visited:
                # Descend into an unvisited child; mark it handled in
                # the parent's pending list.
                stack.append((child, list(child.children)))
                pending[position] = None
                break
        else:
            # All children emitted: emit the node itself.
            yield node
            visited.add(node)
            stack.pop()


# Default to the more efficient pre-order traversal
traversal = pre_traversal


def collect_refcount(expression_dags):
    """Collect reference counts for a multi-root expression DAG."""
    counts = collections.Counter(expression_dags)
    for node in traversal(expression_dags):
        counts.update(node.children)
    return counts


def noop_recursive(function):
    """No-op wrapper for functions with overridable recursive calls.

    :arg function: a function with parameters (value, rec), where
                   ``rec`` is expected to be a function used for
                   recursive calls.
    :returns: a function with working recursion and nothing fancy
    """
    def rec(node):
        return function(node, rec)
    return rec


def noop_recursive_arg(function):
    """No-op wrapper for functions with overridable recursive calls
    and an extra argument.

    :arg function: a function with parameters (value, rec, arg), where
                   ``rec`` is expected to be a function used for
                   recursive calls.
    :returns: a function with working recursion and nothing fancy
    """
    def rec(node, arg):
        return function(node, rec, arg)
    return rec


class Memoizer(object):
    """Caching wrapper for functions with overridable recursive calls.
    The lifetime of the cache is the lifetime of the object instance.

    :arg function: a function with parameters (value, rec), where
                   ``rec`` is expected to be a function used for
                   recursive calls.
    :returns: a function with working recursion and caching
    """
    def __init__(self, function):
        self.cache = {}
        self.function = function

    def __call__(self, node):
        try:
            return self.cache[node]
        except KeyError:
            value = self.function(node, self)
            self.cache[node] = value
            return value


class MemoizerArg(object):
    """Caching wrapper for functions with overridable recursive calls
    and an extra argument.  The lifetime of the cache is the lifetime
    of the object instance.

    :arg function: a function with parameters (value, rec, arg), where
                   ``rec`` is expected to be a function used for
                   recursive calls.
    :returns: a function with working recursion and caching
    """
    def __init__(self, function):
        self.cache = {}
        self.function = function

    def __call__(self, node, arg):
        key = (node, arg)
        try:
            return self.cache[key]
        except KeyError:
            value = self.function(node, self, arg)
            self.cache[key] = value
            return value


def reuse_if_untouched(node, self):
    """Reuse-if-untouched recipe: rebuild *node* only when mapping
    changed at least one child."""
    new_children = [self(child) for child in node.children]
    if any(nc != c for nc, c in zip(new_children, node.children)):
        return node.reconstruct(*new_children)
    return node


def reuse_if_untouched_arg(node, self, arg):
    """Reuse-if-untouched recipe propagating an extra argument."""
    new_children = [self(child, arg) for child in node.children]
    if any(nc != c for nc, c in zip(new_children, node.children)):
        return node.reconstruct(*new_children)
    return node
from pyop2.codegen.node import traversal, reuse_if_untouched, Memoizer
from functools import singledispatch
from pyop2.codegen.representation import (Index, RuntimeIndex, Node,
                                          FunctionCall, Variable, Argument)


def collect_indices(expressions):
    """Collect indices in expressions.

    :arg expressions: an iterable of expressions to collect indices
        from.
    :returns: iterable of nodes of type :class:`Index` or
        :class:`RuntimeIndex`.
    """
    for node in traversal(expressions):
        if isinstance(node, (Index, RuntimeIndex)):
            yield node


@singledispatch
def replace_indices(node, self):
    # Fallback: every node type reaching here should have been handled
    # by one of the registered implementations below.
    raise AssertionError("Unhandled node type %r" % type(node))


# Generic nodes are only rebuilt when one of their children changed.
replace_indices.register(Node)(reuse_if_untouched)


@replace_indices.register(Index)
def replace_indices_index(node, self):
    # Substitute a merged index if one was recorded for this node,
    # otherwise keep it as-is.
    return self.subst.get(node, node)


def index_merger(instructions, cache=None):
    """Merge indices across an instruction stream.

    Indices are candidates for merging if they have the same extent as
    an already seen index in the instruction stream, and appear at the
    same level of the loop nest.

    :arg instructions: Iterable of nodes to merge indices across.
    :arg cache: Optional mapping carrying merge state across calls.
    :returns: a memoized callable suitable for index merging.
    """
    if cache is None:
        cache = {}

    # Maps an index to its chosen replacement (once one is picked).
    appeared = {}
    # Accumulates (old index, new index) substitution pairs.
    subst = []

    index_replacer = Memoizer(replace_indices)

    for insn in instructions:
        if isinstance(insn, FunctionCall):
            # Indices inside function calls are not merged.
            continue

        indices = tuple(i for i in collect_indices([insn]))
        # Partition: runtime indices first, then free (compile-time) ones.
        runtime = tuple(i for i in indices if not isinstance(i, Index))
        free = tuple(i for i in indices if isinstance(i, Index))

        indices = runtime + free

        # Cache key: runtime indices compare as nodes, free indices
        # only by their extent (that is what makes them mergeable).
        key = runtime + tuple(i.extent for i in free)
        full_key = key
        # Look for matching key prefix
        while key not in cache and len(key):
            key = key[:-1]

        if key in cache:
            # Reuse already-seen indices for the matched prefix; keep
            # this instruction's own indices for the remainder.
            new_indices = cache[key] + indices[len(key):]
        else:
            new_indices = indices

        # Record every prefix of the full key so that later
        # instructions can match at any nesting level.
        for i in range(len(key), len(full_key) + 1):
            cache[full_key[:i]] = new_indices[:i]

        for i, ni in zip(indices, new_indices):
            if i in appeared:
                # NOTE(review): extent-1 Index nodes are deliberately
                # excluded from substitution -- presumably unit loops
                # are specialised elsewhere; confirm before changing.
                if isinstance(i, (Index)) and i.extent != 1 or isinstance(i, (RuntimeIndex)):
                    subst.append((i, appeared[i]))
            if i != ni:
                if i in appeared:
                    # An index must always map to the same replacement.
                    assert appeared[i] == ni
                appeared[i] = ni
                if isinstance(i, (Index)) and i.extent != 1 or isinstance(i, (RuntimeIndex)):
                    subst.append((i, ni))

    index_replacer.subst = dict(subst)
    return index_replacer


@singledispatch
def _rename_node(node, self):
    """Rename nodes

    :param node: root of expression
    :param self: function for recursive calls
    """
    raise AssertionError("cannot handle type %s" % type(node))


# Generic nodes are only rebuilt when one of their children changed.
_rename_node.register(Node)(reuse_if_untouched)


@_rename_node.register(Index)
def _rename_node_index(node, self):
    # A renamed index keeps its extent.
    name = self.renamer(node)
    return Index(extent=node.extent, name=name)


@_rename_node.register(FunctionCall)
def _rename_node_func(node, self):
    # Rename both the free indices and the children of the call.
    free_indices = tuple(map(self, node.free_indices))
    children = tuple(map(self, node.children))
    return FunctionCall(node.name, node.label, node.access, free_indices, *children)


@_rename_node.register(Variable)
def _rename_node_variable(node, self):
    name = self.renamer(node)
    return Variable(name, node.shape, node.dtype)


@_rename_node.register(Argument)
def _rename_node_argument(node, self):
    name = self.renamer(node)
    return Argument(node.shape, node.dtype, name=name)


def rename_nodes(instructions, renamer):
    """Rename the nodes in the instructions.

    :param instructions: Iterable of nodes.
    :param renamer: Function that maps nodes to new names
    :return: List of instructions with nodes renamed.
    """
    mapper = Memoizer(_rename_node)
    mapper.renamer = renamer
    return list(map(mapper, instructions))
"""PyOP2 global configuration."""

import os
from tempfile import gettempdir
from loopy.target.c import CWithGNULibcTarget

from pyop2.exceptions import ConfigurationError


class Configuration(dict):
    r"""PyOP2 configuration parameters

    :param cc: C compiler (executable name eg: `gcc`
        or path eg: `/opt/gcc/bin/gcc`).
    :param cxx: C++ compiler (executable name eg: `g++`
        or path eg: `/opt/gcc/bin/g++`).
    :param ld: Linker (executable name `ld`
        or path eg: `/opt/gcc/bin/ld`).
    :param cflags: extra flags to be passed to the C compiler.
    :param cxxflags: extra flags to be passed to the C++ compiler.
    :param ldflags: extra flags to be passed to the linker.
    :param simd_width: number of doubles in SIMD instructions
        (e.g. 4 for AVX2, 8 for AVX512).
    :param debug: Turn on debugging for generated code (turns off
        compiler optimisations).
    :param compute_kernel_flops: Should PyOP2 compute the FLOP counts of
        kernels? (Default no)
    :param type_check: Should PyOP2 type-check API-calls?  (Default,
        yes)
    :param check_src_hashes: Should PyOP2 check that generated code is
        the same on all processes? (Default, yes).  Uses an allreduce.
    :param cache_dir: Where should generated code be cached?
    :param node_local_compilation: Should generated code by compiled
        "node-local" (one process for each set of processes that share
        a filesystem)?  You should probably arrange to set cache_dir
        to a node-local filesystem too.
    :param no_fork_available: Is the ``fork`` system call unavailable on
        this platform? (Default no)
    :param log_level: How chatty should PyOP2 be?  Valid values
        are "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
    :param print_cache_info: Should PyOP2 print the cache information at
        program exit?
    :param matnest: Should matrices on mixed maps be built as nests? (Default yes)
    :param block_sparsity: Should sparsity patterns on datasets with
        cdim > 1 be built as block sparsities, or dof sparsities.  The
        former saves memory but changes which preconditioners are
        available for the resulting matrices. (Default yes)
    :param spmd_strict: Enable barriers for calls marked with @collective and
        for cache access.  This adds considerable overhead, but is useful for
        tracking down deadlocks. (Default no)
    """
    # name, env variable, type, default, write once
    cache_dir = os.path.join(gettempdir(), "pyop2-cache-uid%s" % os.getuid())
    DEFAULTS = {
        "cc":
            ("PYOP2_CC", str, ""),
        "cxx":
            ("PYOP2_CXX", str, ""),
        "ld":
            ("PYOP2_LD", str, ""),
        "cflags":
            ("PYOP2_CFLAGS", str, ""),
        "cxxflags":
            ("PYOP2_CXXFLAGS", str, ""),
        "ldflags":
            ("PYOP2_LDFLAGS", str, ""),
        "simd_width":
            ("PYOP2_SIMD_WIDTH", int, 4),
        "debug":
            ("PYOP2_DEBUG", bool, False),
        "compute_kernel_flops":
            ("PYOP2_COMPUTE_KERNEL_FLOPS", bool, False),
        "type_check":
            ("PYOP2_TYPE_CHECK", bool, True),
        "check_src_hashes":
            ("PYOP2_CHECK_SRC_HASHES", bool, True),
        "log_level":
            ("PYOP2_LOG_LEVEL", (str, int), "WARNING"),
        "cache_dir":
            ("PYOP2_CACHE_DIR", str, cache_dir),
        "node_local_compilation":
            ("PYOP2_NODE_LOCAL_COMPILATION", bool, True),
        "no_fork_available":
            ("PYOP2_NO_FORK_AVAILABLE", bool, False),
        "print_cache_info":
            ("PYOP2_CACHE_INFO", bool, False),
        "matnest":
            ("PYOP2_MATNEST", bool, True),
        "block_sparsity":
            ("PYOP2_BLOCK_SPARSITY", bool, True),
        "spmd_strict":
            ("PYOP2_SPMD_STRICT", bool, False),
    }
    """Default values for PyOP2 configuration parameters"""

    def __init__(self):
        def convert(env, typ, v):
            # Multi-type parameters (e.g. log_level) are converted from
            # the environment using the first type in the tuple.
            if not isinstance(typ, type):
                typ = typ[0]
            try:
                if typ is bool:
                    # Booleans are encoded as 0/1 in environment variables.
                    return bool(int(os.environ.get(env, v)))
                return typ(os.environ.get(env, v))
            except ValueError:
                raise ValueError("Cannot convert value of environment variable %s to %r" % (env, typ))
        defaults = dict((k, convert(env, typ, v))
                        for k, (env, typ, v) in Configuration.DEFAULTS.items())
        super(Configuration, self).__init__(**defaults)
        self._set = set()
        self._defaults = defaults

    def reset(self):
        """Reset the configuration parameters to the default values."""
        self.update(self._defaults)
        self._set = set()

    def reconfigure(self, **kwargs):
        """Update the configuration parameters with new values."""
        for k, v in kwargs.items():
            self[k] = v

    def unsafe_reconfigure(self, **kwargs):
        """Unsafely reconfigure (just replacing the values).

        Unlike :meth:`reconfigure`, this bypasses type checking and does
        not record which keys were explicitly set.
        """
        self.update(kwargs)

    def __setitem__(self, key, value):
        """Set the value of a configuration parameter.

        :arg key: The parameter to set
        :arg value: The value to set it to.
        :raises ConfigurationError: if ``key`` is a known parameter and
            ``value`` has the wrong type.
        """
        if key in Configuration.DEFAULTS:
            valid_type = Configuration.DEFAULTS[key][1]
            if not isinstance(value, valid_type):
                raise ConfigurationError("Values for configuration key %s must be of type %r, not %r"
                                         % (key, valid_type, type(value)))
        self._set.add(key)
        super(Configuration, self).__setitem__(key, value)


configuration = Configuration()

target = CWithGNULibcTarget()
| """Convert a numpy dtype like object to a ctypes type.""" 32 | return {"bool": ctypes.c_bool, 33 | "int": ctypes.c_int, 34 | "int8": ctypes.c_char, 35 | "int16": ctypes.c_int16, 36 | "int32": ctypes.c_int32, 37 | "int64": ctypes.c_int64, 38 | "uint8": ctypes.c_ubyte, 39 | "uint16": ctypes.c_uint16, 40 | "uint32": ctypes.c_uint32, 41 | "uint64": ctypes.c_uint64, 42 | "float32": ctypes.c_float, 43 | "float64": ctypes.c_double}[numpy.dtype(dtype).name] 44 | 45 | 46 | def as_numpy_dtype(dtype): 47 | """Convert a dtype-like object into a numpy dtype.""" 48 | if isinstance(dtype, numpy.dtype): 49 | return dtype 50 | elif isinstance(dtype, lp.types.NumpyType): 51 | return dtype.numpy_dtype 52 | else: 53 | raise ValueError 54 | 55 | 56 | def dtype_limits(dtype): 57 | """Attempt to determine the min and max values of a datatype. 58 | 59 | :arg dtype: A numpy datatype. 60 | :returns: a 2-tuple of min, max 61 | :raises ValueError: If numeric limits could not be determined. 62 | """ 63 | try: 64 | info = numpy.finfo(dtype) 65 | except ValueError: 66 | # maybe an int? 67 | try: 68 | info = numpy.iinfo(dtype) 69 | except ValueError as e: 70 | raise ValueError("Unable to determine numeric limits from %s" % dtype) from e 71 | return info.min, info.max 72 | 73 | 74 | class OpaqueType(lp.types.OpaqueType): 75 | def __init__(self, name): 76 | super().__init__(name=name) 77 | 78 | def __repr__(self): 79 | return self.name 80 | -------------------------------------------------------------------------------- /pyop2/exceptions.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 
6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | 34 | """OP2 exception types""" 35 | 36 | 37 | class DataTypeError(TypeError): 38 | 39 | """Invalid type for data.""" 40 | 41 | 42 | class DimTypeError(TypeError): 43 | 44 | """Invalid type for dimension.""" 45 | 46 | 47 | class ArityTypeError(TypeError): 48 | 49 | """Invalid type for arity.""" 50 | 51 | 52 | class IndexTypeError(TypeError): 53 | 54 | """Invalid type for index.""" 55 | 56 | 57 | class NameTypeError(TypeError): 58 | 59 | """Invalid type for name.""" 60 | 61 | 62 | class SetTypeError(TypeError): 63 | 64 | """Invalid type for :class:`pyop2.op2.Set`.""" 65 | 66 | 67 | class SizeTypeError(TypeError): 68 | 69 | """Invalid type for size.""" 70 | 71 | 72 | class SubsetIndexOutOfBounds(TypeError): 73 | 74 | """Out of bound index.""" 75 | 76 | 77 | class SparsityTypeError(TypeError): 78 | 79 | """Invalid type for :class:`pyop2.op2.Sparsity`.""" 80 | 81 | 82 | class MapTypeError(TypeError): 83 | 84 | """Invalid type for :class:`pyop2.op2.Map`.""" 85 | 86 | 87 | class DataSetTypeError(TypeError): 88 | """Invalid type for :class:`pyop2.op2.DataSet`.""" 89 | 90 | 91 | class MatTypeError(TypeError): 92 | 93 | """Invalid type for :class:`pyop2.op2.Mat`.""" 94 | 95 | 96 | class DatTypeError(TypeError): 97 | 98 | """Invalid type for :class:`pyop2.op2.Dat`.""" 99 | 100 | 101 | class KernelTypeError(TypeError): 102 | 103 | """Invalid type for :class:`pyop2.op2.Kernel`.""" 104 | 105 | 106 | class DataValueError(ValueError): 107 | 108 | """Illegal value for data.""" 109 | 110 | 111 | class IndexValueError(ValueError): 112 | 113 | """Illegal value for index.""" 114 | 115 | 116 | class ModeValueError(ValueError): 117 | 118 | """Illegal value for mode.""" 119 | 120 | 121 | class IterateValueError(ValueError): 122 | 123 | """Illegal value for iterate.""" 124 | 125 | 126 | class SetValueError(ValueError): 127 | 128 | """Illegal value for :class:`pyop2.op2.Set`.""" 129 | 130 | 131 | class MapValueError(ValueError): 132 | 133 | """Illegal value for 
:class:`pyop2.op2.Map`.""" 134 | 135 | 136 | class ConfigurationError(RuntimeError): 137 | 138 | """Illegal configuration value or type.""" 139 | 140 | 141 | class CompilationError(RuntimeError): 142 | 143 | """Error during JIT compilation""" 144 | 145 | 146 | class SparsityFormatError(ValueError): 147 | 148 | """Unable to produce a sparsity for this matrix format.""" 149 | 150 | 151 | class CachingError(ValueError): 152 | 153 | """A caching error.""" 154 | 155 | 156 | class HashError(CachingError): 157 | 158 | """Something is wrong with the hash.""" 159 | -------------------------------------------------------------------------------- /pyop2/local_kernel.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from dataclasses import dataclass 3 | import hashlib 4 | from typing import Union 5 | 6 | import loopy as lp 7 | from loopy.tools import LoopyKeyBuilder 8 | import numpy as np 9 | 10 | from pyop2 import version 11 | from pyop2.configuration import configuration 12 | from pyop2.datatypes import ScalarType 13 | from pyop2.exceptions import NameTypeError 14 | from pyop2.types import Access 15 | from pyop2.utils import cached_property, validate_type 16 | 17 | 18 | @dataclass(frozen=True) 19 | class LocalKernelArg: 20 | """Class representing a kernel argument. 21 | 22 | :param access: Access descriptor for the argument. 23 | :param dtype: The argument's datatype. 24 | """ 25 | 26 | access: Access 27 | dtype: Union[np.dtype, str] 28 | 29 | 30 | @validate_type(("name", str, NameTypeError)) 31 | def Kernel(code, name, **kwargs): 32 | """Construct a local kernel. 33 | 34 | For a description of the arguments to this function please see :class:`LocalKernel`. 
35 | """ 36 | if isinstance(code, str): 37 | return CStringLocalKernel(code, name, **kwargs) 38 | elif isinstance(code, (lp.LoopKernel, lp.TranslationUnit)): 39 | return LoopyLocalKernel(code, name, **kwargs) 40 | else: 41 | raise TypeError("code argument is the wrong type") 42 | 43 | 44 | class LocalKernel(abc.ABC): 45 | """Class representing the kernel executed per member of the iterset. 46 | 47 | :arg code: Function definition (including signature). 48 | :arg name: The kernel name. This must match the name of the kernel 49 | function given in `code`. 50 | :arg accesses: Optional iterable of :class:`Access` instances describing 51 | how each argument in the function definition is accessed. 52 | 53 | :kwarg cpp: Is the kernel actually C++ rather than C? If yes, 54 | then compile with the C++ compiler (kernel is wrapped in 55 | extern C for linkage reasons). 56 | :kwarg flop_count: The number of FLOPs performed by the kernel. 57 | :kwarg headers: list of system headers to include when compiling the kernel 58 | in the form ``#include `` (optional, defaults to empty) 59 | :kwarg include_dirs: list of additional include directories to be searched 60 | when compiling the kernel (optional, defaults to empty) 61 | :kwarg ldargs: A list of arguments to pass to the linker when 62 | compiling this Kernel. 63 | :kwarg opts: An options dictionary for declaring optimisations to apply. 64 | :kwarg requires_zeroed_output_arguments: Does this kernel require the 65 | output arguments to be zeroed on entry when called? (default no) 66 | :kwarg user_code: code snippet to be executed once at the very start of 67 | the generated kernel wrapper code (optional, defaults to 68 | empty) 69 | :kwarg events: Tuple of log event names which are called in the C code of the local kernels 70 | 71 | Consider the case of initialising a :class:`~pyop2.Dat` with seeded random 72 | values in the interval 0 to 1. 
The corresponding :class:`~pyop2.Kernel` is 73 | constructed as follows: :: 74 | 75 | op2.Kernel("void setrand(double *x) { x[0] = (double)random()/RAND_MAX; }", 76 | name="setrand", 77 | headers=["#include <stdlib.h>"], user_code="srandom(10001);") 78 | 79 | .. note:: 80 | When running in parallel with MPI the generated code must be the same 81 | on all ranks. 82 | """ 83 | 84 | @validate_type(("name", str, NameTypeError)) 85 | def __init__(self, code, name, accesses=None, *, 86 | cpp=False, 87 | flop_count=None, 88 | headers=(), 89 | include_dirs=(), 90 | ldargs=(), 91 | opts=None, 92 | requires_zeroed_output_arguments=False, 93 | user_code="", 94 | events=()): 95 | self.code = code 96 | self.name = name 97 | self.accesses = accesses 98 | self.cpp = cpp 99 | self.flop_count = flop_count 100 | self.headers = headers 101 | self.include_dirs = include_dirs 102 | self.ldargs = ldargs 103 | self.opts = opts or {} 104 | self.requires_zeroed_output_arguments = requires_zeroed_output_arguments 105 | self.user_code = user_code 106 | self.events = events 107 | 108 | @property 109 | @abc.abstractmethod 110 | def dtypes(self): 111 | """Return the dtypes of the arguments to the kernel.""" 112 | 113 | @property 114 | def cache_key(self): 115 | return self._immutable_cache_key, self.accesses, self.dtypes 116 | 117 | @cached_property 118 | def _immutable_cache_key(self): 119 | # We need this function because self.accesses is mutable due to legacy support 120 | if isinstance(self.code, lp.TranslationUnit): 121 | key_hash = hashlib.sha256() 122 | self.code.update_persistent_hash(key_hash, LoopyKeyBuilder()) 123 | code = key_hash.hexdigest() 124 | else: 125 | code = self.code 126 | 127 | key = (code, self.name, self.cpp, self.flop_count, 128 | self.headers, self.include_dirs, self.ldargs, sorted(self.opts.items()), 129 | self.requires_zeroed_output_arguments, self.user_code, version.__version__) 130 | return hashlib.md5(str(key).encode()).hexdigest() 131 | 132 | @property 133 | def 
_wrapper_cache_key_(self): 134 | import warnings 135 | warnings.warn("_wrapper_cache_key is deprecated, use cache_key instead", DeprecationWarning) 136 | 137 | return self.cache_key 138 | 139 | @property 140 | def arguments(self): 141 | """Return an iterable of :class:`LocalKernelArg` instances representing 142 | the arguments expected by the kernel. 143 | """ 144 | assert len(self.accesses) == len(self.dtypes) 145 | 146 | return tuple(LocalKernelArg(acc, dtype) 147 | for acc, dtype in zip(self.accesses, self.dtypes)) 148 | 149 | @cached_property 150 | def num_flops(self): 151 | """Compute the numbers of FLOPs if not already known.""" 152 | if self.flop_count is not None: 153 | return self.flop_count 154 | 155 | if not configuration["compute_kernel_flops"]: 156 | return 0 157 | 158 | if isinstance(self.code, lp.TranslationUnit): 159 | op_map = lp.get_op_map( 160 | self.code.copy(options=lp.Options(ignore_boostable_into=True), 161 | silenced_warnings=['insn_count_subgroups_upper_bound', 162 | 'get_x_map_guessing_subgroup_size', 163 | 'summing_if_branches_ops']), 164 | subgroup_size='guess') 165 | return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], 166 | dtype=[ScalarType]).eval_and_sum({}) 167 | else: 168 | return 0 169 | 170 | def __eq__(self, other): 171 | if not isinstance(other, LocalKernel): 172 | return NotImplemented 173 | else: 174 | return self.cache_key == other.cache_key 175 | 176 | def __hash__(self): 177 | return hash(self.cache_key) 178 | 179 | def __str__(self): 180 | return f"OP2 Kernel: {self.name}" 181 | 182 | def __repr__(self): 183 | return 'Kernel("""%s""", %r)' % (self.code, self.name) 184 | 185 | 186 | class CStringLocalKernel(LocalKernel): 187 | """:class:`LocalKernel` class where `code` is a string of C code. 188 | 189 | :kwarg dtypes: Iterable of datatypes (either `np.dtype` or `str`) for 190 | each kernel argument. This is not required for :class:`LoopyLocalKernel` 191 | because it can be inferred. 
192 | 193 | All other `__init__` parameters are the same. 194 | """ 195 | 196 | @validate_type(("code", str, TypeError)) 197 | def __init__(self, code, name, accesses=None, dtypes=None, **kwargs): 198 | super().__init__(code, name, accesses, **kwargs) 199 | self._dtypes = dtypes 200 | 201 | @property 202 | def dtypes(self): 203 | return self._dtypes 204 | 205 | @dtypes.setter 206 | def dtypes(self, dtypes): 207 | self._dtypes = dtypes 208 | 209 | 210 | class LoopyLocalKernel(LocalKernel): 211 | """:class:`LocalKernel` class where `code` has type :class:`loopy.LoopKernel` 212 | or :class:`loopy.TranslationUnit`. 213 | """ 214 | 215 | @validate_type(("code", (lp.LoopKernel, lp.TranslationUnit), TypeError)) 216 | def __init__(self, code, *args, **kwargs): 217 | super().__init__(code, *args, **kwargs) 218 | 219 | @property 220 | def dtypes(self): 221 | return tuple(a.dtype for a in self._loopy_arguments) 222 | 223 | @cached_property 224 | def _loopy_arguments(self): 225 | """Return the loopy arguments associated with the kernel.""" 226 | return tuple(a for a in self.code.callables_table[self.name].subkernel.args 227 | if isinstance(a, lp.ArrayArg)) 228 | -------------------------------------------------------------------------------- /pyop2/logger.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 
13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | """The PyOP2 logger, based on the Python standard library logging module.""" 35 | 36 | from contextlib import contextmanager 37 | import logging 38 | 39 | logger = logging.getLogger('pyop2') 40 | handler = logging.StreamHandler() 41 | logger.addHandler(handler) 42 | 43 | 44 | debug = logger.debug 45 | info = logger.info 46 | warning = logger.warning 47 | error = logger.error 48 | critical = logger.critical 49 | 50 | DEBUG = logging.DEBUG 51 | INFO = logging.INFO 52 | WARNING = logging.WARNING 53 | ERROR = logging.ERROR 54 | CRITICAL = logging.CRITICAL 55 | 56 | 57 | def set_log_level(level): 58 | '''Set the log level of the PyOP2 logger. 59 | 60 | :arg level: the log level. 
Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL ''' 61 | logger.setLevel(level) 62 | 63 | 64 | def log(level, msg, *args, **kwargs): 65 | ''' Print 'msg % args' with the severity 'level'. 66 | 67 | :arg level: the log level. Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL 68 | :arg msg: the message ''' 69 | 70 | logger.log(level, msg, *args, **kwargs) 71 | 72 | 73 | _indent = 0 74 | 75 | 76 | @contextmanager 77 | def progress(level, msg, *args, **kwargs): 78 | """A context manager to print a progress message. 79 | 80 | The block is wrapped in ``msg...``, ``msg...done`` log messages 81 | with an appropriate indent (to distinguish nested message). 82 | 83 | :arg level: the log level. See :func:`log` for valid values 84 | :arg msg: the message. 85 | 86 | See :func:`log` for more details. 87 | """ 88 | global _indent 89 | log(level, (' ' * _indent) + msg + '...', *args, **kwargs) 90 | _indent += 2 91 | yield 92 | _indent -= 2 93 | log(level, (' ' * _indent) + msg + '...done', *args, **kwargs) 94 | -------------------------------------------------------------------------------- /pyop2/mpi-compat.h: -------------------------------------------------------------------------------- 1 | /* Author: Lisandro Dalcin */ 2 | /* Contact: dalcinl@gmail.com */ 3 | 4 | #ifndef MPI_COMPAT_H 5 | #define MPI_COMPAT_H 6 | 7 | #include 8 | 9 | #if (MPI_VERSION < 3) && !defined(PyMPI_HAVE_MPI_Message) 10 | typedef void *PyMPI_MPI_Message; 11 | #define MPI_Message PyMPI_MPI_Message 12 | #endif 13 | 14 | #endif/*MPI_COMPAT_H*/ 15 | -------------------------------------------------------------------------------- /pyop2/op2.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 
6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | 34 | """The PyOP2 API specification.""" 35 | 36 | import atexit 37 | 38 | from pyop2.configuration import configuration 39 | from pyop2.datatypes import OpaqueType # noqa: F401 40 | from pyop2.logger import debug, info, warning, error, critical, set_log_level 41 | from pyop2.mpi import MPI, COMM_WORLD, collective 42 | 43 | from pyop2.types import ( # noqa: F401 44 | Set, ExtrudedSet, MixedSet, Subset, DataSet, MixedDataSet, 45 | Map, MixedMap, PermutedMap, ComposedMap, Sparsity, Halo, 46 | Global, Constant, GlobalDataSet, 47 | Dat, MixedDat, DatView, Mat 48 | ) 49 | from pyop2.types import (READ, WRITE, RW, INC, MIN, MAX, 50 | ON_BOTTOM, ON_TOP, ON_INTERIOR_FACETS, ALL) 51 | 52 | from pyop2.local_kernel import CStringLocalKernel, LoopyLocalKernel, Kernel # noqa: F401 53 | from pyop2.global_kernel import (GlobalKernelArg, DatKernelArg, MixedDatKernelArg, # noqa: F401 54 | MatKernelArg, MixedMatKernelArg, MapKernelArg, GlobalKernel) 55 | from pyop2.parloop import (GlobalParloopArg, DatParloopArg, MixedDatParloopArg, # noqa: F401 56 | MatParloopArg, MixedMatParloopArg, PassthroughArg, Parloop, parloop, par_loop) 57 | from pyop2.parloop import (GlobalLegacyArg, DatLegacyArg, MixedDatLegacyArg, # noqa: F401 58 | MatLegacyArg, MixedMatLegacyArg, LegacyParloop, ParLoop) 59 | 60 | 61 | __all__ = ['configuration', 'READ', 'WRITE', 'RW', 'INC', 'MIN', 'MAX', 62 | 'ON_BOTTOM', 'ON_TOP', 'ON_INTERIOR_FACETS', 'ALL', 63 | 'debug', 'info', 'warning', 'error', 'critical', 'initialised', 64 | 'set_log_level', 'MPI', 'init', 'exit', 'Kernel', 'Set', 'ExtrudedSet', 65 | 'MixedSet', 'Subset', 'DataSet', 'GlobalDataSet', 'MixedDataSet', 66 | 'Halo', 'Dat', 'MixedDat', 'Mat', 'Global', 'Map', 'MixedMap', 67 | 'Sparsity', 'parloop', 'Parloop', 'ParLoop', 'par_loop', 68 | 'DatView', 'PermutedMap', 'ComposedMap'] 69 | 70 | 71 | _initialised = False 72 | 73 | # set the log level 74 | set_log_level(configuration['log_level']) 75 | 76 | 77 | def initialised(): 78 | """Check whether 
PyOP2 has been initialised but not yet finalised.""" 79 | return _initialised 80 | 81 | 82 | @collective 83 | def init(**kwargs): 84 | """Initialise PyOP2: select the backend and potentially other configuration 85 | options. 86 | 87 | :arg debug: The level of debugging output. 88 | :arg comm: The MPI communicator to use for parallel communication, 89 | defaults to `MPI_COMM_WORLD` 90 | :arg log_level: The log level. Options: DEBUG, INFO, WARNING, ERROR, CRITICAL 91 | 92 | For debugging purposes, `init` accepts all keyword arguments 93 | accepted by the PyOP2 :class:`Configuration` object, see 94 | :meth:`Configuration.__init__` for details of further accepted 95 | options. 96 | 97 | .. note:: 98 | Calling ``init`` again with a different backend raises an exception. 99 | Changing the backend is not possible. Calling ``init`` again with the 100 | same backend or not specifying a backend will update the configuration. 101 | Calling ``init`` after ``exit`` has been called is an error and will 102 | raise an exception. 103 | """ 104 | global _initialised 105 | configuration.reconfigure(**kwargs) 106 | 107 | set_log_level(configuration['log_level']) 108 | _initialised = True 109 | 110 | 111 | @atexit.register 112 | @collective 113 | def exit(): 114 | """Exit OP2 and clean up""" 115 | if configuration['print_cache_info'] and COMM_WORLD.rank == 0: 116 | from pyop2.caching import print_cache_stats 117 | print(f"{' PyOP2 cache sizes on rank 0 at exit ':*^120}") 118 | print_cache_stats(alive=False) 119 | configuration.reset() 120 | global _initialised 121 | _initialised = False 122 | -------------------------------------------------------------------------------- /pyop2/profiling.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. 
Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | 35 | from petsc4py import PETSc 36 | from decorator import decorator 37 | 38 | 39 | timed_stage = PETSc.Log.Stage 40 | """Enter a code Stage, this is a PETSc log Stage. 
41 | 42 | :arg name: The name of the stage.""" 43 | 44 | 45 | timed_region = PETSc.Log.Event 46 | """Time a code region, this a PETSc log Event. 47 | 48 | :arg name: The name of the region.""" 49 | 50 | 51 | class timed_function(object): 52 | def __init__(self, name=None): 53 | self.name = name 54 | 55 | def __call__(self, f): 56 | def wrapper(f, *args, **kwargs): 57 | if self.name is None: 58 | self.name = f.__name__ 59 | with timed_region(self.name): 60 | return f(*args, **kwargs) 61 | return decorator(wrapper, f) 62 | -------------------------------------------------------------------------------- /pyop2/types/__init__.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | from .access import * # noqa: F401 4 | from .data_carrier import * # noqa: F401 5 | from .dataset import * # noqa: F401 6 | from .dat import * # noqa: F401 7 | from .glob import * # noqa: F401 8 | from .halo import * # noqa: F401 9 | from .map import * # noqa: F401 10 | from .mat import * # noqa: F401 11 | from .set import * # noqa: F401 12 | 13 | 14 | class IterationRegion(enum.IntEnum): 15 | BOTTOM = 1 16 | TOP = 2 17 | INTERIOR_FACETS = 3 18 | ALL = 4 19 | 20 | 21 | ON_BOTTOM = IterationRegion.BOTTOM 22 | """Iterate over the cells at the bottom of the column in an extruded mesh.""" 23 | 24 | ON_TOP = IterationRegion.TOP 25 | """Iterate over the top cells in an extruded mesh.""" 26 | 27 | ON_INTERIOR_FACETS = IterationRegion.INTERIOR_FACETS 28 | """Iterate over the interior facets of an extruded mesh.""" 29 | 30 | ALL = IterationRegion.ALL 31 | """Iterate over all cells of an extruded mesh.""" 32 | -------------------------------------------------------------------------------- /pyop2/types/access.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class Access(enum.IntEnum): 5 | READ = 1 6 | WRITE = 2 7 | RW = 3 8 | INC = 4 9 | MIN = 5 10 | MAX = 6 11 | 12 | 13 | READ = 
Access.READ 14 | """The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed read-only.""" 15 | 16 | WRITE = Access.WRITE 17 | """The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed write-only, 18 | and OP2 is not required to handle write conflicts.""" 19 | 20 | RW = Access.RW 21 | """The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed for reading 22 | and writing, and OP2 is not required to handle write conflicts.""" 23 | 24 | INC = Access.INC 25 | """The kernel computes increments to be summed onto a :class:`Global`, 26 | :class:`Dat`, or :class:`Mat`. OP2 is responsible for managing the write 27 | conflicts caused.""" 28 | 29 | MIN = Access.MIN 30 | """The kernel contributes to a reduction into a :class:`Global` using a ``min`` 31 | operation. OP2 is responsible for reducing over the different kernel 32 | invocations.""" 33 | 34 | MAX = Access.MAX 35 | """The kernel contributes to a reduction into a :class:`Global` using a ``max`` 36 | operation. OP2 is responsible for reducing over the different kernel 37 | invocations.""" 38 | -------------------------------------------------------------------------------- /pyop2/types/data_carrier.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | from pyop2 import ( 6 | datatypes as dtypes, 7 | mpi, 8 | utils 9 | ) 10 | from pyop2.types.access import Access 11 | 12 | 13 | class DataCarrier(abc.ABC): 14 | 15 | """Abstract base class for OP2 data. 
16 | 17 | Actual objects will be :class:`DataCarrier` objects of rank 0 18 | (:class:`Global`), rank 1 (:class:`Dat`), or rank 2 19 | (:class:`Mat`)""" 20 | 21 | @utils.cached_property 22 | def dtype(self): 23 | """The Python type of the data.""" 24 | return self._data.dtype 25 | 26 | @utils.cached_property 27 | def ctype(self): 28 | """The c type of the data.""" 29 | return dtypes.as_cstr(self.dtype) 30 | 31 | @utils.cached_property 32 | def name(self): 33 | """User-defined label.""" 34 | return self._name 35 | 36 | @utils.cached_property 37 | def dim(self): 38 | """The shape tuple of the values for each element of the object.""" 39 | return self._dim 40 | 41 | @utils.cached_property 42 | def cdim(self): 43 | """The scalar number of values for each member of the object. This is 44 | the product of the dim tuple.""" 45 | return self._cdim 46 | 47 | def increment_dat_version(self): 48 | pass 49 | 50 | 51 | class EmptyDataMixin(abc.ABC): 52 | """A mixin for :class:`Dat` and :class:`Global` objects that takes 53 | care of allocating data on demand if the user has passed nothing 54 | in. 55 | 56 | Accessing the :attr:`_data` property allocates a zeroed data array 57 | if it does not already exist. 
58 | """ 59 | def __init__(self, data, dtype, shape): 60 | if data is None: 61 | self._dtype = np.dtype(dtype if dtype is not None else dtypes.ScalarType) 62 | else: 63 | self._numpy_data = utils.verify_reshape(data, dtype, shape, allow_none=True) 64 | self._dtype = self._data.dtype 65 | 66 | @utils.cached_property 67 | def _data(self): 68 | """Return the user-provided data buffer, or a zeroed buffer of 69 | the correct size if none was provided.""" 70 | if not self._is_allocated: 71 | self._numpy_data = np.zeros(self.shape, dtype=self._dtype) 72 | return self._numpy_data 73 | 74 | @property 75 | def _is_allocated(self): 76 | """Return True if the data buffer has been allocated.""" 77 | return hasattr(self, '_numpy_data') 78 | 79 | 80 | class VecAccessMixin(abc.ABC): 81 | 82 | def __init__(self, petsc_counter=None): 83 | if petsc_counter: 84 | # Use lambda since `_vec` allocates the data buffer 85 | # -> Dat/Global should not allocate storage until accessed 86 | self._dat_version = lambda: self._vec.stateGet() 87 | self.increment_dat_version = lambda: self._vec.stateIncrease() 88 | else: 89 | # No associated PETSc Vec if incompatible type: 90 | # -> Equip Dat/Global with their own counter. 91 | self._version = 0 92 | self._dat_version = lambda: self._version 93 | 94 | def _inc(): 95 | self._version += 1 96 | self.increment_dat_version = _inc 97 | 98 | @property 99 | def dat_version(self): 100 | return self._dat_version() 101 | 102 | @abc.abstractmethod 103 | def vec_context(self, access): 104 | pass 105 | 106 | @abc.abstractproperty 107 | def _vec(self): 108 | pass 109 | 110 | @property 111 | @mpi.collective 112 | def vec(self): 113 | """Context manager for a PETSc Vec appropriate for this Dat. 114 | 115 | You're allowed to modify the data you get back from this view.""" 116 | return self.vec_context(access=Access.RW) 117 | 118 | @property 119 | @mpi.collective 120 | def vec_wo(self): 121 | """Context manager for a PETSc Vec appropriate for this Dat. 
122 | 123 | You're allowed to modify the data you get back from this view, 124 | but you cannot read from it.""" 125 | return self.vec_context(access=Access.WRITE) 126 | 127 | @property 128 | @mpi.collective 129 | def vec_ro(self): 130 | """Context manager for a PETSc Vec appropriate for this Dat. 131 | 132 | You're not allowed to modify the data you get back from this view.""" 133 | return self.vec_context(access=Access.READ) 134 | -------------------------------------------------------------------------------- /pyop2/types/halo.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class Halo(abc.ABC): 5 | 6 | """A description of a halo associated with a :class:`pyop2.types.set.Set`. 7 | 8 | The halo object describes which :class:`pyop2.types.set.Set` elements are sent 9 | where, and which :class:`pyop2.types.set.Set` elements are received from where. 10 | """ 11 | 12 | @abc.abstractproperty 13 | def comm(self): 14 | """The MPI communicator for this halo.""" 15 | pass 16 | 17 | @abc.abstractproperty 18 | def local_to_global_numbering(self): 19 | """The mapping from process-local to process-global numbers for this halo.""" 20 | pass 21 | 22 | @abc.abstractmethod 23 | def global_to_local_begin(self, dat, insert_mode): 24 | """Begin an exchange from global (assembled) to local (ghosted) representation. 25 | 26 | :arg dat: The :class:`pyop2.types.dat.Dat` to exchange. 27 | :arg insert_mode: The insertion mode. 28 | """ 29 | pass 30 | 31 | @abc.abstractmethod 32 | def global_to_local_end(self, dat, insert_mode): 33 | """Finish an exchange from global (assembled) to local (ghosted) representation. 34 | 35 | :arg dat: The :class:`pyop2.types.dat.Dat` to exchange. 36 | :arg insert_mode: The insertion mode. 37 | """ 38 | pass 39 | 40 | @abc.abstractmethod 41 | def local_to_global_begin(self, dat, insert_mode): 42 | """Begin an exchange from local (ghosted) to global (assembled) representation. 
43 | 44 | :arg dat: The :class:`pyop2.types.dat.Dat` to exchange. 45 | :arg insert_mode: The insertion mode. 46 | """ 47 | pass 48 | 49 | @abc.abstractmethod 50 | def local_to_global_end(self, dat, insert_mode): 51 | """Finish an exchange from local (ghosted) to global (assembled) representation. 52 | 53 | :arg dat: The :class:`pyop2.types.dat.Dat` to exchange. 54 | :arg insert_mode: The insertion mode. 55 | """ 56 | pass 57 | -------------------------------------------------------------------------------- /pyop2/version.py: -------------------------------------------------------------------------------- 1 | 2 | __version_info__ = (0, 12, 0) 3 | __version__ = '.'.join(map(str, __version_info__)) 4 | -------------------------------------------------------------------------------- /requirements-ext.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.9.1 2 | Cython>=0.22 3 | pytest>=2.3 4 | flake8>=2.1.0 5 | pycparser>=2.10 6 | mpi4py>=1.3.1 7 | decorator<=4.4.2 8 | dataclasses 9 | cachetools 10 | packaging 11 | pytools 12 | -------------------------------------------------------------------------------- /requirements-git.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/firedrakeproject/loopy.git@main#egg=loopy 2 | git+https://github.com/firedrakeproject/pytest-mpi.git@main#egg=pytest-mpi 3 | -------------------------------------------------------------------------------- /requirements-minimal.txt: -------------------------------------------------------------------------------- 1 | -r requirements-ext.txt 2 | -r requirements-git.txt 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements-minimal.txt 2 | -------------------------------------------------------------------------------- /scripts/pyop2-clean: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pyop2.compilation import clear_compiler_disk_cache 3 | 4 | 5 | if __name__ == '__main__': 6 | clear_compiler_disk_cache(prompt=True) 7 | -------------------------------------------------------------------------------- /scripts/spydump: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This file is part of PyOP2 4 | # 5 | # PyOP2 is Copyright (c) 2012, Imperial College London and 6 | # others. Please see the AUTHORS file in the main source directory for 7 | # a full list of copyright holders. All rights reserved. 8 | # 9 | # Redistribution and use in source and binary forms, with or without 10 | # modification, are permitted provided that the following conditions 11 | # are met: 12 | # 13 | # * Redistributions of source code must retain the above copyright 14 | # notice, this list of conditions and the following disclaimer. 15 | # * Redistributions in binary form must reproduce the above copyright 16 | # notice, this list of conditions and the following disclaimer in the 17 | # documentation and/or other materials provided with the distribution. 18 | # * The name of Imperial College London or that of other 19 | # contributors may not be used to endorse or promote products 20 | # derived from this software without specific prior written 21 | # permission. 22 | # 23 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 24 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 27 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 28 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 34 | # OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | """Show a spy plot from a binary PETSc matrix dump or compare two dumps as spy 37 | plots if two input file names are given.""" 38 | 39 | import matplotlib 40 | import numpy as np 41 | import pylab 42 | from scipy.sparse import csr_matrix 43 | 44 | COOKIE = 1211216 # from petscmat.h 45 | IntType = '>i4' # big-endian, 4 byte integer 46 | ScalarType = '>f8' # big-endian, 8 byte real floating 47 | 48 | # after http://lists.mcs.anl.gov/pipermail/petsc-users/2010-February/005935.html 49 | def readmat(filename): 50 | with open(filename, 'rb') as fh: 51 | header = np.fromfile(fh, dtype=IntType, count=4) 52 | assert header[0] == COOKIE 53 | M, N, nz = header[1:] 54 | # 55 | I = np.empty(M+1, dtype=IntType) 56 | I[0] = 0 57 | rownz = np.fromfile(fh, dtype=IntType, count=M) 58 | np.cumsum(rownz, out=I[1:]) 59 | assert I[-1] == nz 60 | # 61 | J = np.fromfile(fh, dtype=IntType, count=nz) 62 | V = np.fromfile(fh, dtype=ScalarType, count=nz) 63 | return (M, N), (I, J, V) 64 | 65 | def dump2csr(filename): 66 | (M, N), (I, J, V) = readmat(filename) 67 | return csr_matrix((V, J, I)) 68 | 69 | def compare_dump(files, outfile=None, marker='.', markersize=.5): 70 | """Compare two binary PETSc matrix dumps as spy plots.""" 71 | 72 | opts = {'marker': marker, 'markersize': markersize} 73 | csr1 = dump2csr(files[0]) 74 | 75 | if len(files) > 1: 76 | matplotlib.rc('font', size=4) 77 | pylab.figure(figsize=(12, 5), 
dpi=300) 78 | pylab.subplot(221) 79 | else: 80 | matplotlib.rc('font', size=10) 81 | pylab.figure(figsize=(5, 5), dpi=300) 82 | pylab.spy(csr1, **opts) 83 | pylab.title(files[0]) 84 | 85 | if len(files) > 1: 86 | csr2 = dump2csr(files[1]) 87 | pylab.subplot(222) 88 | pylab.spy(csr2, **opts) 89 | pylab.title(files[1]) 90 | 91 | pylab.subplot(223) 92 | pylab.spy(csr1 - csr2, **opts) 93 | pylab.title(files[0] + ' - ' + files[1]) 94 | 95 | pylab.subplot(224) 96 | pylab.plot(csr1.data, label=files[0], **opts) 97 | pylab.plot(csr2.data, label=files[1], **opts) 98 | pylab.plot(csr1.data - csr2.data, label='Difference', **opts) 99 | pylab.legend() 100 | pylab.title('Nonzero values') 101 | 102 | if outfile: 103 | pylab.savefig(outfile) 104 | else: 105 | pylab.show() 106 | 107 | if __name__ == '__main__': 108 | import argparse 109 | parser = argparse.ArgumentParser(description=__doc__, add_help=True) 110 | parser.add_argument('files', nargs='+', help='Matrix dump files') 111 | parser.add_argument('--output', '-o', 112 | help='Output plot to file instead of showing interactively') 113 | parser.add_argument('--marker', default='.', choices=['s', 'o', '.', ','], 114 | help='Specify marker to use for spyplot') 115 | parser.add_argument('--markersize', type=float, default=.5, 116 | help='Specify marker size to use for spyplot') 117 | args = parser.parse_args() 118 | 119 | compare_dump(args.files, args.output, marker=args.marker, markersize=args.markersize) 120 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | # See the docstring in versioneer.py for instructions. Note that you must 3 | # re-run 'versioneer.py setup' after changing this section, and commit the 4 | # resulting files. 
5 | 6 | [versioneer] 7 | VCS = git 8 | style = pep440 9 | versionfile_source = pyop2/_version.py 10 | versionfile_build = pyop2/_version.py 11 | tag_prefix = v 12 | parentdir_prefix = pyop2- 13 | 14 | [flake8] 15 | ignore = E501,F403,F405,E226,E402,E721,E731,E741,W503,F999 16 | exclude = .git,__pycache__,build,dist,doc/sphinx/source/conf.py,doc/sphinx/server.py,demo 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This file is part of PyOP2 4 | # 5 | # PyOP2 is Copyright (c) 2012, Imperial College London and 6 | # others. Please see the AUTHORS file in the main source directory for 7 | # a full list of copyright holders. All rights reserved. 8 | # 9 | # Redistribution and use in source and binary forms, with or without 10 | # modification, are permitted provided that the following conditions 11 | # are met: 12 | # 13 | # * Redistributions of source code must retain the above copyright 14 | # notice, this list of conditions and the following disclaimer. 15 | # * Redistributions in binary form must reproduce the above copyright 16 | # notice, this list of conditions and the following disclaimer in the 17 | # documentation and/or other materials provided with the distribution. 18 | # * The name of Imperial College London or that of other 19 | # contributors may not be used to endorse or promote products 20 | # derived from this software without specific prior written 21 | # permission. 22 | # 23 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 24 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 27 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 28 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 34 | # OF THE POSSIBILITY OF SUCH DAMAGE. 35 | 36 | from setuptools import setup, Extension 37 | from glob import glob 38 | from os import environ as env 39 | import sys 40 | import numpy as np 41 | import petsc4py 42 | import versioneer 43 | import os 44 | 45 | 46 | def get_petsc_dir(): 47 | try: 48 | arch = '/' + env.get('PETSC_ARCH', '') 49 | dir = env['PETSC_DIR'] 50 | return (dir, dir + arch) 51 | except KeyError: 52 | try: 53 | import petsc 54 | return (petsc.get_petsc_dir(), ) 55 | except ImportError: 56 | sys.exit("""Error: Could not find PETSc library. 57 | 58 | Set the environment variable PETSC_DIR to your local PETSc base 59 | directory or install PETSc from PyPI: pip install petsc""") 60 | 61 | 62 | cmdclass = versioneer.get_cmdclass() 63 | _sdist = cmdclass['sdist'] 64 | 65 | if "clean" in sys.argv[1:]: 66 | # Forcibly remove the results of Cython. 
67 | for dirname, dirs, files in os.walk("pyop2"): 68 | for f in files: 69 | base, ext = os.path.splitext(f) 70 | if ext in (".c", ".cpp", ".so") and base + ".pyx" in files: 71 | os.remove(os.path.join(dirname, f)) 72 | 73 | # If Cython is available, built the extension module from the Cython source 74 | try: 75 | from Cython.Distutils import build_ext 76 | cmdclass['build_ext'] = build_ext 77 | sparsity_sources = ['pyop2/sparsity.pyx'] 78 | # Else we require the Cython-compiled .c file to be present and use that 79 | # Note: file is not in revision control but needs to be included in distributions 80 | except ImportError: 81 | sparsity_sources = ['pyop2/sparsity.c'] 82 | sources = sparsity_sources 83 | from os.path import exists 84 | if not all([exists(f) for f in sources]): 85 | raise ImportError("Installing from source requires Cython") 86 | 87 | 88 | install_requires = [ 89 | 'decorator', 90 | 'mpi4py', 91 | 'numpy>=1.6', 92 | 'pytools', 93 | ] 94 | 95 | version = sys.version_info[:2] 96 | 97 | if version < (3, 6): 98 | raise ValueError("Python version >= 3.6 required") 99 | 100 | test_requires = [ 101 | 'flake8>=2.1.0', 102 | 'pytest>=2.3', 103 | ] 104 | 105 | petsc_dirs = get_petsc_dir() 106 | numpy_includes = [np.get_include()] 107 | includes = numpy_includes + [petsc4py.get_include()] 108 | includes += ["%s/include" % d for d in petsc_dirs] 109 | 110 | if 'CC' not in env: 111 | env['CC'] = "mpicc" 112 | 113 | 114 | class sdist(_sdist): 115 | def run(self): 116 | # Make sure the compiled Cython files in the distribution are up-to-date 117 | from Cython.Build import cythonize 118 | cythonize(sparsity_sources, language="c", include_path=includes) 119 | _sdist.run(self) 120 | 121 | 122 | cmdclass['sdist'] = sdist 123 | 124 | setup(name='PyOP2', 125 | version=versioneer.get_version(), 126 | description='Framework for performance-portable parallel computations on unstructured meshes', 127 | author='Imperial College London and others', 128 | 
author_email='mapdes@imperial.ac.uk', 129 | url='https://github.com/OP2/PyOP2/', 130 | classifiers=[ 131 | 'Development Status :: 3 - Alpha', 132 | 'Intended Audience :: Developers', 133 | 'Intended Audience :: Science/Research', 134 | 'License :: OSI Approved :: BSD License', 135 | 'Operating System :: OS Independent', 136 | 'Programming Language :: C', 137 | 'Programming Language :: Cython', 138 | 'Programming Language :: Python :: 3', 139 | 'Programming Language :: Python :: 3.6', 140 | ], 141 | install_requires=install_requires + test_requires, 142 | packages=['pyop2', 'pyop2.codegen', 'pyop2.types'], 143 | package_data={ 144 | 'pyop2': ['assets/*', '*.h', '*.pxd', '*.pyx', 'codegen/c/*.c']}, 145 | scripts=glob('scripts/*'), 146 | cmdclass=cmdclass, 147 | ext_modules=[Extension('pyop2.sparsity', sparsity_sources, 148 | include_dirs=['pyop2'] + includes, language="c", 149 | libraries=["petsc"], 150 | extra_link_args=(["-L%s/lib" % d for d in petsc_dirs] 151 | + ["-Wl,-rpath,%s/lib" % d for d in petsc_dirs]))]) 152 | -------------------------------------------------------------------------------- /test/unit/test_callables.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012-2014, Imperial College London and 4 | # others. Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 
13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | 34 | import pytest 35 | import loopy 36 | from pyop2.codegen.rep2loopy import SolveCallable, INVCallable 37 | import numpy as np 38 | from pyop2 import op2 39 | from pyop2.configuration import target 40 | 41 | 42 | @pytest.fixture 43 | def s(): 44 | return op2.Set(1) 45 | 46 | 47 | @pytest.fixture 48 | def zero_mat(s): 49 | return op2.Dat(s ** (2, 2), [[0.0, 0.0], [0.0, 0.0]]) 50 | 51 | 52 | @pytest.fixture 53 | def inv_mat(s): 54 | return op2.Dat(s ** (2, 2), [[1.0, 2.0], [3.0, 4.0]]) 55 | 56 | 57 | @pytest.fixture 58 | def zero_vec(s): 59 | return op2.Dat(s ** (2, 1), [0.0, 0.0]) 60 | 61 | 62 | @pytest.fixture 63 | def solve_mat(s): 64 | d = op2.Dat(s ** (2, 2), [[2.0, 1.0], [-3.0, 2.0]]) 65 | return d 66 | 67 | 68 | @pytest.fixture 69 | def solve_vec(s): 70 | return op2.Dat(s ** (2, 1), [1.0, 0.0]) 71 | 72 | 73 | class TestCallables: 74 | 75 | def test_inverse_callable(self, zero_mat, inv_mat): 76 | loopy.set_caching_enabled(False) 77 | 78 | k = loopy.make_kernel( 79 | ["{ : }"], 80 | """ 81 | B[:,:] = inverse(A[:,:]) 82 | """, 83 | [loopy.GlobalArg('B', dtype=np.float64, shape=(2, 2)), 84 | loopy.GlobalArg('A', dtype=np.float64, shape=(2, 2))], 85 | target=target, 86 | name="callable_kernel", 87 | lang_version=(2018, 2)) 88 | 89 | k = loopy.register_callable(k, INVCallable.name, INVCallable()) 90 | code = loopy.generate_code_v2(k).device_code() 91 | code.replace('void callable_kernel', 'static void callable_kernel') 92 | 93 | loopykernel = op2.Kernel(code, "callable_kernel", ldargs=["-llapack"]) 94 | 95 | op2.par_loop(loopykernel, zero_mat.dataset.set, zero_mat(op2.WRITE), inv_mat(op2.READ)) 96 | expected = np.linalg.inv(inv_mat.data) 97 | assert np.allclose(expected, zero_mat.data) 98 | 99 | def test_solve_callable(self, zero_vec, solve_mat, solve_vec): 100 | loopy.set_caching_enabled(False) 101 | 102 | k = loopy.make_kernel( 103 | ["{ : }"], 104 | """ 105 | x[:] = solve(A[:,:], b[:]) 106 | """, 107 | [loopy.GlobalArg('x', dtype=np.float64, shape=(2, )), 
108 | loopy.GlobalArg('A', dtype=np.float64, shape=(2, 2)), 109 | loopy.GlobalArg('b', dtype=np.float64, shape=(2, ),)], 110 | target=target, 111 | name="callable_kernel2", 112 | lang_version=(2018, 2)) 113 | 114 | k = loopy.register_callable(k, SolveCallable.name, SolveCallable()) 115 | code = loopy.generate_code_v2(k).device_code() 116 | code.replace('void callable_kernel2', 'static void callable_kernel2') 117 | loopykernel = op2.Kernel(code, "callable_kernel2", ldargs=["-llapack"]) 118 | args = [zero_vec(op2.READ), solve_mat(op2.READ), solve_vec(op2.WRITE)] 119 | 120 | op2.par_loop(loopykernel, solve_mat.dataset.set, *args) 121 | expected = np.linalg.solve(solve_mat.data, solve_vec.data) 122 | assert np.allclose(expected, zero_vec.data) 123 | -------------------------------------------------------------------------------- /test/unit/test_configuration.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 
20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | """Configuration unit tests.""" 35 | 36 | 37 | import pytest 38 | from pyop2.configuration import Configuration 39 | from pyop2.exceptions import ConfigurationError 40 | 41 | 42 | class TestConfigurationAPI: 43 | """Configuration API unit tests.""" 44 | 45 | def test_add_configuration_value(self): 46 | """Defining an non default argument.""" 47 | c = Configuration() 48 | c.reconfigure(foo='bar') 49 | assert c['foo'] == 'bar' 50 | 51 | @pytest.mark.parametrize(('key', 'val'), [('debug', 'illegal'), 52 | ('log_level', 1.5)]) 53 | def test_configuration_illegal_types(self, key, val): 54 | """Illegal types for configuration values should raise 55 | ConfigurationError.""" 56 | c = Configuration() 57 | with pytest.raises(ConfigurationError): 58 | c[key] = val 59 | -------------------------------------------------------------------------------- /test/unit/test_direct_loop.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. 
Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | 34 | 35 | import pytest 36 | import numpy as np 37 | from petsc4py import PETSc 38 | 39 | from pyop2 import op2 40 | from pyop2.exceptions import MapValueError 41 | from pyop2.mpi import COMM_WORLD 42 | 43 | nelems = 4096 44 | 45 | 46 | @pytest.fixture(params=[(nelems, nelems, nelems), 47 | (0, nelems, nelems), 48 | (nelems // 2, nelems, nelems), 49 | (0, nelems//2, nelems)]) 50 | def elems(request): 51 | return op2.Set(request.param, "elems") 52 | 53 | 54 | @pytest.fixture 55 | def delems(elems): 56 | return op2.DataSet(elems, 1, "delems") 57 | 58 | 59 | @pytest.fixture 60 | def delems2(elems): 61 | return op2.DataSet(elems, 2, "delems2") 62 | 63 | 64 | def xarray(): 65 | return np.array(range(nelems), dtype=np.uint32) 66 | 67 | 68 | class TestDirectLoop: 69 | 70 | """ 71 | Direct Loop Tests 72 | """ 73 | 74 | @pytest.fixture 75 | def x(cls, delems): 76 | return op2.Dat(delems, xarray(), np.uint32, "x") 77 | 78 | @pytest.fixture 79 | def y(cls, delems2): 80 | return op2.Dat(delems2, [xarray(), xarray()], np.uint32, "x") 81 | 82 | @pytest.fixture 83 | def g(cls): 84 | return op2.Global(1, 0, np.uint32, "g", comm=COMM_WORLD) 85 | 86 | @pytest.fixture 87 | def h(cls): 88 | return op2.Global(1, 1, np.uint32, "h", comm=COMM_WORLD) 89 | 90 | def test_wo(self, elems, x): 91 | """Set a Dat to a scalar value with op2.WRITE.""" 92 | kernel_wo = """static void wo(unsigned int* x) { *x = 42; }""" 93 | op2.par_loop(op2.Kernel(kernel_wo, "wo"), 94 | elems, x(op2.WRITE)) 95 | assert all(map(lambda x: x == 42, x.data)) 96 | 97 | def test_mismatch_set_raises_error(self, elems, x): 98 | """The iterset of the parloop should match the dataset of the direct dat.""" 99 | kernel_wo = """static void wo(unsigned int* x) { *x = 42; }""" 100 | with pytest.raises(MapValueError): 101 | op2.par_loop( 102 | op2.Kernel(kernel_wo, "wo"), 103 | op2.Set(elems.size), 104 | x(op2.WRITE) 105 | ) 106 | 107 | def test_rw(self, elems, x): 108 | """Increment each value of a Dat by one with op2.RW.""" 
109 | kernel_rw = """static void wo(unsigned int* x) { (*x) = (*x) + 1; }""" 110 | op2.par_loop(op2.Kernel(kernel_rw, "wo"), 111 | elems, x(op2.RW)) 112 | _nelems = elems.size 113 | assert sum(x.data_ro) == _nelems * (_nelems + 1) // 2 114 | if _nelems == nelems: 115 | assert sum(x.data_ro_with_halos) == nelems * (nelems + 1) // 2 116 | 117 | def test_global_inc(self, elems, x, g): 118 | """Increment each value of a Dat by one and a Global at the same time.""" 119 | kernel_global_inc = """static void global_inc(unsigned int* x, unsigned int* inc) { 120 | (*x) = (*x) + 1; (*inc) += (*x); 121 | }""" 122 | op2.par_loop(op2.Kernel(kernel_global_inc, "global_inc"), 123 | elems, x(op2.RW), g(op2.INC)) 124 | _nelems = elems.size 125 | assert g.data[0] == _nelems * (_nelems + 1) // 2 126 | 127 | def test_global_inc_init_not_zero(self, elems, g): 128 | """Increment a global initialized with a non-zero value.""" 129 | k = """static void k(unsigned int* inc) { (*inc) += 1; }""" 130 | g.data[0] = 10 131 | op2.par_loop(op2.Kernel(k, 'k'), elems, g(op2.INC)) 132 | assert g.data[0] == elems.size + 10 133 | 134 | def test_global_max_dat_is_max(self, elems, x, g): 135 | """Verify that op2.MAX reduces to the maximum value.""" 136 | k_code = """static void k(unsigned int *g, unsigned int *x) { 137 | if ( *g < *x ) { *g = *x; } 138 | }""" 139 | k = op2.Kernel(k_code, 'k') 140 | 141 | op2.par_loop(k, elems, g(op2.MAX), x(op2.READ)) 142 | assert g.data[0] == x.data.max() 143 | 144 | def test_global_max_g_is_max(self, elems, x, g): 145 | """Verify that op2.MAX does not reduce a maximum value smaller than the 146 | Global's initial value.""" 147 | k_code = """static void k(unsigned int *x, unsigned int *g) { 148 | if ( *g < *x ) { *g = *x; } 149 | }""" 150 | 151 | k = op2.Kernel(k_code, 'k') 152 | 153 | g.data[0] = nelems * 2 154 | 155 | op2.par_loop(k, elems, x(op2.READ), g(op2.MAX)) 156 | 157 | assert g.data[0] == nelems * 2 158 | 159 | def test_global_min_dat_is_min(self, elems, x, g): 
160 | """Verify that op2.MIN reduces to the minimum value.""" 161 | k_code = """static void k(unsigned int *g, unsigned int *x) { 162 | if ( *g > *x ) { *g = *x; } 163 | }""" 164 | k = op2.Kernel(k_code, 'k') 165 | g.data[0] = 1000 166 | op2.par_loop(k, elems, g(op2.MIN), x(op2.READ)) 167 | 168 | assert g.data[0] == x.data.min() 169 | 170 | def test_global_min_g_is_min(self, elems, x, g): 171 | """Verify that op2.MIN does not reduce a minimum value larger than the 172 | Global's initial value.""" 173 | k_code = """static void k(unsigned int *x, unsigned int *g) { 174 | if ( *g > *x ) { *g = *x; } 175 | }""" 176 | 177 | k = op2.Kernel(k_code, 'k') 178 | g.data[0] = 10 179 | x.data[:] = 11 180 | op2.par_loop(k, elems, x(op2.READ), g(op2.MIN)) 181 | 182 | assert g.data[0] == 10 183 | 184 | def test_global_read(self, elems, x, h): 185 | """Increment each value of a Dat by the value of a Global.""" 186 | kernel_global_read = """ 187 | static void global_read(unsigned int* x, unsigned int* h) { 188 | (*x) += (*h); 189 | }""" 190 | op2.par_loop(op2.Kernel(kernel_global_read, "global_read"), 191 | elems, x(op2.RW), h(op2.READ)) 192 | _nelems = elems.size 193 | assert sum(x.data_ro) == _nelems * (_nelems + 1) // 2 194 | 195 | def test_2d_dat(self, elems, y): 196 | """Set both components of a vector-valued Dat to a scalar value.""" 197 | kernel_2d_wo = """static void k2d_wo(unsigned int* x) { 198 | x[0] = 42; x[1] = 43; 199 | }""" 200 | op2.par_loop(op2.Kernel(kernel_2d_wo, "k2d_wo"), 201 | elems, y(op2.WRITE)) 202 | assert all(map(lambda x: all(x == [42, 43]), y.data)) 203 | 204 | def test_host_write(self, elems, x, g): 205 | """Increment a global by the values of a Dat.""" 206 | kernel = """static void k(unsigned int *g, unsigned int *x) { *g += *x; }""" 207 | x.data[:] = 1 208 | g.data[:] = 0 209 | op2.par_loop(op2.Kernel(kernel, 'k'), elems, 210 | g(op2.INC), x(op2.READ)) 211 | _nelems = elems.size 212 | assert g.data[0] == _nelems 213 | 214 | x.data[:] = 2 215 | 
g.data[:] = 0 216 | kernel = """static void k(unsigned int *x, unsigned int *g) { *g += *x; }""" 217 | op2.par_loop(op2.Kernel(kernel, 'k'), elems, 218 | x(op2.READ), g(op2.INC)) 219 | assert g.data[0] == 2 * _nelems 220 | 221 | def test_zero_1d_dat(self, x): 222 | """Zero a Dat.""" 223 | x.data[:] = 10 224 | assert (x.data == 10).all() 225 | x.zero() 226 | assert (x.data == 0).all() 227 | 228 | def test_zero_2d_dat(self, y): 229 | """Zero a vector-valued Dat.""" 230 | y.data[:] = 10 231 | assert (y.data == 10).all() 232 | y.zero() 233 | assert (y.data == 0).all() 234 | 235 | def test_kernel_cplusplus(self, delems): 236 | """Test that passing cpp=True to a Kernel works.""" 237 | 238 | y = op2.Dat(delems, dtype=np.float64) 239 | y.data[:] = -10.5 240 | 241 | k = op2.Kernel(""" 242 | #include 243 | 244 | static void k(double *y) 245 | { 246 | *y = std::abs(*y); 247 | } 248 | """, "k", cpp=True) 249 | op2.par_loop(k, y.dataset.set, y(op2.RW)) 250 | 251 | assert (y.data == 10.5).all() 252 | 253 | def test_passthrough_mat(self): 254 | niters = 10 255 | iterset = op2.Set(niters) 256 | 257 | c_kernel = """ 258 | static void mat_inc(Mat mat) { 259 | PetscScalar values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; 260 | PetscInt idxs[] = {0, 2, 4}; 261 | MatSetValues(mat, 3, idxs, 3, idxs, values, ADD_VALUES); 262 | } 263 | """ 264 | kernel = op2.Kernel(c_kernel, "mat_inc") 265 | 266 | # create a tiny 5x5 sparse matrix 267 | petsc_mat = PETSc.Mat().create() 268 | petsc_mat.setSizes(5) 269 | petsc_mat.setUp() 270 | petsc_mat.setValues([0, 2, 4], [0, 2, 4], np.zeros((3, 3), dtype=PETSc.ScalarType)) 271 | petsc_mat.assemble() 272 | 273 | arg = op2.PassthroughArg(op2.OpaqueType("Mat"), petsc_mat.handle) 274 | op2.par_loop(kernel, iterset, arg) 275 | petsc_mat.assemble() 276 | 277 | assert np.allclose( 278 | petsc_mat.getValues(range(5), range(5)), 279 | [ 280 | [10, 0, 20, 0, 30], 281 | [0]*5, 282 | [40, 0, 50, 0, 60], 283 | [0]*5, 284 | [70, 0, 80, 0, 90], 285 | ] 286 | ) 287 | 288 | 289 | 
if __name__ == '__main__': 290 | import os 291 | pytest.main(os.path.abspath(__file__)) 292 | -------------------------------------------------------------------------------- /test/unit/test_globals.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | from pyop2 import op2 35 | from pyop2.mpi import COMM_WORLD 36 | 37 | 38 | def test_global_operations(): 39 | g1 = op2.Global(1, data=2., comm=COMM_WORLD) 40 | g2 = op2.Global(1, data=5., comm=COMM_WORLD) 41 | 42 | assert (g1 + g2).data == 7. 43 | assert (g2 - g1).data == 3. 44 | assert (-g2).data == -5. 45 | assert (g1 * g2).data == 10. 46 | g1 *= g2 47 | assert g1.data == 10. 48 | 49 | 50 | def test_global_dat_version(): 51 | g1 = op2.Global(1, data=1., comm=COMM_WORLD) 52 | g2 = op2.Global(1, data=2., comm=COMM_WORLD) 53 | 54 | assert g1.dat_version == 0 55 | assert g2.dat_version == 0 56 | 57 | # Access data property 58 | d1 = g1.data 59 | 60 | assert g1.dat_version == 1 61 | assert g2.dat_version == 0 62 | 63 | # Access data property 64 | g2.data[:] += 1 65 | 66 | assert g1.dat_version == 1 67 | assert g2.dat_version == 1 68 | 69 | # Access zero property 70 | g1.zero() 71 | 72 | assert g1.dat_version == 2 73 | assert g2.dat_version == 1 74 | 75 | # Access data setter 76 | g2.data = d1 77 | 78 | assert g1.dat_version == 2 79 | assert g2.dat_version == 2 80 | -------------------------------------------------------------------------------- /test/unit/test_iteration_space_dats.py: -------------------------------------------------------------------------------- 1 | # This file is part of PyOP2 2 | # 3 | # PyOP2 is Copyright (c) 2012, Imperial College London and 4 | # others. 
Please see the AUTHORS file in the main source directory for 5 | # a full list of copyright holders. All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
import pytest
import numpy

from pyop2 import op2


def _seed():
    """Fixed seed so any randomised test data is reproducible."""
    return 0.02041724


nnodes = 4096
nele = nnodes // 2


@pytest.fixture(scope='module')
def node():
    """Iteration set of nodes (shared across the module)."""
    return op2.Set(nnodes, 'node')


@pytest.fixture(scope='module')
def ele():
    """Iteration set of elements, two nodes per element."""
    return op2.Set(nele, 'ele')


@pytest.fixture
def d1(node):
    """Scalar int32 Dat on the node set, zero-initialised."""
    return op2.Dat(node, numpy.zeros(nnodes), dtype=numpy.int32)


@pytest.fixture
def d2(node):
    """2-component int32 Dat on the node set, zero-initialised."""
    return op2.Dat(node ** 2, numpy.zeros(2 * nnodes), dtype=numpy.int32)


@pytest.fixture
def vd1(ele):
    """Scalar int32 Dat on the element set, zero-initialised."""
    return op2.Dat(ele, numpy.zeros(nele), dtype=numpy.int32)


@pytest.fixture
def vd2(ele):
    """2-component int32 Dat on the element set, zero-initialised."""
    return op2.Dat(ele ** 2, numpy.zeros(2 * nele), dtype=numpy.int32)


@pytest.fixture(scope='module')
def node2ele(node, ele):
    """Arity-1 map sending each pair of consecutive nodes to one element.

    Use integer floor division so the map values are exact ints; plain
    ``/ 2`` under Python 3 true division yields floats (0.0, 0.5, 1.0, ...)
    and silently relies on truncation inside :class:`op2.Map`.
    """
    vals = numpy.arange(nnodes) // 2
    return op2.Map(node, ele, 1, vals, 'node2ele')


class TestIterationSpaceDats:

    """
    Test IterationSpace access to Dat objects
    """

    def test_sum_nodes_to_edges(self):
        """Creates a 1D grid with edge values numbered consecutively.
        Iterates over edges, summing the node values."""

        nedges = nnodes - 1
        nodes = op2.Set(nnodes, "nodes")
        edges = op2.Set(nedges, "edges")

        node_vals = op2.Dat(nodes, numpy.arange(
            nnodes, dtype=numpy.uint32), numpy.uint32, "node_vals")
        edge_vals = op2.Dat(
            edges, numpy.zeros(nedges, dtype=numpy.uint32), numpy.uint32, "edge_vals")

        # Edge i connects nodes (i, i + 1).
        e_map = numpy.array([(i, i + 1)
                             for i in range(nedges)], dtype=numpy.uint32)
        edge2node = op2.Map(edges, nodes, 2, e_map, "edge2node")
        kernel_sum = """
static void sum(unsigned int *edge, unsigned int *nodes) {
  for (int i=0; i<2; ++i)
    edge[0] += nodes[i];
}
"""

        op2.par_loop(op2.Kernel(kernel_sum, "sum"), edges,
                     edge_vals(op2.INC),
                     node_vals(op2.READ, edge2node))

        # Edge i accumulates node values i and i + 1, i.e. the odd numbers.
        expected = numpy.arange(1, nedges * 2 + 1, 2)
        assert all(expected == edge_vals.data)

    def test_read_1d_itspace_map(self, node, d1, vd1, node2ele):
        """Indirect read through the map: both nodes of an element see it."""
        vd1.data[:] = numpy.arange(nele)
        k = """
static void k(int *d, int *vd) {
  for (int i=0; i<1; ++i)
    d[0] = vd[i];
}
"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     d1(op2.WRITE),
                     vd1(op2.READ, node2ele))
        # Even and odd nodes map to the same element, so both halves match.
        assert all(d1.data[::2] == vd1.data)
        assert all(d1.data[1::2] == vd1.data)

    def test_write_1d_itspace_map(self, node, vd1, node2ele):
        """Indirect write through the map reaches every element."""
        k = """
static void k(int *vd) {
  for (int i=0; i<1; ++i)
    vd[i] = 2;
}
"""

        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd1(op2.WRITE, node2ele))
        assert all(vd1.data == 2)

    def test_inc_1d_itspace_map(self, node, d1, vd1, node2ele):
        """Indirect increment: each element accumulates both its nodes."""
        vd1.data[:] = 3
        d1.data[:] = numpy.arange(nnodes).reshape(d1.data.shape)

        k = """
static void k(int *vd, int *d) {
  for (int i=0; i<1; ++i)
    vd[i] += d[0];
}
"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd1(op2.INC, node2ele),
                     d1(op2.READ))
        # Element e receives contributions from nodes 2e and 2e + 1.
        expected = numpy.zeros_like(vd1.data)
        expected[:] = 3
        expected += numpy.arange(
            start=0, stop=nnodes, step=2).reshape(expected.shape)
        expected += numpy.arange(
            start=1, stop=nnodes, step=2).reshape(expected.shape)
        assert all(vd1.data == expected)

    def test_read_2d_itspace_map(self, d2, vd2, node2ele, node):
        """As the 1d read test, but with 2-component Dats."""
        vd2.data[:] = numpy.arange(nele * 2).reshape(nele, 2)
        k = """
static void k(int *d, int *vd) {
  for (int i=0; i<1; ++i) {
    d[0] = vd[i];
    d[1] = vd[i+1];
  }
}
"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     d2(op2.WRITE),
                     vd2(op2.READ, node2ele))
        assert all(d2.data[::2, 0] == vd2.data[:, 0])
        assert all(d2.data[::2, 1] == vd2.data[:, 1])
        assert all(d2.data[1::2, 0] == vd2.data[:, 0])
        assert all(d2.data[1::2, 1] == vd2.data[:, 1])

    def test_write_2d_itspace_map(self, vd2, node2ele, node):
        """As the 1d write test, but with 2-component Dats."""
        k = """
static void k(int *vd) {
  for (int i=0; i<1; ++i) {
    vd[i] = 2;
    vd[i+1] = 3;
  }
}
"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd2(op2.WRITE, node2ele))
        assert all(vd2.data[:, 0] == 2)
        assert all(vd2.data[:, 1] == 3)

    def test_inc_2d_itspace_map(self, d2, vd2, node2ele, node):
        """As the 1d increment test, but with 2-component Dats."""
        vd2.data[:, 0] = 3
        vd2.data[:, 1] = 4
        d2.data[:] = numpy.arange(2 * nnodes).reshape(d2.data.shape)

        k = """
static void k(int *vd, int *d) {
  for (int i=0; i<1; ++i) {
    vd[i] += d[0];
    vd[i+1] += d[1];
  }
}
"""

        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd2(op2.INC, node2ele),
                     d2(op2.READ))

        # Component c of element e accumulates entries from nodes 2e, 2e + 1.
        expected = numpy.zeros_like(vd2.data)
        expected[:, 0] = 3
        expected[:, 1] = 4
        expected[:, 0] += numpy.arange(start=0, stop=2 * nnodes, step=4)
        expected[:, 0] += numpy.arange(start=2, stop=2 * nnodes, step=4)
        expected[:, 1] += numpy.arange(start=1, stop=2 * nnodes, step=4)
        expected[:, 1] += numpy.arange(start=3, stop=2 * nnodes, step=4)
        assert all(vd2.data[:, 0] == expected[:, 0])
        assert all(vd2.data[:, 1] == expected[:, 1])


if __name__ == '__main__':
    import os
    pytest.main(os.path.abspath(__file__))
# -----------------------------------------------------------------------------
# test/unit/test_linalg.py
# -----------------------------------------------------------------------------
# This file is part of PyOP2
#
# PyOP2 is Copyright (c) 2012, Imperial College London and
# others. Please see the AUTHORS file in the main source directory for
# a full list of copyright holders. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The name of Imperial College London or that of other
#       contributors may not be used to endorse or promote products
#       derived from this software without specific prior written
#       permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS
# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE
# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.


import pytest
import numpy as np

from pyop2 import op2

# Size of the test Sets; small so every test is cheap.
nelems = 8


@pytest.fixture
def set():
    """A fresh Set of nelems entities."""
    return op2.Set(nelems)


@pytest.fixture
def dset(set):
    """Scalar DataSet on the test Set."""
    return op2.DataSet(set, 1)


@pytest.fixture
def x(dset):
    """Uninitialised float64 Dat; tests assign x._data before use."""
    return op2.Dat(dset, None, np.float64, "x")


@pytest.fixture
def y(dset):
    """float64 Dat holding 1..nelems."""
    return op2.Dat(dset, np.arange(1, nelems + 1), np.float64, "y")


@pytest.fixture
def yi(dset):
    """int64 Dat holding 1..nelems, for mixed-dtype tests."""
    return op2.Dat(dset, np.arange(1, nelems + 1), np.int64, "y")


@pytest.fixture
def x2():
    """Dat with shape (1, 2) per entity — deliberately mismatched with y2."""
    s = op2.Set(nelems, "s1")
    return op2.Dat(s ** (1, 2), np.zeros(2 * nelems), np.float64, "x")


@pytest.fixture
def y2():
    """Dat with shape (2, 1) per entity — deliberately mismatched with x2."""
    s = op2.Set(nelems, "s2")
    return op2.Dat(s ** (2, 1), np.zeros(2 * nelems), np.float64, "y")


class TestLinAlgOp:

    """
    Tests of linear algebra operators returning a new Dat.
    """

    def test_add(self, x, y):
        x._data = 2 * y.data
        assert all((x + y).data == 3 * y.data)

    def test_sub(self, x, y):
        x._data = 2 * y.data
        assert all((x - y).data == y.data)

    def test_mul(self, x, y):
        x._data = 2 * y.data
        assert all((x * y).data == 2 * y.data * y.data)

    def test_div(self, x, y):
        x._data = 2 * y.data
        assert all((x / y).data == 2.0)

    # Operations on Dats with differing per-entity shapes must be rejected.
    def test_add_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 + y2

    def test_sub_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 - y2

    def test_mul_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 * y2

    def test_div_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 / y2

    def test_add_scalar(self, x, y):
        x._data = y.data + 1.0
        assert all(x.data == (y + 1.0).data)

    def test_radd_scalar(self, x, y):
        # Reflected operand order: scalar + Dat.
        x._data = y.data + 1.0
        assert all(x.data == (1.0 + y).data)

    def test_pos_copies(self, y):
        # Unary plus must return a copy, not the same object.
        z = +y
        assert all(z.data == y.data)
        assert z is not y

    def test_neg_copies(self, y):
        # Unary minus must also return a fresh Dat.
        z = -y
        assert all(z.data == -y.data)
        assert z is not y

    def test_sub_scalar(self, x, y):
        x._data = y.data - 1.0
        assert all(x.data == (y - 1.0).data)

    def test_rsub_scalar(self, x, y):
        x._data = 1.0 - y.data
        assert all(x.data == (1.0 - y).data)

    def test_mul_scalar(self, x, y):
        x._data = 2 * y.data
        assert all(x.data == (y * 2.0).data)

    def test_rmul_scalar(self, x, y):
        x._data = 2 * y.data
        assert all(x.data == (2.0 * y).data)

    def test_div_scalar(self, x, y):
        x._data = 2 * y.data
        assert all((x / 2.0).data == y.data)

    # Result dtype follows the left-hand operand: float op int -> float.
    def test_add_ftype(self, y, yi):
        x = y + yi
        assert x.data.dtype == np.float64

    def test_sub_ftype(self, y, yi):
        x = y - yi
        assert x.data.dtype == np.float64

    def test_mul_ftype(self, y, yi):
        x = y * yi
        assert x.data.dtype == np.float64

    def test_div_ftype(self, y, yi):
        x = y / yi
        assert x.data.dtype == np.float64

    # ... and int op float -> int, still following the left-hand operand.
    def test_add_itype(self, y, yi):
        xi = yi + y
        assert xi.data.dtype == np.int64

    def test_sub_itype(self, y, yi):
        xi = yi - y
        assert xi.data.dtype == np.int64

    def test_mul_itype(self, y, yi):
        xi = yi * y
        assert xi.data.dtype == np.int64

    def test_div_itype(self, y, yi):
        xi = yi / y
        assert xi.data.dtype == np.int64

    def test_linalg_and_parloop(self, x, y):
        """Linear algebra operators should force computation"""
        x._data = np.zeros(x.dataset.total_size, dtype=np.float64)
        k = op2.Kernel('static void k(double *x) { *x = 1.0; }', 'k')
        op2.par_loop(k, x.dataset.set, x(op2.WRITE))
        # x + y must see the par_loop's writes, not the stale zeros.
        z = x + y
        assert all(z.data == y.data + 1)


class TestLinAlgIop:

    """
    Tests of linear algebra operators modifying a Dat in place.
    """

    def test_iadd(self, x, y):
        x._data = 2 * y.data
        x += y
        assert all(x.data == 3 * y.data)

    def test_isub(self, x, y):
        x._data = 2 * y.data
        x -= y
        assert all(x.data == y.data)

    def test_imul(self, x, y):
        x._data = 2 * y.data
        x *= y
        assert all(x.data == 2 * y.data * y.data)

    def test_idiv(self, x, y):
        x._data = 2 * y.data
        x /= y
        assert all(x.data == 2.0)

    # In-place ops on shape-mismatched Dats must also be rejected.
    def test_iadd_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 += y2

    def test_isub_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 -= y2

    def test_imul_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 *= y2

    def test_idiv_shape_mismatch(self, x2, y2):
        with pytest.raises(ValueError):
            x2 /= y2

    def test_iadd_scalar(self, x, y):
        x._data = y.data + 1.0
        y += 1.0
        assert all(x.data == y.data)

    def test_isub_scalar(self, x, y):
        x._data = y.data - 1.0
        y -= 1.0
        assert all(x.data == y.data)

    def test_imul_scalar(self, x, y):
        x._data = 2 * y.data
        y *= 2.0
        assert all(x.data == y.data)

    def test_idiv_scalar(self, x, y):
        x._data = 2 * y.data
        x /= 2.0
        assert all(x.data == y.data)

    # In-place ops must keep the dtype of the mutated (left-hand) Dat.
    def test_iadd_ftype(self, y, yi):
        y += yi
        assert y.data.dtype == np.float64

    def test_isub_ftype(self, y, yi):
        y -= yi
        assert y.data.dtype == np.float64

    def test_imul_ftype(self, y, yi):
        y *= yi
        assert y.data.dtype == np.float64

    def test_idiv_ftype(self, y, yi):
        y /= yi
        assert y.data.dtype == np.float64

    def test_iadd_itype(self, y, yi):
        yi += y
        assert yi.data.dtype == np.int64

    def test_isub_itype(self, y, yi):
        yi -= y
        assert yi.data.dtype == np.int64

    def test_imul_itype(self, y, yi):
        yi *= y
        assert yi.data.dtype == np.int64

    def test_idiv_itype(self, y, yi):
        yi /= y
        assert yi.data.dtype == np.int64


class TestLinAlgScalar:

    """
    Tests of linear algebra operators returning a scalar.
    """

    def test_norm(self):
        # 3-4-5 triangle: Euclidean norm of (3, 4) is 5.
        s = op2.Set(2)
        n = op2.Dat(s, [3, 4], np.float64, "n")
        assert abs(n.norm - 5) < 1e-12

    def test_inner(self):
        # Inner product must be symmetric: 3*4 + 4*5 = 32 either way.
        s = op2.Set(2)
        n = op2.Dat(s, [3, 4], np.float64)
        o = op2.Dat(s, [4, 5], np.float64)

        ret = n.inner(o)

        assert abs(ret - 32) < 1e-12

        ret = o.inner(n)

        assert abs(ret - 32) < 1e-12

    def test_norm_mixed(self):
        # MixedDat norm aggregates over components: |(3, 4)| = 5.
        s = op2.Set(1)

        n = op2.Dat(s, [3], np.float64)
        o = op2.Dat(s, [4], np.float64)

        md = op2.MixedDat([n, o])

        assert abs(md.norm - 5) < 1e-12

    def test_inner_mixed(self):
        # Mixed inner product, again symmetric: 3*4 + 4*5 = 32.
        s = op2.Set(1)

        n = op2.Dat(s, [3], np.float64)
        o = op2.Dat(s, [4], np.float64)

        md = op2.MixedDat([n, o])

        n1 = op2.Dat(s, [4], np.float64)
        o1 = op2.Dat(s, [5], np.float64)

        md1 = op2.MixedDat([n1, o1])

        ret = md.inner(md1)

        assert abs(ret - 32) < 1e-12

        ret = md1.inner(md)

        assert abs(ret - 32) < 1e-12
# -----------------------------------------------------------------------------
# test/unit/test_petsc.py
# -----------------------------------------------------------------------------
# This file is part of PyOP2
#
# PyOP2 is Copyright (c) 2012, Imperial College London and
# others. Please see the AUTHORS file in the main source directory for
# a full list of copyright holders. All rights reserved.
6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
"""
PETSc specific unit tests
"""


import pytest
import numpy as np

from pyop2 import op2

# If mpi4py or petsc4py are not available this test module is skipped
mpi4py = pytest.importorskip("mpi4py")
petsc4py = pytest.importorskip("petsc4py")


class TestPETSc:

    def test_vec_norm_changes(self):
        """The PETSc Vec view must track value changes made via Dat.data."""
        s = op2.Set(1)
        d = op2.Dat(s)

        d.data[:] = 1

        with d.vec_ro as v:
            assert np.allclose(v.norm(), 1.0)

        d.data[:] = 2

        with d.vec_ro as v:
            assert np.allclose(v.norm(), 2.0)

    def test_mixed_vec_access(self):
        """Read- and write-only Vec access semantics for a MixedDat."""
        s = op2.Set(1)
        ms = op2.MixedSet([s, s])
        d = op2.MixedDat(ms)

        d.data[0][:] = 1.0
        d.data[1][:] = 2.0

        # Read-only access copies the Dat data into the Vec.
        with d.vec_ro as v:
            assert np.allclose(v.array_r, [1.0, 2.0])

        # Zero both components.  (Fixed: the original zeroed d.data[0]
        # twice and never touched d.data[1].)
        d.data[0][:] = 0.0
        d.data[1][:] = 0.0

        with d.vec_wo as v:
            # Write-only access does not copy Dat data into the Vec, so
            # the values from the earlier vec_ro access are still visible.
            assert np.allclose(v.array_r, [1.0, 2.0])
            v.array[:] = 1

        # Writes through the Vec must propagate back into the Dat.
        assert d.data[0][0] == 1
        assert d.data[1][0] == 1
# -----------------------------------------------------------------------------
# test/unit/test_vector_map.py
# -----------------------------------------------------------------------------
# This file is part of PyOP2
#
# PyOP2 is Copyright (c) 2012, Imperial College London and
# others. Please see the AUTHORS file in the main source directory for
# a full list of copyright holders. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # * The name of Imperial College London or that of other 17 | # contributors may not be used to endorse or promote products 18 | # derived from this software without specific prior written 19 | # permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS 22 | # ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 | # OF THE POSSIBILITY OF SUCH DAMAGE. 
import pytest
import numpy

from pyop2 import op2


def _seed():
    """Fixed seed so any randomised test data is reproducible."""
    return 0.02041724


nnodes = 4096
nele = nnodes // 2


@pytest.fixture(scope='module')
def node():
    return op2.Set(nnodes, 'node')


@pytest.fixture(scope='module')
def ele():
    return op2.Set(nele, 'ele')


@pytest.fixture(scope='module')
def dnode(node):
    """Scalar DataSet on the node set."""
    return op2.DataSet(node, 1, 'dnode')


@pytest.fixture(scope='module')
def dnode2(node):
    """2-component DataSet on the node set."""
    return op2.DataSet(node, 2, 'dnode2')


@pytest.fixture(scope='module')
def dele(ele):
    """Scalar DataSet on the element set."""
    return op2.DataSet(ele, 1, 'dele')


@pytest.fixture(scope='module')
def dele2(ele):
    """2-component DataSet on the element set."""
    return op2.DataSet(ele, 2, 'dele2')


@pytest.fixture
def d1(dnode):
    return op2.Dat(dnode, numpy.zeros(nnodes), dtype=numpy.int32)


@pytest.fixture
def d2(dnode2):
    return op2.Dat(dnode2, numpy.zeros(2 * nnodes), dtype=numpy.int32)


@pytest.fixture
def vd1(dele):
    return op2.Dat(dele, numpy.zeros(nele), dtype=numpy.int32)


@pytest.fixture
def vd2(dele2):
    return op2.Dat(dele2, numpy.zeros(2 * nele), dtype=numpy.int32)


@pytest.fixture(scope='module')
def node2ele(node, ele):
    """Arity-1 map sending each pair of consecutive nodes to one element.

    Use integer floor division so the map values are exact ints; plain
    ``/ 2`` under Python 3 true division yields floats (0.0, 0.5, 1.0, ...)
    and silently relies on truncation inside :class:`op2.Map`.
    """
    vals = numpy.arange(nnodes) // 2
    return op2.Map(node, ele, 1, vals, 'node2ele')


class TestVectorMap:

    """
    Vector Map Tests
    """

    def test_sum_nodes_to_edges(self):
        """Creates a 1D grid with edge values numbered consecutively.
        Iterates over edges, summing the node values."""

        nedges = nnodes - 1
        nodes = op2.Set(nnodes, "nodes")
        edges = op2.Set(nedges, "edges")

        node_vals = op2.Dat(
            nodes, numpy.array(range(nnodes), dtype=numpy.uint32), numpy.uint32, "node_vals")
        edge_vals = op2.Dat(
            edges, numpy.array([0] * nedges, dtype=numpy.uint32), numpy.uint32, "edge_vals")

        # Edge i connects nodes (i, i + 1).
        e_map = numpy.array([(i, i + 1)
                             for i in range(nedges)], dtype=numpy.uint32)
        edge2node = op2.Map(edges, nodes, 2, e_map, "edge2node")

        kernel_sum = """
static void sum(unsigned int* edge, unsigned int *nodes) {
  *edge = nodes[0] + nodes[1];
}
"""
        op2.par_loop(op2.Kernel(kernel_sum, "sum"), edges,
                     edge_vals(op2.WRITE),
                     node_vals(op2.READ, edge2node))

        # Edge i holds node values i + (i + 1), i.e. the odd numbers.
        expected = numpy.asarray(
            range(1, nedges * 2 + 1, 2))
        assert all(expected == edge_vals.data)

    def test_read_1d_vector_map(self, node, d1, vd1, node2ele):
        """Indirect read through the map: both nodes of an element see it."""
        vd1.data[:] = numpy.arange(nele)
        k = """
static void k(int *d, int *vd) {
  *d = vd[0];
}"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     d1(op2.WRITE),
                     vd1(op2.READ, node2ele))
        # Even and odd nodes map to the same element, so both halves match.
        assert all(d1.data[::2] == vd1.data)
        assert all(d1.data[1::2] == vd1.data)

    def test_write_1d_vector_map(self, node, vd1, node2ele):
        """Indirect write through the map reaches every element."""
        k = """
static void k(int *vd) {
  vd[0] = 2;
}
"""

        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd1(op2.WRITE, node2ele))
        assert all(vd1.data == 2)

    def test_inc_1d_vector_map(self, node, d1, vd1, node2ele):
        """Indirect increment: each element accumulates both its nodes."""
        vd1.data[:] = 3
        d1.data[:] = numpy.arange(nnodes).reshape(d1.data.shape)

        k = """
static void k(int *vd, int *d) {
  vd[0] += *d;
}"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd1(op2.INC, node2ele),
                     d1(op2.READ))
        # Element e receives contributions from nodes 2e and 2e + 1.
        expected = numpy.zeros_like(vd1.data)
        expected[:] = 3
        expected += numpy.arange(
            start=0, stop=nnodes, step=2).reshape(expected.shape)
        expected += numpy.arange(
            start=1, stop=nnodes, step=2).reshape(expected.shape)
        assert all(vd1.data == expected)

    def test_read_2d_vector_map(self, node, d2, vd2, node2ele):
        """As the 1d read test, but with 2-component Dats."""
        vd2.data[:] = numpy.arange(nele * 2).reshape(nele, 2)
        k = """
static void k(int d[2], int vd[1][2]) {
  d[0] = vd[0][0];
  d[1] = vd[0][1];
}"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     d2(op2.WRITE),
                     vd2(op2.READ, node2ele))
        assert all(d2.data[::2, 0] == vd2.data[:, 0])
        assert all(d2.data[::2, 1] == vd2.data[:, 1])
        assert all(d2.data[1::2, 0] == vd2.data[:, 0])
        assert all(d2.data[1::2, 1] == vd2.data[:, 1])

    def test_write_2d_vector_map(self, node, vd2, node2ele):
        """As the 1d write test, but with 2-component Dats."""
        k = """
static void k(int vd[1][2]) {
  vd[0][0] = 2;
  vd[0][1] = 3;
}
"""

        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd2(op2.WRITE, node2ele))
        assert all(vd2.data[:, 0] == 2)
        assert all(vd2.data[:, 1] == 3)

    def test_inc_2d_vector_map(self, node, d2, vd2, node2ele):
        """As the 1d increment test, but with 2-component Dats."""
        vd2.data[:, 0] = 3
        vd2.data[:, 1] = 4
        d2.data[:] = numpy.arange(2 * nnodes).reshape(d2.data.shape)

        k = """
static void k(int vd[1][2], int d[2]) {
  vd[0][0] += d[0];
  vd[0][1] += d[1];
}"""
        op2.par_loop(op2.Kernel(k, 'k'), node,
                     vd2(op2.INC, node2ele),
                     d2(op2.READ))

        # Component c of element e accumulates entries from nodes 2e, 2e + 1.
        expected = numpy.zeros_like(vd2.data)
        expected[:, 0] = 3
        expected[:, 1] = 4
        expected[:, 0] += numpy.arange(start=0, stop=2 * nnodes, step=4)
        expected[:, 0] += numpy.arange(start=2, stop=2 * nnodes, step=4)
        expected[:, 1] += numpy.arange(start=1, stop=2 * nnodes, step=4)
        expected[:, 1] += numpy.arange(start=3, stop=2 * nnodes, step=4)
        assert all(vd2.data[:, 0] == expected[:, 0])
        assert all(vd2.data[:, 1] == expected[:, 1])


if __name__ == '__main__':
    import os
    pytest.main(os.path.abspath(__file__))