├── .gitignore ├── LICENSE ├── README.md ├── examples ├── boundary-conditions.yaml ├── broadcast.yaml ├── constants.yaml ├── hydro2d │ ├── .gitignore │ ├── BUILD-HOST-GEN │ ├── GIT-VERSION-GEN │ ├── LICENSE │ ├── Makefile │ ├── arch.hpp │ ├── array-macros.hpp │ ├── compare.cpp │ ├── config.c │ ├── hydro2d-x.yaml │ ├── hydro2d-y.yaml │ ├── pcl-hydro-core.cpp │ ├── pcl-hydro-params.cpp │ ├── pcl-hydro-util.cpp │ ├── pcl-hydro-vcore.cpp │ ├── pcl-hydro.hpp │ ├── run-tile.cpp │ ├── test.nml │ ├── timeseries.cpp │ ├── timeseries.hpp │ └── vtkfile.cpp ├── laplace5 │ ├── .gitignore │ ├── Makefile │ ├── laplace5-test.cpp │ └── laplace5.yaml ├── literals.yaml ├── reduction.yaml ├── split-loops.yaml ├── uninitialized.yaml ├── vectorization-inner.yaml └── vectorization-outer.yaml ├── hfav.py ├── hfav ├── __init__.py ├── analyze.py ├── c99.py ├── codegen.py ├── cpp.py ├── dag.py ├── dot.py ├── include │ ├── cpp-rotate.hpp │ └── hfav │ │ ├── c99-rotate.h │ │ └── cpp-rotate.hpp ├── inest.py ├── infer.py ├── ispace.py ├── iter_plot.py ├── old_cpp.py ├── parse.py └── term.py ├── regress-results └── test-examples.txt └── regress.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.dot 3 | *.png 4 | *.asm 5 | *.optrpt 6 | *.o 7 | *.mod 8 | *.out 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | GENERATED CODE EXEMPTION 6 | 7 | The output of this tool does not automatically import the Apache 8 | 2.0 license, except the output will continue to be subject to the 9 | limitation of liability clause in the Apache 2.0 license. 
Users may 10 | license their output under any license they choose but the liability 11 | of the authors of the tool for that output is governed by the 12 | limitation of liability clause in the Apache 2.0 license. 13 | 14 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 15 | 16 | 1. Definitions. 17 | 18 | "License" shall mean the terms and conditions for use, reproduction, 19 | and distribution as defined by Sections 1 through 9 of this document. 20 | 21 | "Licensor" shall mean the copyright owner or entity authorized by 22 | the copyright owner that is granting the License. 23 | 24 | "Legal Entity" shall mean the union of the acting entity and all 25 | other entities that control, are controlled by, or are under common 26 | control with that entity. For the purposes of this definition, 27 | "control" means (i) the power, direct or indirect, to cause the 28 | direction or management of such entity, whether by contract or 29 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 30 | outstanding shares, or (iii) beneficial ownership of such entity. 31 | 32 | "You" (or "Your") shall mean an individual or Legal Entity 33 | exercising permissions granted by this License. 34 | 35 | "Source" form shall mean the preferred form for making modifications, 36 | including but not limited to software source code, documentation 37 | source, and configuration files. 38 | 39 | "Object" form shall mean any form resulting from mechanical 40 | transformation or translation of a Source form, including but 41 | not limited to compiled object code, generated documentation, 42 | and conversions to other media types. 43 | 44 | "Work" shall mean the work of authorship, whether in Source or 45 | Object form, made available under the License, as indicated by a 46 | copyright notice that is included in or attached to the work 47 | (an example is provided in the Appendix below). 
48 | 49 | "Derivative Works" shall mean any work, whether in Source or Object 50 | form, that is based on (or derived from) the Work and for which the 51 | editorial revisions, annotations, elaborations, or other modifications 52 | represent, as a whole, an original work of authorship. For the purposes 53 | of this License, Derivative Works shall not include works that remain 54 | separable from, or merely link (or bind by name) to the interfaces of, 55 | the Work and Derivative Works thereof. 56 | 57 | "Contribution" shall mean any work of authorship, including 58 | the original version of the Work and any modifications or additions 59 | to that Work or Derivative Works thereof, that is intentionally 60 | submitted to Licensor for inclusion in the Work by the copyright owner 61 | or by an individual or Legal Entity authorized to submit on behalf of 62 | the copyright owner. For the purposes of this definition, "submitted" 63 | means any form of electronic, verbal, or written communication sent 64 | to the Licensor or its representatives, including but not limited to 65 | communication on electronic mailing lists, source code control systems, 66 | and issue tracking systems that are managed by, or on behalf of, the 67 | Licensor for the purpose of discussing and improving the Work, but 68 | excluding communication that is conspicuously marked or otherwise 69 | designated in writing by the copyright owner as "Not a Contribution." 70 | 71 | "Contributor" shall mean Licensor and any individual or Legal Entity 72 | on behalf of whom a Contribution has been received by Licensor and 73 | subsequently incorporated within the Work. 74 | 75 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | copyright license to reproduce, prepare Derivative Works of, 79 | publicly display, publicly perform, sublicense, and distribute the 80 | Work and such Derivative Works in Source or Object form. 81 | 82 | 3. Grant of Patent License. Subject to the terms and conditions of 83 | this License, each Contributor hereby grants to You a perpetual, 84 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 85 | (except as stated in this section) patent license to make, have made, 86 | use, offer to sell, sell, import, and otherwise transfer the Work, 87 | where such license applies only to those patent claims licensable 88 | by such Contributor that are necessarily infringed by their 89 | Contribution(s) alone or by combination of their Contribution(s) 90 | with the Work to which such Contribution(s) was submitted. If You 91 | institute patent litigation against any entity (including a 92 | cross-claim or counterclaim in a lawsuit) alleging that the Work 93 | or a Contribution incorporated within the Work constitutes direct 94 | or contributory patent infringement, then any patent licenses 95 | granted to You under this License for that Work shall terminate 96 | as of the date such litigation is filed. 97 | 98 | 4. Redistribution. 
You may reproduce and distribute copies of the 99 | Work or Derivative Works thereof in any medium, with or without 100 | modifications, and in Source or Object form, provided that You 101 | meet the following conditions: 102 | 103 | (a) You must give any other recipients of the Work or 104 | Derivative Works a copy of this License; and 105 | 106 | (b) You must cause any modified files to carry prominent notices 107 | stating that You changed the files; and 108 | 109 | (c) You must retain, in the Source form of any Derivative Works 110 | that You distribute, all copyright, patent, trademark, and 111 | attribution notices from the Source form of the Work, 112 | excluding those notices that do not pertain to any part of 113 | the Derivative Works; and 114 | 115 | (d) If the Work includes a "NOTICE" text file as part of its 116 | distribution, then any Derivative Works that You distribute must 117 | include a readable copy of the attribution notices contained 118 | within such NOTICE file, excluding those notices that do not 119 | pertain to any part of the Derivative Works, in at least one 120 | of the following places: within a NOTICE text file distributed 121 | as part of the Derivative Works; within the Source form or 122 | documentation, if provided along with the Derivative Works; or, 123 | within a display generated by the Derivative Works, if and 124 | wherever such third-party notices normally appear. The contents 125 | of the NOTICE file are for informational purposes only and 126 | do not modify the License. You may add Your own attribution 127 | notices within Derivative Works that You distribute, alongside 128 | or as an addendum to the NOTICE text from the Work, provided 129 | that such additional attribution notices cannot be construed 130 | as modifying the License. 
131 | 132 | You may add Your own copyright statement to Your modifications and 133 | may provide additional or different license terms and conditions 134 | for use, reproduction, or distribution of Your modifications, or 135 | for any such Derivative Works as a whole, provided Your use, 136 | reproduction, and distribution of the Work otherwise complies with 137 | the conditions stated in this License. 138 | 139 | 5. Submission of Contributions. Unless You explicitly state otherwise, 140 | any Contribution intentionally submitted for inclusion in the Work 141 | by You to the Licensor shall be under the terms and conditions of 142 | this License, without any additional terms or conditions. 143 | Notwithstanding the above, nothing herein shall supersede or modify 144 | the terms of any separate license agreement you may have executed 145 | with Licensor regarding such Contributions. 146 | 147 | 6. Trademarks. This License does not grant permission to use the trade 148 | names, trademarks, service marks, or product names of the Licensor, 149 | except as required for reasonable and customary use in describing the 150 | origin of the Work and reproducing the content of the NOTICE file. 151 | 152 | 7. Disclaimer of Warranty. Unless required by applicable law or 153 | agreed to in writing, Licensor provides the Work (and each 154 | Contributor provides its Contributions) on an "AS IS" BASIS, 155 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 156 | implied, including, without limitation, any warranties or conditions 157 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 158 | PARTICULAR PURPOSE. You are solely responsible for determining the 159 | appropriateness of using or redistributing the Work and assume any 160 | risks associated with Your exercise of permissions under this License. 161 | 162 | 8. Limitation of Liability. 
In no event and under no legal theory, 163 | whether in tort (including negligence), contract, or otherwise, 164 | unless required by applicable law (such as deliberate and grossly 165 | negligent acts) or agreed to in writing, shall any Contributor be 166 | liable to You for damages, including any direct, indirect, special, 167 | incidental, or consequential damages of any character arising as a 168 | result of this License or out of the use or inability to use the 169 | Work (including but not limited to damages for loss of goodwill, 170 | work stoppage, computer failure or malfunction, or any and all 171 | other commercial damages or losses), even if such Contributor 172 | has been advised of the possibility of such damages. 173 | 174 | 9. Accepting Warranty or Additional Liability. While redistributing 175 | the Work or Derivative Works thereof, You may choose to offer, 176 | and charge a fee for, acceptance of support, warranty, indemnity, 177 | or other liability obligations and/or rights consistent with this 178 | License. However, in accepting such obligations, You may act only 179 | on Your own behalf and on Your sole responsibility, not on behalf 180 | of any other Contributor, and only if You agree to indemnify, 181 | defend, and hold each Contributor harmless for any liability 182 | incurred by, or claims asserted against, such Contributor by reason 183 | of your accepting any such warranty or additional liability. 184 | 185 | END OF TERMS AND CONDITIONS 186 | 187 | APPENDIX: How to apply the Apache License to your work. 188 | 189 | To apply the Apache License to your work, attach the following 190 | boilerplate notice, with the fields enclosed by brackets "{}" 191 | replaced with your own identifying information. (Don't include 192 | the brackets!) The text should be enclosed in the appropriate 193 | comment syntax for the file format. 
We also recommend that a 194 | file or class name and description of purpose be included on the 195 | same "printed page" as the copyright notice for easier 196 | identification within third-party archives. 197 | 198 | Copyright {yyyy} {name of copyright owner} 199 | 200 | Licensed under the Apache License, Version 2.0 (the "License"); 201 | you may not use this file except in compliance with the License. 202 | You may obtain a copy of the License at 203 | 204 | http://www.apache.org/licenses/LICENSE-2.0 205 | 206 | Unless required by applicable law or agreed to in writing, software 207 | distributed under the License is distributed on an "AS IS" BASIS, 208 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 209 | See the License for the specific language governing permissions and 210 | limitations under the License. 211 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DISCONTINUATION OF PROJECT. 2 | 3 | This project will no longer be maintained by Intel. 4 | 5 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project. 6 | 7 | Intel no longer accepts patches to this project. 8 | 9 | If you have an ongoing need to use this project, are interested in independently developing it, or would like to maintain patches for the open source software community, please create your own fork of this project. 10 | HFAV 11 | ==== 12 | 13 | High-performance Fusion And Vectorization (formerly "Rolling Thunder") 14 | 15 | Overview 16 | -------- 17 | 18 | This is a prototype that demonstrates how certain code transformation techniques may be automatically applied to a suitable input; in particular, it aims to automatically fuse and vectorize kernels while minimizing intermediate storage. 
For computations where *pure* kernels are applied to regular grids, particularly where kernels pass information to one another, hfav may provide speedup. 19 | 20 | hfav accepts a declarative input file that specifies the function prototype for each kernel along with information about each parameter and the iteration space that the kernel should be applied to. Terminal conditions (*axioms* and *goals*) are supplied, along with options about code generation and output. The resulting output is intended to be linked into the original code, preferably in a fashion that enables inlining (which is necessary for auto-vectorization). 21 | 22 | License 23 | ------- 24 | 25 | This software and all but one example are distributed with a modified Apache License 2.0. See LICENSE for details; the modification is an exception that code generated with this software is only subject to the limited liability clauses of the Apache 2.0 license (in particular, we don't retain copyright on generated code). 26 | 27 | The hydro2d example is subject to the CeCILL license; see examples/hydro2d/LICENSE for details. 28 | 29 | Usage 30 | ----- 31 | 32 | hfav.py is the top-level interface to hfav. It is invoked as: 33 | 34 | hfav.py [-h] [-d] [-o OUTPUT_LOCATION] [-s STORAGE] [-v {0,1,2}] FILE 35 | 36 | ### Options 37 | 38 | - `FILE`: input YAML file (*mandatory*) 39 | - `-h, --help`: show help message and exit 40 | - `-d, --debug`: enable debug output 41 | - `-o OUTPUT_LOCATION, --output OUTPUT_LOCATION`: override output location; "-" gives stdout 42 | - `-s STORAGE, --storage STORAGE`: where to place temporary arrays (default: stack) 43 | - `-v {0,1,2}, --verbosity {0,1,2}`: level of verbosity while processing 44 | 45 | It can be useful to export the environment variable `HFAVROOT` to the `hfav/` directory contained in this source distribution. 46 | 47 | Examples 48 | -------- 49 | 50 | The YAML format accepted by hfav is best understood by looking at examples. 
See the `examples/` directory for more detail. 51 | 52 | The `hydro2d/` directory contains a more comprehensive example complete with Makefile integration. 53 | 54 | More information 55 | ---------------- 56 | 57 | A paper on the ideas behind HFAV will be presented at the Seventh International Workshop on Domain-Specific Languages and High-Level Frameworks for High Performance Computing (WOLFHPC) at ACM/IEEE Supercomputing in Denver in November 2017. 58 | 59 | Jason D. Sewall and Simon J. Pennycook. 2017. High-Performance Code Generation though Fusion and Vectorization. To be presented at WOLFHPC 2017, Denver. November 2017. 60 | 61 | A preprint is available at arXiv: [https://arxiv.org/abs/1710.08774](https://arxiv.org/abs/1710.08774). 62 | 63 | Contributors 64 | ------------ 65 | 66 | - John Pennycook (john.pennycook@intel.com) 67 | - Jason Sewall (jason.sewall@intel.com) 68 | -------------------------------------------------------------------------------- /examples/boundary-conditions.yaml: -------------------------------------------------------------------------------- 1 | # Example: boundary-conditions.yaml 2 | # Demonstrates usage of "code blocks" to implement boundary conditions. 3 | # The "code blocks" functionality is very brittle, and may break unexpectedly. 4 | 5 | kernels: 6 | 7 | flux_x: 8 | declaration: flux(cell_t lc, cell_t rc, flux_t &fx); 9 | inputs: | 10 | lc : cell[j?-1][i?] 11 | rc : cell[j?][i?] 12 | outputs: | 13 | fx : flux_x[j?][i?] 14 | 15 | integrate: 16 | declaration: integrate(flux_t lf, flux_t rf, cell_t &ic); 17 | inputs: | 18 | lf : boundary(flux_x[j?][i?]) 19 | rf : boundary(flux_x[j?+1][i?]) 20 | outputs: | 21 | ic : integrated(cell[j?][i?]) 22 | 23 | # Code blocks contain arbitrary user code (e.g. MPI) 24 | # Inputs and outputs can contain wildcards and ranges, and can optionally redirect to a global variable. 
25 | # The user is responsible for ensuring that: 26 | # 1) References to hfav temporaries use appropriately mangled names; and/or 27 | # 2) Appropriate rules are in place to move data between hfav temporaries and global variables. 28 | code blocks: 29 | exchange_fluxes: 30 | code: | 31 | exchange_fluxes(__hfav_flux_x, __hfav_boundary_flux_x); 32 | inputs: | 33 | flux_t flux_x[j*][i*] 34 | outputs: | 35 | flux_t boundary(flux_x[j?-1:+1][i?]) 36 | 37 | globals: 38 | 39 | inputs: | 40 | cell_t d_cell[j?][i?] => cell[j?][i?] 41 | 42 | outputs: | 43 | integrated(cell[j][i]) => cell_t d_cell[j][i] 44 | 45 | codegen options: 46 | 47 | loops: 48 | - 49 | iter_ident: i 50 | start: first_i 51 | end: last_i 52 | stride: 1 53 | - 54 | iter_ident: j 55 | start: first_j 56 | end: last_j 57 | stride: 1 58 | 59 | loop order: [j, i] 60 | 61 | language : C99 62 | vector loop: None 63 | prefix : __hfav_ 64 | types: 65 | cell_t: float64 66 | flux_t: float 67 | clamp_t: int32 68 | -------------------------------------------------------------------------------- /examples/broadcast.yaml: -------------------------------------------------------------------------------- 1 | # Example: broadcast.yaml 2 | # Demonstrates re-use of lower-dimensionality quantities across dimensions. 3 | 4 | kernels: 5 | 6 | compute_slice: 7 | declaration: compute_slice(double in, double& out); 8 | inputs: | 9 | in: input[i?][j?] 10 | outputs: | 11 | out: slice[i?][j?] 12 | 13 | broadcast: 14 | declaration: broadcast(double slice, double& cube); 15 | inputs: | 16 | slice: slice[i?][j?] 17 | outputs: | 18 | cube: cube[i?][j?][k?] 19 | 20 | globals: 21 | inputs: | 22 | double input[i?][j?] 
23 | outputs: | 24 | cube[i][j][k] => double output[i][j][k] 25 | 26 | codegen options: 27 | 28 | loops: 29 | - 30 | iter_ident: i 31 | start: first_i 32 | end: last_i 33 | stride: 1 34 | - 35 | iter_ident: j 36 | start: first_j 37 | end: last_j 38 | stride: 1 39 | - 40 | iter_ident: k 41 | start: first_k 42 | end: last_k 43 | stride: 1 44 | 45 | loop order: [i, j, k] 46 | 47 | language: C99 48 | prefix: __hfav_ 49 | vector loop: None 50 | -------------------------------------------------------------------------------- /examples/constants.yaml: -------------------------------------------------------------------------------- 1 | # Example: constants.yaml 2 | # Demonstrates usage of scalar constants (dt). 3 | 4 | kernels: 5 | 6 | update: 7 | declaration: update(double dt, double om1, double old, double op1, double &new); 8 | inputs: | 9 | dt: dt 10 | om1: old[j?][i?-1] 11 | old: old[j?][i?] 12 | op1: old[j?][i?+1] 13 | outputs: | 14 | new: new[j?][i?] 15 | 16 | globals: 17 | 18 | inputs: | 19 | double dt 20 | double cell[j?][i?] => old[j?][i?] 
21 | outputs: | 22 | new[j][i] => double cell[j][i] 23 | 24 | codegen options: 25 | 26 | loops: 27 | - 28 | iter_ident: i 29 | start: first_i 30 | end: last_i 31 | stride: 1 32 | - 33 | iter_ident: j 34 | start: first_j 35 | end: last_j 36 | stride: 1 37 | 38 | loop order: [j, i] 39 | 40 | language: C99 41 | prefix: __hfav_ 42 | vector loop: None 43 | -------------------------------------------------------------------------------- /examples/hydro2d/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | run-tile 3 | run-gen 4 | compare 5 | version 6 | Dep/ 7 | Hydro.pvd 8 | *.idx 9 | *.pak 10 | hydro2d-x-gen.hpp 11 | hydro2d-y-gen.hpp 12 | -------------------------------------------------------------------------------- /examples/hydro2d/BUILD-HOST-GEN: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BUILD_NAME=$(uname -n) 4 | BUILD_MACHINE=$(uname -m) 5 | BUILD_SYSTEM=$(uname -s) 6 | BUILD_RELEASE=$(uname -r) 7 | 8 | echo "$BUILD_NAME-$BUILD_MACHINE-$BUILD_SYSTEM-$BUILD_RELEASE" 9 | -------------------------------------------------------------------------------- /examples/hydro2d/GIT-VERSION-GEN: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LF=' 4 | ' 5 | 6 | GITDIR="$PWD" 7 | OUTDIR="$PWD" 8 | if test x"$2" != x"" 9 | then 10 | OUTDIR="$2" 11 | fi 12 | if test x"$1" != x"" 13 | then 14 | GITDIR="$1" 15 | fi 16 | 17 | # First see if there is a version file (included in release tarballs), 18 | # then try git-describe, then default. 
19 | if test x"$(git -C $GITDIR rev-parse --git-dir 2>/dev/null)" != x"" 20 | then 21 | VN=$(git -C $GITDIR describe --abbrev=4 HEAD 2>/dev/null) 22 | if test x"$VN" == x"" -o x"$VN" == x"$LF" 23 | then 24 | VN=$(git -C $GITDIR log --pretty=format:"%h" -n 1) 25 | fi 26 | git -C $GITDIR update-index -q --refresh >/dev/null 2>/dev/null 27 | test -z "$(git -C $GITDIR diff-index --name-only HEAD --)" || 28 | VN="$VN.dirty" 29 | echo "$VN"> $OUTDIR/version.cand 30 | if test ! -f $OUTDIR/version -o -n "$(diff -q $OUTDIR/version.cand $OUTDIR/version 2>/dev/null)" 31 | then 32 | echo "$VN"> $OUTDIR/version 33 | fi 34 | rm -rf $OUTDIR/version.cand 35 | else 36 | if test -f $OUTDIR/version 37 | then 38 | VN=$(cat $OUTDIR/version) 39 | else 40 | VN="unknown" 41 | fi 42 | fi 43 | 44 | echo $(expr "$VN" : v*'\(.*\)') 45 | -------------------------------------------------------------------------------- /examples/hydro2d/Makefile: -------------------------------------------------------------------------------- 1 | CXX=icpc 2 | 3 | GIT_VERSION:=$(shell sh -c './GIT-VERSION-GEN') 4 | COMPILER_VERSION:="$(CXX)-$(shell $(CXX) --version | head -n1 | cut -d' ' -f4)" 5 | BUILD_HOST=$(shell sh -c './BUILD-HOST-GEN') 6 | 7 | HFAV_DIR=../../ 8 | HFAVROOT?=$(HFAV_DIR)/hfav 9 | 10 | HFAV=$(HFAV_DIR)/hfav.py 11 | 12 | SSE_CXXFLAGS=-DSSE -xSSE4.2 13 | KNC_CXXFLAGS=-DKNC -mmic 14 | AVX_CXXFLAGS=-DAVX -xAVX 15 | AVX2_CXXFLAGS=-DAVX -xCORE-AVX2 16 | KNL_CXXFLAGS=-DAVX3 -xMIC-AVX512 17 | SKL_CXXFLAGS=-DAVX3 -xCORE-AVX512 18 | 19 | COMPILER_INC?=/opt/intel/compilers_and_libraries/linux/include/mic 20 | ARCH_CXXFLAGS= 21 | 22 | ifeq ($(ARCH),SSE) 23 | ARCH_CXXFLAGS=$(SSE_CXXFLAGS) 24 | endif 25 | 26 | ifeq ($(ARCH),KNC) 27 | ARCH_CXXFLAGS=$(KNC_CXXFLAGS) 28 | endif 29 | 30 | ifeq ($(ARCH),AVX) 31 | ARCH_CXXFLAGS=$(AVX_CXXFLAGS) 32 | endif 33 | 34 | ifeq ($(ARCH),AVX2) 35 | ARCH_CXXFLAGS=$(AVX2_CXXFLAGS) 36 | endif 37 | 38 | ifeq ($(ARCH),KNL) 39 | ARCH_CXXFLAGS=$(KNL_CXXFLAGS) -I$(COMPILER_INC) 40 | endif 41 | 
42 | ifeq ($(ARCH),SKL) 43 | ARCH_CXXFLAGS=$(SKL_CXXFLAGS) 44 | endif 45 | 46 | 47 | BASE_CXXFLAGS=-DGIT_VERSION=\"$(GIT_VERSION)\" -DCOMPILER_VERSION=\"$(COMPILER_VERSION)\" \ 48 | -DBUILD_HOST=\"$(BUILD_HOST)\" \ 49 | -DDOUBLE \ 50 | -Wall -wd167 -ggdb \ 51 | -qopenmp \ 52 | -lnuma \ 53 | -O3 -ipo -restrict -qopt-report=5 -inline-forceinline \ 54 | -no-prec-div -no-prec-sqrt -fimf-precision=low -fimf-domain-exclusion=15 \ 55 | -I$(HFAVROOT)/include 56 | 57 | all: run-tile run-gen compare 58 | 59 | run-tile: run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp pcl-hydro.hpp arch.hpp config.c Makefile 60 | $(CXX) -o $@ run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp config.c $(BASE_CXXFLAGS) $(ARCH_CXXFLAGS) 61 | 62 | hydro2d-x-gen.hpp: hydro2d-x.yaml $(HFAV) $(HFAV_DIR)/hfav/infer.py $(HFAV_DIR)/hfav/dag.py $(HFAV_DIR)/hfav/analyze.py $(HFAV_DIR)/hfav/codegen.py $(HFAV_DIR)/hfav/term.py $(HFAV_DIR)/hfav/c99.py $(HFAV_DIR)/hfav/parse.py 63 | $(HFAV) $(HFAV_FLAGS) hydro2d-x.yaml 64 | 65 | hydro2d-y-gen.hpp: hydro2d-y.yaml $(HFAV) $(HFAV_DIR)/hfav/infer.py $(HFAV_DIR)/hfav/dag.py $(HFAV_DIR)/hfav/analyze.py $(HFAV_DIR)/hfav/codegen.py $(HFAV_DIR)/hfav/term.py $(HFAV_DIR)/hfav/c99.py $(HFAV_DIR)/hfav/parse.py 66 | $(HFAV) $(HFAV_FLAGS) hydro2d-y.yaml 67 | 68 | run-gen: run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp pcl-hydro.hpp config.c arch.hpp Makefile hydro2d-x-gen.hpp hydro2d-y-gen.hpp 69 | $(CXX) -o $@ run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp config.c -I $(HFAVROOT)/include/ $(BASE_CXXFLAGS) $(ARCH_CXXFLAGS) -DUSE_GEN_X -DUSE_GEN_Y -qopt-report=5 70 | 71 | compare: compare.cpp timeseries.cpp Makefile 72 | $(CXX) -o $@ compare.cpp timeseries.cpp $(BASE_CXXFLAGS) 73 | 74 | clean: 75 
| rm -rf run-tile run-gen compare hydro2d-x-gen.hpp hydro2d-y-gen.hpp *.optrpt 76 | -------------------------------------------------------------------------------- /examples/hydro2d/array-macros.hpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/array-macros.hpp: utilities for HPC codes 2 | 3 | (C) Jason Sewall : Intel -- initial version 4 | (C) John Pennycook : Intel -- augmentations to above version 5 | */ 6 | /* 7 | This software is governed by the CeCILL license under French law and 8 | abiding by the rules of distribution of free software. You can use, 9 | modify and/ or redistribute the software under the terms of the CeCILL 10 | license as circulated by CEA, CNRS and INRIA at the following URL 11 | "http://www.cecill.info". 12 | 13 | As a counterpart to the access to the source code and rights to copy, 14 | modify and redistribute granted by the license, users are provided only 15 | with a limited warranty and the software's author, the holder of the 16 | economic rights, and the successive licensors have only limited 17 | liability. 18 | 19 | In this respect, the user's attention is drawn to the risks associated 20 | with loading, using, modifying and/or developing or reproducing the 21 | software by the user in light of its specific status of free software, 22 | that may mean that it is complicated to manipulate, and that also 23 | therefore means that it is reserved for developers and experienced 24 | professionals having in-depth computer knowledge. Users are therefore 25 | encouraged to load and test the software's suitability as regards their 26 | requirements in conditions enabling the security of their systems and/or 27 | data to be ensured and, more generally, to use and operate it in the 28 | same conditions as regards security. 29 | 30 | The fact that you are presently reading this means that you have had 31 | knowledge of the CeCILL license and that you accept its terms. 
32 | */ 33 | 34 | #ifndef _ARRAY_MACROS_HPP__ 35 | #define _ARRAY_MACROS_HPP__ 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | __attribute__((noreturn)) 46 | static inline void die(const char *fmt, ...) 47 | { 48 | va_list val; 49 | va_start(val, fmt); 50 | vfprintf(stderr, fmt, val); 51 | va_end(val); 52 | exit(EXIT_FAILURE); 53 | } 54 | 55 | typedef unsigned long long u64; 56 | 57 | #define CACHE_LINE_BYTES 64 58 | 59 | inline void divvy(u64 *start, u64 *end, const u64 nitems, u64 chunkno, u64 nchunks) 60 | { 61 | const u64 items_per_chunk = nitems/nchunks; 62 | const u64 remainder = nitems - nchunks*items_per_chunk; 63 | 64 | *start = chunkno*items_per_chunk + std::min(chunkno, remainder); 65 | *end = (chunkno+1)*items_per_chunk + std::min(chunkno+1, remainder); 66 | } 67 | 68 | inline unsigned long long round_to_alignment(unsigned long long x, int alignment) 69 | { 70 | if(x & (alignment-1)) 71 | x = (x & ~(alignment-1)) + alignment; 72 | return x; 73 | } 74 | 75 | 76 | inline void *aligned_malloc(size_t bytes) 77 | { 78 | void *ptr; 79 | if(posix_memalign(&ptr, CACHE_LINE_BYTES, bytes)) 80 | return 0; 81 | return ptr; 82 | } 83 | 84 | inline void aligned_free(void *ptr) 85 | { 86 | free(ptr); 87 | } 88 | 89 | static void *xmalloc(size_t sze, const char *name) 90 | { 91 | void *res = malloc(sze); 92 | if(!res) 93 | die("Failed to allocate %zub for %s!\n", sze, name); 94 | return res; 95 | } 96 | 97 | static void xmalloc_free(void *p) 98 | { 99 | free(p); 100 | } 101 | 102 | static void *xaligned_malloc(size_t sze, const char *name) 103 | { 104 | void *res = aligned_malloc(sze); 105 | if(!res) 106 | die("Failed to allocate %zub for %s!\n", sze, name); 107 | return res; 108 | } 109 | 110 | static void xaligned_malloc_free(void *p) 111 | { 112 | free(p); 113 | } 114 | 115 | static void *xrealloc(void *ptr, size_t sze, const char *name) 116 | { 117 | void *res = realloc(ptr, sze); 118 | if(!res) 119 | 
die("Failed to allocate %zub for %s!\n", sze, name); 120 | return res; 121 | } 122 | 123 | static char *xstrdup(const char *ptr, const char *name) 124 | { 125 | char *res = strdup(ptr); 126 | if(!res) 127 | die("Failed to strdup %zub for %s!\n", strlen(ptr), name); 128 | return res; 129 | } 130 | 131 | static void xstrdup_free(void *ptr) 132 | { 133 | free(ptr); 134 | } 135 | 136 | static char *xstrndup(const char *ptr, size_t n, const char *name) 137 | { 138 | char *res = strndup(ptr,n ); 139 | if(!res) 140 | die("Failed to strndup %zub for %s!\n", strlen(ptr), name); 141 | return res; 142 | } 143 | 144 | static int xsnprintf(char *str, size_t n, const char *fmt, ...) 145 | { 146 | va_list val; 147 | va_start(val, fmt); 148 | int wanted_out = vsnprintf(str, n, fmt, val); 149 | va_end(val); 150 | if(wanted_out < 0 || (size_t) wanted_out >= n) /* vsnprintf returns the untruncated length, so a result equal to n already means the output was cut short */ 151 | { 152 | die("Ran out of buffer space for output string!\n"); 153 | } 154 | return wanted_out; 155 | } 156 | 157 | static FILE *xfopen_write(const char *str, char *mode) 158 | { 159 | int fd = creat(str, S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP); 160 | if(fd == -1) 161 | return 0; 162 | 163 | FILE *file = fdopen(fd, mode); 164 | return file; 165 | } 166 | 167 | static FILE *xfopen_read(const char *str, char *mode) 168 | { 169 | int fd = open(str, O_RDONLY); 170 | if(fd == -1) 171 | return 0; 172 | 173 | FILE *file = fdopen(fd, mode); 174 | return file; 175 | } 176 | 177 | static char *human_format(double in) 178 | { 179 | static const char cf_chars[] = {'t', 'g', 'm', 'k', 0}; 180 | static const unsigned long long cf_vals[] = { 181 | 1ULL << 40, 182 | 1ULL << 30, 183 | 1ULL << 20, 184 | 1ULL << 10, 185 | 0ULL, 186 | }; 187 | const double ain = std::abs(in); 188 | 189 | int i; 190 | for(i = 0; ain < cf_vals[i]; ++i); 191 | 192 | double v = in/std::max(cf_vals[i], 1ULL); 193 | char buff[1024]; 194 | xsnprintf(buff, 1023, "%.1lf%c", v, cf_chars[i]); 195 | return strdup(buff); 196 | } 197 | 198 | static long long suffixed_atoll(const char *nptr, int 
nthreads) 199 | { 200 | char *mod; 201 | double mul = strtod(nptr, &mod); 202 | while(*mod) 203 | { 204 | switch(*mod) 205 | { 206 | case 't': 207 | mul *= nthreads; 208 | break; 209 | case 'T': 210 | mul *= nthreads; 211 | break; 212 | case 'k': 213 | mul *= 1024; 214 | break; 215 | case 'K': 216 | mul *= 1000; 217 | break; 218 | case 'm': 219 | mul *= 1024*1024; 220 | break; 221 | case 'M': 222 | mul *= 1000000; 223 | break; 224 | case 'g': 225 | mul *= 1024*1024*1024; 226 | break; 227 | case 'G': 228 | mul *= 1000000000; 229 | break; 230 | default: 231 | return mul; 232 | } 233 | ++mod; 234 | } 235 | return mul; 236 | } 237 | 238 | #define DECLARE_ARRAY_ALL(type, name) \ 239 | int name##_n; \ 240 | int name##_n_allocd; \ 241 | type* name 242 | 243 | #define INIT_ARRAY(name, size) \ 244 | name##_n = 0; \ 245 | name##_n_allocd = size; \ 246 | name = (typeof(name)) xmalloc(sizeof(name[0])*name##_n_allocd, #name); 247 | 248 | #define INIT_ARRAY_ALIGNED(name, size) \ 249 | name##_n = 0; \ 250 | name##_n_allocd = size; \ 251 | name = (typeof(name)) xaligned_malloc(sizeof(name[0])*name##_n_allocd, #name); 252 | 253 | #define EXTEND_ARRAY(name, num) \ 254 | if(name##_n + num >= name##_n_allocd) \ 255 | { \ 256 | name##_n_allocd = (name##_n + num)*2; \ 257 | void *m = xrealloc(name, sizeof(name[0])*name##_n_allocd, #name); \ 258 | name = (typeof(name)) m; \ 259 | } 260 | 261 | #define EXTEND_ARRAY_ALIGNED(name, num) \ 262 | if(name##_n + num >= name##_n_allocd) \ 263 | { \ 264 | name##_n_allocd = (name##_n + num)*2; \ 265 | void *m = xaligned_malloc(sizeof(name[0])*name##_n_allocd, #name); \ 266 | memcpy(m, name, sizeof(name[0])*name##_n); \ 267 | aligned_free(name); \ 268 | name = (typeof(name)) m; \ 269 | } 270 | 271 | #define FREE_ARRAY_ALL(name) \ 272 | name##_n = 0; \ 273 | name##_n_allocd = 0; \ 274 | free(name); \ 275 | name = 0; 276 | 277 | #define FREE_ARRAY(name) \ 278 | name##_n = 0; \ 279 | free(name); \ 280 | name = 0; 281 | 282 | #define 
FREE_ARRAY_ALIGNED(name) \ 283 | name##_n = 0; \ 284 | aligned_free(name); \ 285 | name = 0; 286 | 287 | #endif 288 | -------------------------------------------------------------------------------- /examples/hydro2d/compare.cpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/compare.cpp: compare timeseries from hydro code 2 | 3 | (C) Jason Sewall : Intel -- initial version 4 | (C) John Pennycook : Intel -- augmentations to above version 5 | */ 6 | /* 7 | This software is governed by the CeCILL license under French law and 8 | abiding by the rules of distribution of free software. You can use, 9 | modify and/ or redistribute the software under the terms of the CeCILL 10 | license as circulated by CEA, CNRS and INRIA at the following URL 11 | "http://www.cecill.info". 12 | 13 | As a counterpart to the access to the source code and rights to copy, 14 | modify and redistribute granted by the license, users are provided only 15 | with a limited warranty and the software's author, the holder of the 16 | economic rights, and the successive licensors have only limited 17 | liability. 18 | 19 | In this respect, the user's attention is drawn to the risks associated 20 | with loading, using, modifying and/or developing or reproducing the 21 | software by the user in light of its specific status of free software, 22 | that may mean that it is complicated to manipulate, and that also 23 | therefore means that it is reserved for developers and experienced 24 | professionals having in-depth computer knowledge. Users are therefore 25 | encouraged to load and test the software's suitability as regards their 26 | requirements in conditions enabling the security of their systems and/or 27 | data to be ensured and, more generally, to use and operate it in the 28 | same conditions as regards security. 
29 | 30 | The fact that you are presently reading this means that you have had 31 | knowledge of the CeCILL license and that you accept its terms. 32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include "timeseries.hpp" 42 | #include 43 | static const char usage_str[] = "USAGE:\t%s [-s start] [-e end] [-l] [-h] idxfile1 idxfile2\n"; 44 | 45 | static void usage(const char *name) 46 | { 47 | die(usage_str, basename(name)); 48 | } 49 | /* Print the usage line followed by the full option description to stderr. The documented default for --start matches main(), which initialises start_frame to 0. */ 50 | static void help(const char *name) 51 | { 52 | fprintf(stderr, usage_str, name); 53 | fprintf(stderr, "DESCRIPTION\n" 54 | "\t Compare timeseries results with vortex particles\n"); 55 | fprintf(stderr, "OPTIONS\n" 56 | "\t-s,--start \n\t Start at frame (default 0)\n" 57 | "\t-e,--end \n\t Run up to (not inclusive, defaults to end of shorter)>\n" 58 | "\t-l,--last-only\n\t Only test last common frame in inputs (ignores -s and -e options)\n" 59 | "\t-h,--help\n\t print this help message\n" 60 | ); 61 | } 62 | 63 | bool compare(int nx, 64 | int ny, 65 | double *l2, 66 | double *linf, 67 | int *linfarg, 68 | int numstream, 69 | int frameno, 70 | const void *frame1, const void *frame2) 71 | { 72 | const int stride = nx*ny; 73 | 74 | for(int s = 0; s < numstream; ++s) 75 | { 76 | l2[s] = 0.0; 77 | linf[s] = 0.0; 78 | linfarg[s] = -1; 79 | const double *base1 = ((const double*)frame1) + s*stride; 80 | const double *base2 = ((const double*)frame2) + s*stride; 81 | 82 | for(int i = 0; i < stride; ++i) 83 | { 84 | l2[s] += (base1[i] - base2[i])*(base1[i] - base2[i]); 85 | if(linf[s] < std::abs(base1[i] - base2[i])) 86 | { 87 | linf[s] = std::abs(base1[i] - base2[i]); 88 | linfarg[s] = i; 89 | } 90 | } 91 | } 92 | return true; 93 | } 94 | 95 | int main(int argc, char *argv[]) 96 | { 97 | int start_frame = 0; 98 | int end_frame = -1; 99 | bool last_only = false; 100 | option opts[] = 101 | { 102 | {"start", required_argument, 0, 's'}, 103 | {"end", required_argument, 0, 'e'},
104 | /* -l and -h are flags: the short-option string "s:e:lh" gives them no argument, so the long forms must not consume one either */ {"last-only", no_argument, 0, 'l'}, 105 | {"help", no_argument, 0, 'h'}, 106 | {0, 0, 0, 0}, 107 | }; 108 | 109 | int opt; 110 | while((opt = getopt_long(argc, argv, "s:e:lh", opts, 0)) != -1) 111 | { 112 | switch(opt) 113 | { 114 | case 0: 115 | break; 116 | case 's': 117 | start_frame = atoi(optarg); 118 | if(start_frame < 0) 119 | die("--[s]tart is %d, must be >= 0\n", start_frame); 120 | break; 121 | case 'e': 122 | end_frame = atoi(optarg); 123 | if(end_frame < 0) 124 | die("--[e]nd is %d, must be >= 0\n", end_frame); 125 | break; 126 | case 'l': 127 | last_only = true; 128 | break; 129 | case 'h': 130 | help(argv[0]); 131 | exit(0); 132 | default: 133 | usage(argv[0]); 134 | } 135 | } 136 | /* both index files are read from argv[optind] and argv[optind + 1], so at least two positional arguments must remain */ 137 | if(argc - optind < 2) 138 | die("Expected 2 arguments (index files) after options\n"); 139 | 140 | timeseries_reader idx1; 141 | 142 | if(!idx1.load(argv[optind])) 143 | die("Can't load (first) index file %s\n", argv[optind]); 144 | 145 | timeseries_reader idx2; 146 | 147 | if(!idx2.load(argv[optind + 1])) 148 | die("Can't load (second) index file %s\n", argv[optind + 1]); 149 | 150 | int last_frame = std::min(idx1.frames_n, idx2.frames_n); 151 | if(end_frame != -1) 152 | last_frame = std::min(last_frame, end_frame); 153 | 154 | if(last_only) /* clamp so that zero common frames yields an empty loop rather than a request for frame -1 */ 155 | start_frame = std::max(last_frame-1, 0); 156 | for(int current_frame = start_frame; current_frame < last_frame; ++current_frame) 157 | { 158 | double time1; 159 | size_t size1; 160 | const void *fr1 = idx1.get_frame(current_frame, &time1, &size1); 161 | if(!fr1) 162 | die("Woah, couldn't get frame %d from idx1", current_frame); 163 | 164 | double time2; 165 | size_t size2; 166 | const void *fr2 = idx2.get_frame(current_frame, &time2, &size2); 167 | if(!fr2) 168 | die("Woah, couldn't get frame %d from idx2", current_frame); 169 | 170 | if(time1 != time2) 171 | die("Frame %d: times differ!
(First = %le, second = %le, first-second = %le)\n", current_frame, time1, time2, time1-time2); 172 | 173 | if(size1 != size2) 174 | die("Frame %d: sizes differ! (First = %zu, second = %zu)\n", current_frame, size1, size2); 175 | 176 | double l2[4]; 177 | double linf[4]; 178 | int linfarg[4]; 179 | 180 | size_t size; 181 | const int nx1 = ((const int*)(idx1.get_static("nx", &size)))[0]; 182 | const int ny1 = ((const int*)(idx1.get_static("ny", &size)))[0]; 183 | 184 | const int nx2 = ((const int*)(idx2.get_static("nx", &size)))[0]; 185 | const int ny2 = ((const int*)(idx2.get_static("ny", &size)))[0]; 186 | 187 | if(nx1 != nx2) 188 | die("Differing grid x dimensions! (First = %d, second = %d)\n", nx1, nx2); 189 | if(ny1 != ny2) 190 | die("Differing grid y dimensions! (First = %d, second = %d)\n", ny1, ny2); 191 | 192 | compare(nx1, ny1, l2, linf, linfarg, 4, current_frame, fr1, fr2); 193 | fprintf(stderr, "Frame %d\n", current_frame); 194 | for(int s = 0; s < 4; ++s) 195 | fprintf(stderr, " %d err:l2 = %le linf = %le (inf @ %d)\n", s, std::sqrt(l2[s]), linf[s], linfarg[s]); 196 | 197 | } 198 | if(idx1.frames_n - last_frame > 0) 199 | fprintf(stderr, "[warning] First has %d more frames unchecked\n", idx1.frames_n - last_frame); 200 | 201 | if(idx2.frames_n - last_frame > 0) 202 | fprintf(stderr, "[warning] Second has %d more frames unchecked\n", idx2.frames_n - last_frame); 203 | 204 | return EXIT_SUCCESS; 205 | } 206 | -------------------------------------------------------------------------------- /examples/hydro2d/config.c: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/config.c: print out configuration of software and hardware 2 | 3 | (C) Jason Sewall : Intel -- inital version 4 | (C) John Pennycook : Intel -- augmentations to above version 5 | */ 6 | /* 7 | This software is governed by the CeCILL license under French law and 8 | abiding by the rules of distribution of free software. 
You can use, 9 | modify and/ or redistribute the software under the terms of the CeCILL 10 | license as circulated by CEA, CNRS and INRIA at the following URL 11 | "http://www.cecill.info". 12 | 13 | As a counterpart to the access to the source code and rights to copy, 14 | modify and redistribute granted by the license, users are provided only 15 | with a limited warranty and the software's author, the holder of the 16 | economic rights, and the successive licensors have only limited 17 | liability. 18 | 19 | In this respect, the user's attention is drawn to the risks associated 20 | with loading, using, modifying and/or developing or reproducing the 21 | software by the user in light of its specific status of free software, 22 | that may mean that it is complicated to manipulate, and that also 23 | therefore means that it is reserved for developers and experienced 24 | professionals having in-depth computer knowledge. Users are therefore 25 | encouraged to load and test the software's suitability as regards their 26 | requirements in conditions enabling the security of their systems and/or 27 | data to be ensured and, more generally, to use and operate it in the 28 | same conditions as regards security. 29 | 30 | The fact that you are presently reading this means that you have had 31 | knowledge of the CeCILL license and that you accept its terms. 
32 | */ 33 | 34 | #define USE_OMP 35 | #define USE_NUMACTL 36 | #include 37 | #include 38 | #include 39 | #include 40 | #ifdef USE_MPI 41 | #include 42 | #endif 43 | #ifdef USE_MKL 44 | #include "mkl.h" 45 | #endif 46 | #ifdef USE_OMP 47 | #include 48 | #endif 49 | #ifdef USE_NUMACTL 50 | #include 51 | #endif 52 | #include 53 | #include 54 | #include 55 | #include 56 | 57 | static char config_null[] = ""; 58 | 59 | static const char *xgetenv_name(const char *str) 60 | { 61 | const char *res = getenv(str); 62 | if(res == 0) 63 | return config_null; 64 | else 65 | return res; 66 | } 67 | 68 | #ifdef USE_NUMACTL 69 | struct node 70 | { 71 | struct bitmask *cpus; 72 | int has_cpu; 73 | int nearest_memonly; 74 | }; 75 | 76 | static void readnodes(struct node *nodes, int n) 77 | { 78 | int i; 79 | for (i = 0; i < n; ++i) 80 | { 81 | nodes[i].cpus = numa_allocate_cpumask(); 82 | if (nodes[i].cpus == 0) 83 | perror("allocate cpu bitmask"); 84 | int ret = numa_node_to_cpus(i, nodes[i].cpus); 85 | if (ret != 0) 86 | perror("numa_node_to_cpus"); 87 | nodes[i].has_cpu = numa_bitmask_weight(nodes[i].cpus); 88 | nodes[i].nearest_memonly = -1; 89 | } 90 | } 91 | 92 | static void findmem(struct node *nodes, int n) 93 | { 94 | int i; 95 | for (i = 0; i < n; ++i) 96 | { 97 | if (nodes[i].has_cpu == 0) 98 | continue; 99 | // look for a memory-only node with closest distance 100 | int memidx = -1; 101 | int distance = 0x7FFFFFFF; 102 | int j; 103 | for (j = 0; j < n; ++j) 104 | { 105 | if (nodes[j].has_cpu != 0) 106 | continue; 107 | int d = numa_distance(i, j); 108 | if (d < distance) 109 | { 110 | distance = d; 111 | memidx = j; 112 | } 113 | } 114 | nodes[i].nearest_memonly = memidx; 115 | } 116 | } 117 | 118 | static int xsnprintf(char *str, size_t n, const char *fmt, ...) 
119 | { 120 | va_list val; 121 | va_start(val, fmt); 122 | int wanted_out = vsnprintf(str, n, fmt, val); 123 | va_end(val); 124 | if(wanted_out > n) 125 | { 126 | fprintf(stderr, "Ran out of buffer space for output string!\n"); 127 | exit(1); 128 | } 129 | return wanted_out; 130 | } 131 | 132 | static char *mem_nodes(struct node *nodes, struct bitmask *mynodes, int nnodes) 133 | { 134 | int i; 135 | char temp[1024]; 136 | memset(temp, 0, sizeof(char)*1024); 137 | char *curr = temp; 138 | *curr = 0; 139 | for (i = 0; i < nnodes; ++i) 140 | { 141 | if (numa_bitmask_isbitset(mynodes, i) && 142 | nodes[i].nearest_memonly > 0) 143 | { 144 | if (curr != temp) 145 | { 146 | curr += snprintf(curr, 1023-(curr-temp), ","); 147 | } 148 | curr += snprintf(curr, 1023-(curr-temp), "%d", nodes[i].nearest_memonly); 149 | } 150 | } 151 | char *res = strdup(temp); 152 | if(!res) 153 | return config_null; 154 | return res; 155 | } 156 | 157 | static char *cpu_nodes(struct bitmask *mynodes, int nnodes) 158 | { 159 | int i; 160 | char temp[1024]; 161 | memset(temp, 0, sizeof(char)*1024); 162 | char *curr = temp; 163 | for (i = 0; i < nnodes; ++i) 164 | { 165 | if (numa_bitmask_isbitset(mynodes, i)) 166 | { 167 | if (curr != temp) 168 | { 169 | curr += snprintf(curr, 1023-(curr-temp), ","); 170 | } 171 | curr += snprintf(curr, 1023-(curr-temp), "%d", i); 172 | } 173 | } 174 | char *res = strdup(temp); 175 | if(!res) 176 | return config_null; 177 | return res; 178 | } 179 | #endif 180 | 181 | static char *format_uname() 182 | { 183 | struct utsname un; 184 | if(uname(&un) == -1) 185 | { 186 | perror("uname"); 187 | exit(1); 188 | } 189 | char buff[1024]; 190 | snprintf(buff, 1023, "%s-%s-%s-%s", un.nodename, un.machine, un.sysname, un.release); 191 | char *res = strdup(buff); 192 | if(!res) 193 | return config_null; 194 | return res; 195 | } 196 | 197 | static void cpuid(const unsigned int info, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) 198 | { 199 | 
__asm__("cpuid;" 200 | :"=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) 201 | :"a" (info)); 202 | } 203 | 204 | static void vendorid(char id[13]) 205 | { 206 | unsigned int temp; 207 | cpuid(0, &temp, (unsigned int*)id, (unsigned int*)(id+8), (unsigned int*)(id+4)); 208 | id[12] = 0; 209 | } 210 | 211 | static void proc_brand(char str[49]) 212 | { 213 | unsigned int i; 214 | static char nope[] = "Unknown"; 215 | unsigned int okay; 216 | cpuid(0x80000000, &okay, (unsigned int*)str, (unsigned int*)(str+4), (unsigned int*)(str+8)); 217 | if(okay < 0x80000004) 218 | { 219 | strcpy(str, nope); 220 | } 221 | else 222 | { 223 | for(i = 0; i < 3; ++i) 224 | { 225 | cpuid(0x80000002+i, (unsigned int*)(str+16*i), (unsigned int*)(str+16*i+4), (unsigned int*)(str+16*i+8), (unsigned int*)(str+16*i+12)); 226 | } 227 | } 228 | str[48] = 0; 229 | } 230 | 231 | typedef struct cpuinfo 232 | { 233 | unsigned stepping : 4; 234 | unsigned model : 4; 235 | unsigned family_id : 4; 236 | unsigned proc_type : 2; 237 | unsigned nothing : 2; 238 | unsigned extended_model_id : 4; 239 | unsigned extended_family_id : 8; 240 | unsigned nothing2 : 6; 241 | unsigned display_family; 242 | unsigned display_model; 243 | } cpuinfo; 244 | 245 | static void cpu_info(struct cpuinfo *ci) 246 | { 247 | unsigned int b, c, d; 248 | cpuid(0x1, (unsigned int*)ci, &b, &c, &d); 249 | if(ci->family_id == 0x0F) 250 | { 251 | ci->display_family = ci->extended_family_id + ci->family_id; 252 | } 253 | else 254 | { 255 | ci->display_family = ci->family_id; 256 | } 257 | if(ci->family_id == 0x0F || ci->family_id == 0x06) 258 | { 259 | ci->display_model = (ci->extended_model_id << 4) + ci->model; 260 | } 261 | else 262 | { 263 | ci->display_model = ci->model; 264 | } 265 | } 266 | 267 | void print_config(FILE *fp) 268 | { 269 | fprintf(fp, "%20s = %s\n", "GIT_VERSION", GIT_VERSION); 270 | fprintf(fp, "%20s = %s\n", "BUILD_HOST", BUILD_HOST); 271 | fprintf(fp, "%20s = %s\n", "COMPILER_VERSION", COMPILER_VERSION); 272 
| fprintf(fp, "%20s = %s\n", "GLIBC_VERSION", gnu_get_libc_version ()); 273 | #ifdef USE_MKL 274 | char mkl_version[1024]; 275 | mkl_get_version_string(mkl_version, 1024); 276 | fprintf(fp, "%20s = %s\n", "MKL_VERSION", mkl_version); 277 | #endif 278 | #ifdef USE_MPI 279 | int mpi_major, mpi_minor; 280 | MPI_Get_version(&mpi_major, &mpi_minor); 281 | fprintf(fp, "%20s = %d.%d\n", "MPI_VERSION", mpi_major, mpi_minor); 282 | int mpi_len; 283 | char mpi_library_version[MPI_MAX_LIBRARY_VERSION_STRING]; 284 | MPI_Get_library_version(mpi_library_version, &mpi_len); 285 | char *mpi_newline = strchr(mpi_library_version, '\n'); 286 | if(mpi_newline) 287 | *mpi_newline = 0; 288 | fprintf(fp, "%20s = %s\n", "MPI_LIBRARY_VERSION", mpi_library_version); 289 | #endif 290 | fprintf(fp, "%20s = %s %s\n", "BUILD_DATE", __DATE__, __TIME__); 291 | fprintf(fp, "\n"); 292 | char *host = format_uname(); 293 | fprintf(fp, "%20s = %s\n", "HOST", host); 294 | if(host != config_null) 295 | free(host); 296 | char vid[13]; 297 | vendorid(vid); 298 | char pb[49]; 299 | proc_brand(pb); 300 | fprintf(fp, "%20s = %s %s\n", "CPU", vid, pb); 301 | cpuinfo ci; 302 | cpu_info(&ci); 303 | fprintf(fp, "%20s = %s\n", "LD_PRELOAD", xgetenv_name("LD_PRELOAD")); 304 | fprintf(fp, "%20s = Family %u Model %u Stepping %u\n", "CPUINFO", ci.display_family, ci.display_model, ci.stepping); 305 | fprintf(fp, "\n"); 306 | #ifdef USE_OMP 307 | fprintf(fp, "%20s = %d\n", "NTHREADS", omp_get_max_threads()); 308 | fprintf(fp, "%20s = %s\n", "KMP_AFFINITY", xgetenv_name("KMP_AFFINITY")); 309 | fprintf(fp, "%20s = %s\n", "KMP_PLACE_THREADS", xgetenv_name("KMP_PLACE_THREADS")); 310 | fprintf(fp, "%20s = %s\n", "KMP_BLOCKTIME", xgetenv_name("KMP_BLOCKTIME")); 311 | fprintf(fp, "\n"); 312 | #endif 313 | #ifdef USE_NUMACTL 314 | if(numa_available() != -1) 315 | { 316 | fprintf(fp, "%20s = %s\n", "NUMA_AVAILABLE", "YES"); 317 | const int nnodes = numa_max_node()+1; 318 | fprintf(fp, "%20s = %d\n", "NUMA_NODES", nnodes); 319 | 
struct node nodes[nnodes]; 320 | readnodes(nodes, nnodes); 321 | findmem(nodes, nnodes); 322 | struct bitmask *mynodes = numa_allocate_nodemask(); 323 | struct bitmask *mycpus = numa_allocate_cpumask(); 324 | int ret = numa_sched_getaffinity(0, mycpus); 325 | if (ret <= 0) // returns # bytes copied 326 | perror("numa_sched_getaffinity"); 327 | const int ncpus = numa_num_possible_cpus(); 328 | int i; 329 | for (i = 0; i < nnodes; ++i) 330 | { 331 | // check if there is any intersection with this node 332 | int j; 333 | for (j = 0; j < ncpus; ++j) 334 | if (numa_bitmask_isbitset(mycpus, j) && 335 | numa_bitmask_isbitset(nodes[i].cpus, j)) 336 | numa_bitmask_setbit(mynodes, i); 337 | } 338 | numa_free_cpumask(mycpus); 339 | char *mem_node_str = mem_nodes(nodes, mynodes, nnodes); 340 | fprintf(fp, "%20s = %s\n", "NUMA_MEM_NODES", mem_node_str); 341 | if(mem_node_str != config_null) 342 | free(mem_node_str); 343 | char *cpu_node_str = cpu_nodes(mynodes, nnodes); 344 | fprintf(fp, "%20s = %s\n", "NUMA_CPU_NODES", cpu_node_str); 345 | if(cpu_node_str != config_null) 346 | free(cpu_node_str); /* release the node mask and the per-node cpumasks allocated in readnodes; nothing uses them past this point */ numa_free_nodemask(mynodes); for (i = 0; i < nnodes; ++i) if (nodes[i].cpus) numa_free_cpumask(nodes[i].cpus); 347 | } 348 | else 349 | { 350 | fprintf(fp, "%20s = %s\n", "NUMA_AVAILABLE", "NO"); 351 | } 352 | fprintf(fp, "\n"); 353 | #endif 354 | fprintf(fp, "\n"); 355 | } 356 | 357 | #ifdef TEST_PROG 358 | int main() 359 | { 360 | print_config(stderr); 361 | return 0; 362 | } 363 | #endif 364 | -------------------------------------------------------------------------------- /examples/hydro2d/hydro2d-x.yaml: -------------------------------------------------------------------------------- 1 | kernels: 2 | 3 | rcp: 4 | declaration: REAL_T rcp(REAL_T x); 5 | inputs: | 6 | x : q? 7 | outputs: | 8 | : rcp(q?) 9 | 10 | constoprim: 11 | declaration: conservative_to_primitive(REAL_T &prim_rho, REAL_T &inv_prim_rho, REAL_T& prim_u, REAL_T &prim_v, REAL_T &E_internal, REAL_T cons_rho, REAL_T cons_rhou, REAL_T cons_rhov, REAL_T cons_E); 12 | inputs: | 13 | cons_rho : rho[j?][i?]
# cell (x0, x1) 14 | cons_rhou : rhou[j?][i?] # cell (x0, x1) 15 | cons_rhov : rhov[j?][i?] # cell (x0, x1) 16 | cons_E : E[j?][i?] # cell (x0, x1) 17 | outputs: | 18 | prim_rho : prim_rho[j?][i?] # cell (x0, x1) 19 | inv_prim_rho : inv_prim_rho[j?][i?] # cell (x0, x1) 20 | prim_u : prim_u[j?][i?] # cell (x0, x1) 21 | prim_v : prim_v[j?][i?] # cell (x0, x1) 22 | E_internal : Einternal[j?][i?] # cell (x0, x1) 23 | 24 | new_constoprim: 25 | declaration: conservative_to_primitive(REAL_T &prim_rho, REAL_T &inv_prim_rho, REAL_T& prim_u, REAL_T &prim_v, REAL_T &E_internal, REAL_T cons_rho, REAL_T cons_rhou, REAL_T cons_rhov, REAL_T cons_E); 26 | inputs: | 27 | cons_rho : new(rho[j?][i?]) # cell (x0, x1) 28 | cons_rhou : new(rhou[j?][i?]) # cell (x0, x1) 29 | cons_rhov : new(rhov[j?][i?]) # cell (x0, x1) 30 | cons_E : new(E[j?][i?]) # cell (x0, x1) 31 | outputs: | 32 | prim_rho : new(prim_rho[j?][i?]) # cell (x0, x1) 33 | inv_prim_rho : new(inv_prim_rho[j?][i?]) # cell (x0, x1) 34 | prim_u : new(prim_u[j?][i?]) # cell (x0, x1) 35 | prim_v : new(prim_v[j?][i?]) # cell (x0, x1) 36 | E_internal : new(Einternal[j?][i?]) # cell (x0, x1) 37 | 38 | eqstate: 39 | declaration: REAL_T equation_of_state(REAL_T rho, REAL_T Einternal); 40 | inputs: | 41 | rho : prim_rho[j?][i?] # cell (x0, x1) 42 | Einternal : Einternal[j?][i?] # cell (x0, x1) 43 | outputs: | 44 | : prim_p[j?][i?] # cell (x0, x1) 45 | 46 | new_eqstate: 47 | declaration: REAL_T equation_of_state(REAL_T rho, REAL_T Einternal); 48 | inputs: | 49 | rho : new(prim_rho[j?][i?]) # cell (x0, x1) 50 | Einternal : new(Einternal[j?][i?]) # cell (x0, x1) 51 | outputs: | 52 | : new(prim_p[j?][i?]) # cell (x0, x1) 53 | 54 | sound: 55 | declaration: REAL_T speed_of_sound(REAL_T inv_rho, REAL_T p); 56 | inputs: | 57 | inv_rho : inv_prim_rho[j?][i?] # cell (x0, x1) 58 | p : prim_p[j?][i?] # cell (x0, x1) 59 | outputs: | 60 | : prim_c[j?][i?] 
# cell (x0, x1) 61 | 62 | new_sound: 63 | declaration: REAL_T speed_of_sound(REAL_T inv_rho, REAL_T p); 64 | inputs: | 65 | inv_rho : new(inv_prim_rho[j?][i?]) # cell (x0, x1) 66 | p : new(prim_p[j?][i?]) # cell (x0, x1) 67 | outputs: | 68 | : new(prim_c[j?][i?]) # cell (x0, x1) 69 | 70 | 71 | # Slope is computed for v = 0, 1, 2, 3 (prim_rho, prim_u, prim_v, prim_p) 72 | slope: 73 | declaration: REAL_T slope(REAL_T qm1, REAL_T q0, REAL_T qp1, REAL_T slope_type, REAL_T inv_slope_type); 74 | inputs: | 75 | qm1 : q?[j?][i?-1] # cell (x-1, x0) 76 | q0 : q?[j?][i? ] # cell (x0, x1) 77 | qp1 : q?[j?][i?+1] # cell (x1, x2) 78 | outputs: | 79 | : delta(q?[j?][i?]) # cell (x0, x1) 80 | 81 | # Handling of rcp(c) is terrible here. 82 | rtrace: 83 | declaration: rtrace(REAL_T& flux_rho_p, REAL_T& flux_u_p, REAL_T& flux_v_p, REAL_T& flux_p_p, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx); 84 | inputs: | 85 | rho : prim_rho[j?][i?] # cell (x0, x1) 86 | inv_rho : inv_prim_rho[j?][i?] # cell (x0, x1) 87 | u : prim_u[j?][i?] # cell (x0, x1) 88 | v : prim_v[j?][i?] # cell (x0, x1) 89 | p : prim_p[j?][i?] # cell (x0, x1) 90 | drho : delta(prim_rho[j?][i?]) # cell (x0, x1) 91 | du : delta(prim_u[j?][i?]) # cell (x0, x1) 92 | dv : delta(prim_v[j?][i?]) # cell (x0, x1) 93 | dp : delta(prim_p[j?][i?]) # cell (x0, x1) 94 | c : prim_c[j?][i?] 
# cell (x0, x1) 95 | inv_c : rcp(prim_c[j?][i?]) # cell (x0, x1) 96 | outputs: | 97 | flux_rho_p : rflux(rho[j?][i?]) # interface x0 98 | flux_u_p : rflux(u[j?][i?]) # interface x0 99 | flux_v_p : rflux(v[j?][i?]) # interface x0 100 | flux_p_p : rflux(p[j?][i?]) # interface x0 101 | 102 | ltrace: 103 | declaration: ltrace(REAL_T& flux_rho_m, REAL_T& flux_u_m, REAL_T& flux_v_m, REAL_T& flux_p_m, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx); 104 | inputs: | 105 | rho : prim_rho[j?][i?-1] # cell (x0, x1) 106 | inv_rho : inv_prim_rho[j?][i?-1] # cell (x0, x1) 107 | u : prim_u[j?][i?-1] # cell (x0, x1) 108 | v : prim_v[j?][i?-1] # cell (x0, x1) 109 | p : prim_p[j?][i?-1] # cell (x0, x1) 110 | drho : delta(prim_rho[j?][i?-1]) # cell (x0, x1) 111 | du : delta(prim_u[j?][i?-1]) # cell (x0, x1) 112 | dv : delta(prim_v[j?][i?-1]) # cell (x0, x1) 113 | dp : delta(prim_p[j?][i?-1]) # cell (x0, x1) 114 | c : prim_c[j?][i?-1] # cell (x0, x1) 115 | inv_c : rcp(prim_c[j?][i?-1]) # cell (x0, x1) 116 | outputs: | 117 | flux_rho_m : lflux(rho[j?][i?]) # interface x1 118 | flux_u_m : lflux(u[j?][i?]) # interface x1 119 | flux_v_m : lflux(v[j?][i?]) # interface x1 120 | flux_p_m : lflux(p[j?][i?]) # interface x1 121 | 122 | riemann: 123 | declaration: riemann(REAL_T& gdnv_rho, REAL_T& gdnv_u, REAL_T& gdnv_v, REAL_T& gdnv_p, REAL_T in_left_rho, REAL_T in_left_u, REAL_T in_left_v, REAL_T in_left_p, REAL_T in_right_rho, REAL_T in_right_u, REAL_T in_right_v, REAL_T in_right_p); 124 | inputs: | 125 | in_left_rho : lflux(rho[j?][i?]) # interface x0 126 | in_left_u : lflux(u[j?][i?]) # interface x0 127 | in_left_v : lflux(v[j?][i?]) # interface x0 128 | in_left_p : lflux(p[j?][i?]) # interface x0 129 | in_right_rho : rflux(rho[j?][i?]) # interface x0 130 | in_right_u : rflux(u[j?][i?]) # interface x0 131 | in_right_v : rflux(v[j?][i?]) # interface x0 132 | in_right_p : rflux(p[j?][i?]) # interface 
x0 133 | outputs: | 134 | gdnv_rho : gdnv(rho[j?][i?]) # interface x0 135 | gdnv_u : gdnv(u[j?][i?]) # interface x0 136 | gdnv_v : gdnv(v[j?][i?]) # interface x0 137 | gdnv_p : gdnv(p[j?][i?]) # interface x0 138 | 139 | cmpflx: 140 | declaration: cmpflx(REAL_T& flux_rho, REAL_T& flux_rhou, REAL_T& flux_rhov, REAL_T& flux_E, REAL_T gdnv_rho, REAL_T gdnv_u, REAL_T gdnv_v, REAL_T gdnv_p); 141 | inputs: | 142 | gdnv_rho : gdnv(rho[j?][i?]) # interface x0 143 | gdnv_u : gdnv(u[j?][i?]) # interface x0 144 | gdnv_v : gdnv(v[j?][i?]) # interface x0 145 | gdnv_p : gdnv(p[j?][i?]) # interface x0 146 | outputs: | 147 | flux_rho : flux(rho[j?][i?]) # interface x0 148 | flux_rhou : flux(rhou[j?][i?]) # interface x0 149 | flux_rhov : flux(rhov[j?][i?]) # interface x0 150 | flux_E : flux(E[j?][i?]) # interface x0 151 | 152 | update: 153 | declaration: REAL_T update(REAL_T in, REAL_T flux_left, REAL_T flux_right, REAL_T dtdx); 154 | inputs: | 155 | in : q?[j?][i?] # cell (x0, x1) 156 | flux_left : flux(q?[j?][i?]) # interface x0 157 | flux_right : flux(q?[j?][i?+1]) # interface x1 158 | outputs: | 159 | : new(q?[j?][i?]) # cell (x0, x1) 160 | 161 | courant: 162 | declaration: hfav_courant(REAL_T& cfl, REAL_T u, REAL_T v, REAL_T c); 163 | inputs: | 164 | u : new(prim_u[j?][i?]) 165 | v : new(prim_v[j?][i?]) 166 | c : new(prim_c[j?][i?]) 167 | outputs: | 168 | cfl : cfl[j?][i?] 169 | 170 | max_courant: 171 | declaration: max_courant(REAL_T cfl, REAL_T& courantv); 172 | inputs: | 173 | cfl: cfl[j][i] 174 | outputs: | 175 | courantv: reduction(max:courantv) 176 | 177 | globals: 178 | inputs: | 179 | REAL_T rho[j?][i?] 180 | REAL_T rhou[j?][i?] 181 | REAL_T rhov[j?][i?] 182 | REAL_T E[j?][i?] 
183 | 184 | outputs: | 185 | new(rho[j][i]) => REAL_T rho[j][i] 186 | new(rhou[j][i]) => REAL_T rhou[j][i] 187 | new(rhov[j][i]) => REAL_T rhov[j][i] 188 | new(E[j][i]) => REAL_T E[j][i] 189 | courantv => REAL_T courantv 190 | 191 | codegen options: 192 | header: | 193 | static void gen_xstrip(REAL_T &courantv, const int ystride, const int istart, const int iend, const int jstart, const int jend, REAL_T (*rho)[ystride], REAL_T (*rhou)[ystride], REAL_T (*rhov)[ystride], REAL_T (*E)[ystride], const REAL_T slope_type, const REAL_T inv_slope_type, const REAL_T dtdx) 194 | { 195 | footer: | 196 | } 197 | loops: 198 | - 199 | iter_ident: i 200 | start: istart 201 | end: iend 202 | stride: 1 203 | - 204 | iter_ident: j 205 | start: jstart 206 | end: jend 207 | stride: 1 208 | loop order : [j, i] 209 | 210 | vector loop : i 211 | 212 | language : C99 213 | types: 214 | REAL_T : double 215 | 216 | prefix : __hfav_ 217 | output file : hydro2d-x-gen.hpp 218 | -------------------------------------------------------------------------------- /examples/hydro2d/hydro2d-y.yaml: -------------------------------------------------------------------------------- 1 | kernels: 2 | 3 | rcp: 4 | declaration: REAL_T rcp(REAL_T x); 5 | inputs: | 6 | x : q? 7 | outputs: | 8 | : rcp(q?) 9 | 10 | constoprim: 11 | declaration: conservative_to_primitive(REAL_T &prim_rho, REAL_T &inv_prim_rho, REAL_T& prim_u, REAL_T &prim_v, REAL_T &E_internal, REAL_T cons_rho, REAL_T cons_rhou, REAL_T cons_rhov, REAL_T cons_E); 12 | inputs: | 13 | cons_rho : rho[j?][i?] # cell (x0, x1) 14 | cons_rhou : rhou[j?][i?] # cell (x0, x1) 15 | cons_rhov : rhov[j?][i?] # cell (x0, x1) 16 | cons_E : E[j?][i?] # cell (x0, x1) 17 | outputs: | 18 | prim_rho : prim_rho[j?][i?] # cell (x0, x1) 19 | inv_prim_rho : inv_prim_rho[j?][i?] # cell (x0, x1) 20 | prim_u : prim_u[j?][i?] # cell (x0, x1) 21 | prim_v : prim_v[j?][i?] # cell (x0, x1) 22 | E_internal : Einternal[j?][i?] 
# cell (x0, x1) 23 | 24 | eqstate: 25 | declaration: REAL_T equation_of_state(REAL_T rho, REAL_T Einternal); 26 | inputs: | 27 | rho : prim_rho[j?][i?] # cell (x0, x1) 28 | Einternal : Einternal[j?][i?] # cell (x0, x1) 29 | outputs: | 30 | : prim_p[j?][i?] # cell (x0, x1) 31 | 32 | sound: 33 | declaration: REAL_T speed_of_sound(REAL_T inv_rho, REAL_T p); 34 | inputs: | 35 | inv_rho : inv_prim_rho[j?][i?] # cell (x0, x1) 36 | p : prim_p[j?][i?] # cell (x0, x1) 37 | outputs: | 38 | : prim_c[j?][i?] # cell (x0, x1) 39 | 40 | # Slope is computed for v = 0, 1, 2, 3 (prim_rho, prim_u, prim_v, prim_p) 41 | slope: 42 | declaration: REAL_T slope(REAL_T qm1, REAL_T q0, REAL_T qp1, REAL_T slope_type, REAL_T inv_slope_type); 43 | inputs: | 44 | qm1 : q?[j?-1][i?] # cell (x-1, x0) 45 | q0 : q?[j? ][i?] # cell (x0, x1) 46 | qp1 : q?[j?+1][i?] # cell (x1, x2) 47 | outputs: | 48 | : delta(q?[j?][i?]) # cell (x0, x1) 49 | 50 | # Handling of rcp(c) is terrible here. 51 | ltrace: 52 | declaration: ltrace(REAL_T& flux_rho_m, REAL_T& flux_u_m, REAL_T& flux_v_m, REAL_T& flux_p_m, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx); 53 | inputs: | 54 | rho : prim_rho[j?][i?] # cell (x0, x1) 55 | inv_rho : inv_prim_rho[j?][i?] # cell (x0, x1) 56 | u : prim_u[j?][i?] # cell (x0, x1) 57 | v : prim_v[j?][i?] # cell (x0, x1) 58 | p : prim_p[j?][i?] # cell (x0, x1) 59 | drho : delta(prim_rho[j?][i?]) # cell (x0, x1) 60 | du : delta(prim_u[j?][i?]) # cell (x0, x1) 61 | dv : delta(prim_v[j?][i?]) # cell (x0, x1) 62 | dp : delta(prim_p[j?][i?]) # cell (x0, x1) 63 | c : prim_c[j?][i?] 
# cell (x0, x1) 64 | inv_c : rcp(prim_c[j?][i?]) # cell (x0, x1) 65 | outputs: | 66 | flux_rho_m : lflux(rho[j?+1][i?]) # interface x1 67 | flux_u_m : lflux(u[j?+1][i?]) # interface x1 68 | flux_v_m : lflux(v[j?+1][i?]) # interface x1 69 | flux_p_m : lflux(p[j?+1][i?]) # interface x1 70 | 71 | rtrace: 72 | declaration: rtrace(REAL_T& flux_rho_p, REAL_T& flux_u_p, REAL_T& flux_v_p, REAL_T& flux_p_p, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx); 73 | inputs: | 74 | rho : prim_rho[j?][i?] # cell (x0, x1) 75 | inv_rho : inv_prim_rho[j?][i?] # cell (x0, x1) 76 | u : prim_u[j?][i?] # cell (x0, x1) 77 | v : prim_v[j?][i?] # cell (x0, x1) 78 | p : prim_p[j?][i?] # cell (x0, x1) 79 | drho : delta(prim_rho[j?][i?]) # cell (x0, x1) 80 | du : delta(prim_u[j?][i?]) # cell (x0, x1) 81 | dv : delta(prim_v[j?][i?]) # cell (x0, x1) 82 | dp : delta(prim_p[j?][i?]) # cell (x0, x1) 83 | c : prim_c[j?][i?] # cell (x0, x1) 84 | inv_c : rcp(prim_c[j?][i?]) # cell (x0, x1) 85 | outputs: | 86 | flux_rho_p : rflux(rho[j?][i?]) # interface x0 87 | flux_u_p : rflux(u[j?][i?]) # interface x0 88 | flux_v_p : rflux(v[j?][i?]) # interface x0 89 | flux_p_p : rflux(p[j?][i?]) # interface x0 90 | 91 | riemann: 92 | declaration: riemann(REAL_T& gdnv_rho, REAL_T& gdnv_u, REAL_T& gdnv_v, REAL_T& gdnv_p, REAL_T in_left_rho, REAL_T in_left_u, REAL_T in_left_v, REAL_T in_left_p, REAL_T in_right_rho, REAL_T in_right_u, REAL_T in_right_v, REAL_T in_right_p); 93 | inputs: | 94 | in_left_rho : lflux(rho[j?][i?]) # interface x0 95 | in_left_u : lflux(u[j?][i?]) # interface x0 96 | in_left_v : lflux(v[j?][i?]) # interface x0 97 | in_left_p : lflux(p[j?][i?]) # interface x0 98 | in_right_rho : rflux(rho[j?][i?]) # interface x0 99 | in_right_u : rflux(u[j?][i?]) # interface x0 100 | in_right_v : rflux(v[j?][i?]) # interface x0 101 | in_right_p : rflux(p[j?][i?]) # interface x0 102 | outputs: | 103 | gdnv_rho : 
gdnv(rho[j?][i?]) # interface x0 104 | gdnv_u : gdnv(u[j?][i?]) # interface x0 105 | gdnv_v : gdnv(v[j?][i?]) # interface x0 106 | gdnv_p : gdnv(p[j?][i?]) # interface x0 107 | 108 | cmpflx: 109 | declaration: cmpflx(REAL_T& flux_rho, REAL_T& flux_rhou, REAL_T& flux_rhov, REAL_T& flux_E, REAL_T gdnv_rho, REAL_T gdnv_u, REAL_T gdnv_v, REAL_T gdnv_p); 110 | inputs: | 111 | gdnv_rho : gdnv(rho[j?][i?]) # interface x0 112 | gdnv_u : gdnv(u[j?][i?]) # interface x0 113 | gdnv_v : gdnv(v[j?][i?]) # interface x0 114 | gdnv_p : gdnv(p[j?][i?]) # interface x0 115 | outputs: | 116 | flux_rho : flux(rho[j?][i?]) # interface x0 117 | flux_rhou : flux(rhou[j?][i?]) # interface x0 118 | flux_rhov : flux(rhov[j?][i?]) # interface x0 119 | flux_E : flux(E[j?][i?]) # interface x0 120 | 121 | update: 122 | declaration: REAL_T update(REAL_T in, REAL_T flux_left, REAL_T flux_right, REAL_T dtdx); 123 | inputs: | 124 | in : q?[j?][i?] # cell (x0, x1) 125 | flux_left : flux(q?[j?][i?]) # interface x0 126 | flux_right : flux(q?[j?+1][i?]) # interface x1 127 | outputs: | 128 | : new(q?[j?][i?]) # cell (x0, x1) 129 | 130 | globals: 131 | inputs: | 132 | REAL_T rho[j?][i?] 133 | REAL_T rhou[j?][i?] 134 | REAL_T rhov[j?][i?] 135 | REAL_T E[j?][i?] 
136 | 137 | outputs: | 138 | new(rho[j][i]) => REAL_T rho[j][i] 139 | new(rhou[j][i]) => REAL_T rhou[j][i] 140 | new(rhov[j][i]) => REAL_T rhov[j][i] 141 | new(E[j][i]) => REAL_T E[j][i] 142 | 143 | codegen options: 144 | header: | 145 | static void gen_ystrip(const int ystride, const int istart, const int iend, const int jstart, const int jend, REAL_T (*rho)[ystride], REAL_T (*rhou)[ystride], REAL_T (*rhov)[ystride], REAL_T (*E)[ystride], const REAL_T slope_type, const REAL_T inv_slope_type, const REAL_T dtdx) 146 | { 147 | footer: | 148 | } 149 | loops: 150 | - 151 | iter_ident: i 152 | start: istart 153 | end: iend 154 | stride: 1 155 | - 156 | iter_ident: j 157 | start: jstart 158 | end: jend 159 | stride: 1 160 | loop order : [i, j] 161 | 162 | vector loop : i 163 | 164 | language : C99 165 | types: 166 | REAL_T : double 167 | 168 | prefix : __hfav_ 169 | output file : hydro2d-y-gen.hpp 170 | -------------------------------------------------------------------------------- /examples/hydro2d/pcl-hydro-params.cpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/pcl-hydro-params.cpp : parameter parsing for hydro 2 | 3 | (C) Romain Teyssier : CEA/IRFU -- original F90 code 4 | (C) Pierre-Francois Lavallee : IDRIS -- original F90 code 5 | (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version 6 | (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86 7 | (C) John Pennycook : Intel -- augmentations to above version 8 | */ 9 | /* 10 | This software is governed by the CeCILL license under French law and 11 | abiding by the rules of distribution of free software. You can use, 12 | modify and/ or redistribute the software under the terms of the CeCILL 13 | license as circulated by CEA, CNRS and INRIA at the following URL 14 | "http://www.cecill.info". 
15 | 16 | As a counterpart to the access to the source code and rights to copy, 17 | modify and redistribute granted by the license, users are provided only 18 | with a limited warranty and the software's author, the holder of the 19 | economic rights, and the successive licensors have only limited 20 | liability. 21 | 22 | In this respect, the user's attention is drawn to the risks associated 23 | with loading, using, modifying and/or developing or reproducing the 24 | software by the user in light of its specific status of free software, 25 | that may mean that it is complicated to manipulate, and that also 26 | therefore means that it is reserved for developers and experienced 27 | professionals having in-depth computer knowledge. Users are therefore 28 | encouraged to load and test the software's suitability as regards their 29 | requirements in conditions enabling the security of their systems and/or 30 | data to be ensured and, more generally, to use and operate it in the 31 | same conditions as regards security. 32 | 33 | The fact that you are presently reading this means that you have had 34 | knowledge of the CeCILL license and that you accept its terms. 
35 | */ 36 | #include "pcl-hydro.hpp" 37 | 38 | #include 39 | #include 40 | 41 | static void default_values(hydro *H) 42 | { 43 | // Default values should be given 44 | H->global_n[0] = 20; 45 | H->global_n[1] = 20; 46 | H->nxystep = -1; 47 | H->dx = 1.0; 48 | H->t = 0.0; 49 | H->step = 0; 50 | H->tend = 0.0; 51 | H->courant_number = 0.5; 52 | H->iorder = 2; 53 | H->slope_type = 1.; 54 | H->scheme = hydro::MUSCL; 55 | H->nstepmax = (unsigned int)-1; 56 | H->testcase = 0; 57 | } 58 | 59 | static void keyval(char *buffer, char **pkey, char **pval) 60 | { 61 | char *ptr; 62 | *pkey = buffer; 63 | *pval = buffer; 64 | 65 | // kill the newline 66 | *pval = strchr(buffer, '\n'); 67 | if (*pval) 68 | **pval = 0; 69 | 70 | // suppress leading whites or tabs 71 | while ((**pkey == ' ') || (**pkey == '\t')) 72 | (*pkey)++; 73 | *pval = strchr(buffer, '='); 74 | if (*pval) { 75 | **pval = 0; 76 | (*pval)++; 77 | } 78 | // strip key from white or tab 79 | while ((ptr = strchr(*pkey, ' ')) != NULL) { 80 | *ptr = 0; 81 | } 82 | while ((ptr = strchr(*pkey, '\t')) != NULL) { 83 | *ptr = 0; 84 | } 85 | } 86 | 87 | bool hydro_set_kv(hydro *H, char *kvstr) 88 | { 89 | char *pkey, *pval; 90 | keyval(kvstr, &pkey, &pval); 91 | 92 | if(!pkey || !pval) 93 | return false; 94 | 95 | // int parameters 96 | if (strcmp(pkey, "nstepmax") == 0) { 97 | sscanf(pval, "%u", &H->nstepmax); 98 | return true; 99 | } 100 | if (strcmp(pkey, "nx") == 0) { 101 | int tmp; 102 | sscanf(pval, "%d", &tmp); 103 | if(tmp > 0) 104 | { 105 | H->global_n[0] = tmp; 106 | return true; 107 | } 108 | else 109 | { 110 | return false; 111 | } 112 | } 113 | if (strcmp(pkey, "ny") == 0) { 114 | int tmp; 115 | sscanf(pval, "%d", &tmp); 116 | if(tmp > 0) 117 | { 118 | H->global_n[1] = tmp; 119 | return true; 120 | } 121 | else 122 | { 123 | return false; 124 | } 125 | } 126 | if (strcmp(pkey, "nxystep") == 0) { 127 | int tmp; 128 | sscanf(pval, "%d", &tmp); 129 | if(tmp > 0) 130 | { 131 | H->nxystep = tmp; 132 | return 
true; 133 | } 134 | else 135 | { 136 | return false; 137 | } 138 | } 139 | if (strcmp(pkey, "iorder") == 0) { 140 | int tmp; 141 | sscanf(pval, "%d", &tmp); 142 | if(tmp == 1 || tmp == 2) 143 | { 144 | H->iorder = tmp; 145 | return true; 146 | } 147 | else 148 | { 149 | return false; 150 | } 151 | } 152 | // float parameters 153 | if (strcmp(pkey, "slope_type") == 0) { 154 | double tmp; 155 | sscanf(pval, REAL_FMT, &tmp); 156 | if(tmp > 0.0) 157 | { 158 | H->slope_type = tmp; 159 | return true; 160 | } 161 | else 162 | { 163 | return false; 164 | } 165 | } 166 | if (strcmp(pkey, "tend") == 0) { 167 | double tmp; 168 | sscanf(pval, REAL_FMT, &tmp); 169 | if(tmp > 0.0) 170 | { 171 | H->tend = tmp; 172 | return true; 173 | } 174 | else 175 | { 176 | return false; 177 | } 178 | } 179 | if (strcmp(pkey, "dx") == 0) { 180 | double tmp; 181 | sscanf(pval, REAL_FMT, &tmp); 182 | if(tmp > 0.0) 183 | { 184 | H->dx = tmp; 185 | return true; 186 | } 187 | else 188 | { 189 | return false; 190 | } 191 | } 192 | if (strcmp(pkey, "courant_factor") == 0) { 193 | double tmp; 194 | sscanf(pval, REAL_FMT, &tmp); 195 | if(tmp > 0.0) 196 | { 197 | H->courant_number = tmp; 198 | return true; 199 | } 200 | else 201 | { 202 | return false; 203 | } 204 | } 205 | if (strcmp(pkey, "testcase") == 0) { 206 | int tmp; 207 | sscanf(pval, "%d", &tmp); 208 | if(tmp == 0 || tmp == 1 || tmp == 2) 209 | { 210 | H->testcase = tmp; 211 | return true; 212 | } 213 | else 214 | { 215 | return false; 216 | } 217 | } 218 | // string parameter 219 | if (strcmp(pkey, "scheme") == 0) { 220 | if (strcmp(pval, "muscl") == 0) { 221 | H->scheme = hydro::MUSCL; 222 | } else if (strcmp(pval, "plmde") == 0) { 223 | H->scheme = hydro::PLMDE; 224 | } else if (strcmp(pval, "collela") == 0) { 225 | H->scheme = hydro::COLLELA; 226 | } else { 227 | return false; 228 | } 229 | return true; 230 | } 231 | return false; 232 | } 233 | 234 | static void process_input(hydro *H, const char *datafile, int quiet) 235 | { 236 | FILE 
*fd = NULL; 237 | char buffer[1024]; 238 | 239 | fd = xfopen_read(datafile, "r"); 240 | if (fd == NULL) { 241 | fprintf(stderr, "can't read input file\n"); 242 | exit(1); 243 | } 244 | while (fgets(buffer, 1024, fd) == buffer) { 245 | bool res = hydro_set_kv(H, buffer); 246 | if(!res && quiet < 2) 247 | printf("[PARAMS] Skipping unused key %s\n", buffer); 248 | } 249 | fclose(fd); 250 | } 251 | 252 | bool load_hydro_params(hydro *h, const char *file, int quiet) 253 | { 254 | default_values(h); 255 | if(file) 256 | process_input(h, file, quiet); 257 | return true; 258 | } 259 | -------------------------------------------------------------------------------- /examples/hydro2d/pcl-hydro-util.cpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/pcl-hydro-util.cpp : utiltiies for setting up hydro code 2 | 3 | (C) Romain Teyssier : CEA/IRFU -- original F90 code 4 | (C) Pierre-Francois Lavallee : IDRIS -- original F90 code 5 | (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version 6 | (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86 7 | (C) John Pennycook : Intel -- augmentations to above version 8 | */ 9 | /* 10 | This software is governed by the CeCILL license under French law and 11 | abiding by the rules of distribution of free software. You can use, 12 | modify and/ or redistribute the software under the terms of the CeCILL 13 | license as circulated by CEA, CNRS and INRIA at the following URL 14 | "http://www.cecill.info". 15 | 16 | As a counterpart to the access to the source code and rights to copy, 17 | modify and redistribute granted by the license, users are provided only 18 | with a limited warranty and the software's author, the holder of the 19 | economic rights, and the successive licensors have only limited 20 | liability. 
21 | 22 | In this respect, the user's attention is drawn to the risks associated 23 | with loading, using, modifying and/or developing or reproducing the 24 | software by the user in light of its specific status of free software, 25 | that may mean that it is complicated to manipulate, and that also 26 | therefore means that it is reserved for developers and experienced 27 | professionals having in-depth computer knowledge. Users are therefore 28 | encouraged to load and test the software's suitability as regards their 29 | requirements in conditions enabling the security of their systems and/or 30 | data to be ensured and, more generally, to use and operate it in the 31 | same conditions as regards security. 32 | 33 | The fact that you are presently reading this means that you have had 34 | knowledge of the CeCILL license and that you accept its terms. 35 | */ 36 | 37 | #include "pcl-hydro.hpp" 38 | #include 39 | 40 | void init_hydro(hydro *h) 41 | { 42 | if(h->nxystep == -1) 43 | h->nxystep = std::max(h->global_n[0], h->global_n[1]); 44 | h->ystride = h->global_n[0] + 2*2; 45 | h->varstride = h->ystride * (h->global_n[1] + 2*2); 46 | h->q = (REAL_T *) xmalloc(sizeof(REAL_T) * h->varstride * 4, "q"); 47 | 48 | h->inv_slope_type = 1.0/h->slope_type; 49 | 50 | for(int i = 0; i < h->varstride; ++i) 51 | h->q[i + 0*h->varstride] = (REAL_T) 1.0; 52 | for(int i = 0; i < h->varstride; ++i) 53 | h->q[i + 1*h->varstride] = (REAL_T) 0.0; 54 | for(int i = 0; i < h->varstride; ++i) 55 | h->q[i + 2*h->varstride] = (REAL_T) 0.0; 56 | for(int i = 0; i < h->varstride; ++i) 57 | h->q[i + 3*h->varstride] = (REAL_T) 1e-5; 58 | 59 | switch(h->testcase) 60 | { 61 | case 0: 62 | { 63 | const int x = h->global_n[0] / 2 + 2; 64 | const int y = h->global_n[1] / 2 + 2; 65 | h->q[h->ystride*y + x + 3*h->varstride] = ((REAL_T) 1.0) / h->dx / h->dx; 66 | } 67 | break; 68 | case 1: 69 | { 70 | const int x = 2; 71 | const int y = 2; 72 | h->q[h->ystride*y + x + 3*h->varstride] = ((REAL_T) 1.0) 
/ h->dx / h->dx; 73 | } 74 | break; 75 | case 2: 76 | { 77 | const int x = 2; 78 | for(int j = 0; j < h->global_n[1]; ++j) 79 | { 80 | const int y = j + 2; 81 | h->q[h->ystride*y + x + 3*h->varstride] = ((REAL_T) 1.0) / h->dx / h->dx; 82 | } 83 | } 84 | break; 85 | default: 86 | die("Test case %d not implemented!\n", h->testcase); 87 | } 88 | } 89 | 90 | void destroy_hydro(hydro *h) 91 | { 92 | xmalloc_free(h->q); 93 | } 94 | 95 | void write_hydro_ts(timeseries_writer *tw, const hydro *h) 96 | { 97 | const int xw = h->global_n[0] + 2*2; 98 | const int yw = h->global_n[1] + 2*2; 99 | 100 | tw->new_frame(h->t, xw * yw * 4 * sizeof(REAL_T)); 101 | for(int v = 0; v < 4; ++v) 102 | for(int j = 0; j < yw; ++j) 103 | tw->append(h->q + v*h->varstride + j * h->ystride, xw * sizeof(REAL_T)); 104 | } 105 | 106 | static void set_boundary( REAL_T *restrict dest, 107 | const REAL_T sign, 108 | const int width, 109 | const int stride) 110 | { 111 | for(int i = 0; i < width; ++i) 112 | dest[i*stride] = sign*dest[(2*width - 1 - i)*stride]; 113 | } 114 | 115 | void set_boundaries( REAL_T *restrict dest_base, 116 | const REAL_T *restrict signs, 117 | const int width, 118 | const int stride, 119 | const int nv, 120 | const int vstride) 121 | { 122 | for(int v = 0; v < nv; ++v) 123 | set_boundary(dest_base + v*vstride, signs[v], width, stride); 124 | } 125 | 126 | REAL_T compute_timestep(const hydro *h) 127 | { 128 | REAL_T courantv = SMALLC; 129 | for(int j = 2; j < h->global_n[1] + 2; ++j) 130 | for(int i = 2; i < h->global_n[0] + 2; ++i) 131 | { 132 | const int offs = j * h->ystride + i; 133 | REAL_T prim_rho; 134 | REAL_T prim_inv_rho; 135 | REAL_T prim_u; 136 | REAL_T prim_v; 137 | REAL_T E_internal; 138 | 139 | conservative_to_primitive(&prim_rho, &prim_inv_rho, &prim_u, &prim_v, &E_internal, 140 | h->q[offs + 0*h->varstride], h->q[offs + 1*h->varstride], h->q[offs + 2*h->varstride], h->q[offs + 3*h->varstride]); 141 | const REAL_T prim_p = equation_of_state(prim_rho, 
E_internal); 142 | const REAL_T prim_c = speed_of_sound (prim_inv_rho, prim_p); 143 | 144 | courant(&courantv, prim_u, prim_v, prim_c); 145 | } 146 | 147 | return h->courant_number * h->dx / courantv; 148 | } 149 | 150 | bool set_scheme(hydro::hscheme s, const REAL_T dt_dx) 151 | { 152 | switch(s) 153 | { 154 | case hydro::MUSCL: 155 | ZEROL = -((REAL_T) 100.0)/dt_dx; 156 | ZEROR = ((REAL_T) 100.0)/dt_dx; 157 | PROJECT = (REAL_T) 1.0; 158 | break; 159 | case hydro::PLMDE: 160 | ZEROL = (REAL_T) 0; 161 | ZEROR = (REAL_T) 0; 162 | PROJECT = (REAL_T) 1.0; 163 | break; 164 | case hydro::COLLELA: 165 | ZEROL = (REAL_T) 0.0; 166 | ZEROR = (REAL_T) 0.0; 167 | PROJECT = (REAL_T) 0.0; 168 | break; 169 | default: 170 | return false; 171 | } 172 | return true; 173 | } 174 | -------------------------------------------------------------------------------- /examples/hydro2d/pcl-hydro.hpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/pcl-hydro.hpp : header for hydro code 2 | 3 | (C) Romain Teyssier : CEA/IRFU -- original F90 code 4 | (C) Pierre-Francois Lavallee : IDRIS -- original F90 code 5 | (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version 6 | (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86 7 | (C) John Pennycook : Intel -- augmentations to above version 8 | */ 9 | /* 10 | This software is governed by the CeCILL license under French law and 11 | abiding by the rules of distribution of free software. You can use, 12 | modify and/ or redistribute the software under the terms of the CeCILL 13 | license as circulated by CEA, CNRS and INRIA at the following URL 14 | "http://www.cecill.info". 
15 | 16 | As a counterpart to the access to the source code and rights to copy, 17 | modify and redistribute granted by the license, users are provided only 18 | with a limited warranty and the software's author, the holder of the 19 | economic rights, and the successive licensors have only limited 20 | liability. 21 | 22 | In this respect, the user's attention is drawn to the risks associated 23 | with loading, using, modifying and/or developing or reproducing the 24 | software by the user in light of its specific status of free software, 25 | that may mean that it is complicated to manipulate, and that also 26 | therefore means that it is reserved for developers and experienced 27 | professionals having in-depth computer knowledge. Users are therefore 28 | encouraged to load and test the software's suitability as regards their 29 | requirements in conditions enabling the security of their systems and/or 30 | data to be ensured and, more generally, to use and operate it in the 31 | same conditions as regards security. 32 | 33 | The fact that you are presently reading this means that you have had 34 | knowledge of the CeCILL license and that you accept its terms. 
35 | */

#ifndef __PCL_HYDRO_HPP__
#define __PCL_HYDRO_HPP__

#include "arch.hpp"
#include "timeseries.hpp"

// Numerical constants shared by the serial and vector kernels.
// NOTE(review): the physical meaning of each constant is not stated in this
// header; only the relations between them below are visible here.
static const REAL_T GAMMA     = 1.4;
static const REAL_T GAMMA6    = (GAMMA + 1) / (2.0 * GAMMA);
static const REAL_T SMALLC    = 1e-10;   // also the floor of the max-speed search in compute_timestep()
static const REAL_T SMALLR    = 1e-10;
static const REAL_T SMALLP    = SMALLC*SMALLC / GAMMA;
static const REAL_T SMALLPP   = SMALLR * SMALLP;
static const REAL_T PRECISION = 1e-6;

static const int NITER_RIEMANN = 10;

// Scheme-dependent globals; assigned by set_scheme(), read by ltrace()/rtrace().
extern REAL_T ZEROR;
extern REAL_T ZEROL;
extern REAL_T PROJECT;

// Run parameters plus the state array of one simulation.
struct hydro
{
    typedef enum { MUSCL = 1, PLMDE = 2, COLLELA = 3} hscheme;
    int global_n[2];        // grid size without padding: [0] = x, [1] = y

    int nxystep;            // -1 means "unset"; init_hydro() then uses max(global_n)

    int ystride;            // elements between consecutive rows      (set by init_hydro)
    int varstride;          // elements between consecutive variables (set by init_hydro)

    int     testcase;       // initial condition selector: 0, 1 or 2
    hscheme scheme;

    int          step;
    unsigned int nstepmax;
    int          iorder;    // the parameter parser accepts 1 or 2
    REAL_T       slope_type;
    REAL_T       inv_slope_type;   // 1 / slope_type (set by init_hydro)

    REAL_T courant_number;
    REAL_T dx;
    REAL_T t;
    REAL_T tend;

    REAL_T *q;              // 4 variables of varstride elements each; owned, see init/destroy_hydro
};

// util functions
void init_hydro(hydro *h);
void destroy_hydro(hydro *h);
bool load_hydro_params(hydro *h, const char *file, int quiet);
bool hydro_set_kv(hydro *H, char *kvstr);

void write_hydro_ts(timeseries_writer *tw, const hydro *h);

REAL_T compute_timestep(const hydro *h);
bool   set_scheme(hydro::hscheme s, const REAL_T dt_dx);
void   vtkfile(int step, const REAL_T *q, const int n[2], const int padding, const int ystride, const int varstride, const double dx);

void set_boundaries(      REAL_T *restrict dest_base,
                    const REAL_T *restrict signs,
                    const int              width,
                    const int              stride,
                    const int              nv,
                    const int              vstride);

// serial core functions
void conservative_to_primitive(      REAL_T *restrict prim_rho, REAL_T *restrict inv_prim_rho, REAL_T *restrict prim_u, REAL_T *restrict prim_v, REAL_T *restrict E_internal,
                               const REAL_T cons_rho, const REAL_T cons_rhou, const REAL_T cons_rhov, const REAL_T cons_E);
REAL_T equation_of_state(const REAL_T rho,
                         const REAL_T E_internal);
REAL_T speed_of_sound(const REAL_T inv_rho,
                      const REAL_T p);
REAL_T slope(const REAL_T nbv_m, const REAL_T nbv_0, const REAL_T nbv_p,
             const REAL_T slope_type, const REAL_T inv_slope_type);
void flux(      REAL_T *restrict flux_rho, REAL_T *restrict flux_u, REAL_T *restrict flux_v, REAL_T *restrict flux_p,
          const REAL_T rho, const REAL_T inv_rho, const REAL_T u, const REAL_T v, const REAL_T p,
          const REAL_T sp_m, const REAL_T sp_0, const REAL_T sp_p,
          const REAL_T alpha_m, const REAL_T alpha_0r, const REAL_T alpha_0v, const REAL_T alpha_p,
          const REAL_T c);
void trace(      REAL_T *restrict flux_rho_m, REAL_T *restrict flux_u_m, REAL_T *restrict flux_v_m, REAL_T *restrict flux_p_m,
                 REAL_T *restrict flux_rho_p, REAL_T *restrict flux_u_p, REAL_T *restrict flux_v_p, REAL_T *restrict flux_p_p,
           const REAL_T rho, const REAL_T inv_rho, const REAL_T u, const REAL_T v, const REAL_T p,
           const REAL_T drho, const REAL_T du, const REAL_T dv, const REAL_T dp,
           const REAL_T c, const REAL_T inv_c,
           const REAL_T dtdx);
void riemann(      REAL_T *restrict gdnv_rho, REAL_T *restrict gdnv_u, REAL_T *restrict gdnv_v, REAL_T *restrict gdnv_p,
             const REAL_T in_left_rho, const REAL_T in_left_u, const REAL_T in_left_v, const REAL_T in_left_p,
             const REAL_T in_right_rho, const REAL_T in_right_u, const REAL_T in_right_v, const REAL_T in_right_p);
void cmpflx(      REAL_T *restrict flux_rho, REAL_T *restrict flux_rhou, REAL_T *restrict flux_rhov, REAL_T *restrict flux_E,
            const REAL_T gdnv_rho, const REAL_T gdnv_u, const REAL_T gdnv_v, const REAL_T gdnv_p);
REAL_T update(const REAL_T in,
              const REAL_T flux_left, const REAL_T flux_right,
              const REAL_T dtdx);

// "Right" half of the trace step: derives the four alpha coefficients from
// the slopes (drho, du, dv, dp), picks the three sp_* factors by comparing
// u-c, u+c and u against ZEROR, and forwards everything to flux(), which
// writes the four *_p outputs.
inline void rtrace(      REAL_T *restrict flux_rho_p, REAL_T *restrict flux_u_p, REAL_T *restrict flux_v_p, REAL_T *restrict flux_p_p,
                   const REAL_T rho, const REAL_T inv_rho, const REAL_T u, const REAL_T v, const REAL_T p,
                   const REAL_T drho, const REAL_T du, const REAL_T dv, const REAL_T dp,
                   const REAL_T c, const REAL_T inv_c,
                   const REAL_T dtdx)
{
    const REAL_T alpha_m  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) - du) * rho * inv_c;
    const REAL_T alpha_p  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) + du) * rho * inv_c;
    const REAL_T alpha_0r = drho - dp * (inv_c*inv_c);
    const REAL_T alpha_0v = dv;

    // at or above the ZEROR threshold the factor is PROJECT, otherwise speed*dtdx + 1
    const REAL_T right_sp_m = ((u - c) >= ZEROR) ? PROJECT : (u - c) * dtdx + ((REAL_T) 1.0);
    const REAL_T right_sp_p = ((u + c) >= ZEROR) ? PROJECT : (u + c) * dtdx + ((REAL_T) 1.0);
    const REAL_T right_sp_0 = (u       >= ZEROR) ? PROJECT : u       * dtdx + ((REAL_T) 1.0);

    flux(flux_rho_p, flux_u_p, flux_v_p, flux_p_p,
         rho, inv_rho, u, v, p,
         right_sp_m, right_sp_0, right_sp_p,
         alpha_m, alpha_0r, alpha_0v, alpha_p,
         c);
    // todo: handle passive terms
}

// "Left" half of the trace step: same alpha coefficients as rtrace(), but the
// sp_* factors compare against ZEROL and use -PROJECT / speed*dtdx - 1.
// flux() writes the four *_m outputs.
inline void ltrace(      REAL_T *restrict flux_rho_m, REAL_T *restrict flux_u_m, REAL_T *restrict flux_v_m, REAL_T *restrict flux_p_m,
                   const REAL_T rho, const REAL_T inv_rho, const REAL_T u, const REAL_T v, const REAL_T p,
                   const REAL_T drho, const REAL_T du, const REAL_T dv, const REAL_T dp,
                   const REAL_T c, const REAL_T inv_c,
                   const REAL_T dtdx)
{
    const REAL_T alpha_m  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) - du) * rho * inv_c;
    const REAL_T alpha_p  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) + du) * rho * inv_c;
    const REAL_T alpha_0r = drho - dp * (inv_c*inv_c);
    const REAL_T alpha_0v = dv;

    // at or below the ZEROL threshold the factor is -PROJECT, otherwise speed*dtdx - 1
    const REAL_T left_sp_m = ((u - c) <= ZEROL) ? -PROJECT : (u - c) * dtdx - ((REAL_T) 1.0);
    const REAL_T left_sp_p = ((u + c) <= ZEROL) ? -PROJECT : (u + c) * dtdx - ((REAL_T) 1.0);
    const REAL_T left_sp_0 = (u       <= ZEROL) ? -PROJECT : u       * dtdx - ((REAL_T) 1.0);

    flux(flux_rho_m, flux_u_m, flux_v_m, flux_p_m,
         rho, inv_rho, u, v, p,
         left_sp_m, left_sp_0, left_sp_p,
         alpha_m, alpha_0r, alpha_0v, alpha_p,
         c);

    // todo: handle passive terms
}

// Running maximum: *courantv = max(*courantv, c + |u|, c + |v|).
inline void courant(      REAL_T *restrict courantv,
                    const REAL_T u, const REAL_T v,
                    const REAL_T c)
{
    *courantv = std::max(*courantv, std::max(c + std::abs(u), c + std::abs(v)));
}

// Per-cell value only: *courantv = max(c + |u|, c + |v|); the previous
// content of *courantv is overwritten, not accumulated.
inline void hfav_courant(      REAL_T *restrict courantv,
                         const REAL_T u, const REAL_T v,
                         const REAL_T c)
{
    *courantv = std::max(c + std::abs(u), c + std::abs(v));
}

// Accumulation step matching hfav_courant(): *courantv = max(*courantv, cfl).
inline void max_courant(REAL_T cfl, REAL_T* restrict courantv)
{
    const REAL_T old_courant = *courantv;
    *courantv = std::max(old_courant, cfl);
}

// Scratch state carried along a strip by strip_prime()/strip_stable().
struct strip_work
{
    REAL_T flux      [4][2]; // flux at i-1/2, i+1/2
    REAL_T left_flux [4][2]; // left_flux at i, i+1
    REAL_T prim      [5][3]; // prim for i, i+1, i+2
};

void strip_prime(strip_work *restrict sw,
                 const REAL_T *restrict rho,
                 const REAL_T *restrict rhou,
                 const REAL_T *restrict rhov,
                 const REAL_T *restrict E,
                 const hydro *restrict h,
                 const int stride,
                 const REAL_T dtdx);

REAL_T strip_stable(const hydro *restrict h,
                    REAL_T *restrict rho,
                    REAL_T *restrict rhou,
                    REAL_T *restrict rhov,
                    REAL_T *restrict E,
                    strip_work *restrict sw,
                    const int i,
                    const int stride,
                    const REAL_T dtdx,
                    const bool do_courant);

// vector core functions
// (same parameter lists as the serial declarations above, with VREAL_T in
// place of REAL_T; vcourant additionally takes a write mask)
void vconservative_to_primitive(      VREAL_T *restrict prim_rho, VREAL_T *restrict inv_prim_rho, VREAL_T *restrict prim_u, VREAL_T *restrict prim_v, VREAL_T *restrict E_internal,
                                const VREAL_T cons_rho, const VREAL_T cons_rhou, const VREAL_T cons_rhov, const VREAL_T cons_E);
VREAL_T vequation_of_state(const VREAL_T rho,
                           const VREAL_T E_internal);
VREAL_T vspeed_of_sound(const VREAL_T inv_rho,
                        const VREAL_T p);
VREAL_T vslope(const VREAL_T nbv_m, const VREAL_T nbv_0, const VREAL_T nbv_p,
               const VREAL_T slope_type, const VREAL_T inv_slope_type);
void vflux(      VREAL_T *restrict flux_rho, VREAL_T *restrict flux_u, VREAL_T *restrict flux_v, VREAL_T *restrict flux_p,
           const VREAL_T rho, const VREAL_T inv_rho, const VREAL_T u, const VREAL_T v, const VREAL_T p,
           const VREAL_T sp_m, const VREAL_T sp_0, const VREAL_T sp_p,
           const VREAL_T alpha_m, const VREAL_T alpha_0r, const VREAL_T alpha_0v, const VREAL_T alpha_p,
           const VREAL_T c);
void vtrace(      VREAL_T *restrict flux_rho_m, VREAL_T *restrict flux_u_m, VREAL_T *restrict flux_v_m, VREAL_T *restrict flux_p_m,
                  VREAL_T *restrict flux_rho_p, VREAL_T *restrict flux_u_p, VREAL_T *restrict flux_v_p, VREAL_T *restrict flux_p_p,
            const VREAL_T rho, const VREAL_T inv_rho, const VREAL_T u, const VREAL_T v, const VREAL_T p,
            const VREAL_T drho, const VREAL_T du, const VREAL_T dv, const VREAL_T dp,
            const VREAL_T c, const VREAL_T inv_c,
            const VREAL_T dtdx);
void vriemann(      VREAL_T *restrict gdnv_rho, VREAL_T *restrict gdnv_u, VREAL_T *restrict gdnv_v, VREAL_T *restrict gdnv_p,
              const VREAL_T in_left_rho, const VREAL_T in_left_u, const VREAL_T in_left_v, const VREAL_T in_left_p,
              const VREAL_T in_right_rho, const VREAL_T in_right_u, const VREAL_T in_right_v, const VREAL_T in_right_p);
void vcmpflx(      VREAL_T *restrict flux_rho, VREAL_T *restrict flux_rhou, VREAL_T *restrict flux_rhov, VREAL_T *restrict flux_E,
             const VREAL_T gdnv_rho, const VREAL_T gdnv_u, const VREAL_T gdnv_v, const VREAL_T gdnv_p);
VREAL_T vupdate(const VREAL_T in,
                const VREAL_T flux_left, const VREAL_T flux_right,
                const VREAL_T dtdx);
void vcourant(      VREAL_T *restrict courantv,
              const VREAL_T u, const VREAL_T v,
              const VREAL_T c,
              const VMASK_T write_mask);

// Vector counterpart of strip_work.
struct vstrip_work
{
    VREAL_T flux      [4][2]; // flux at i-1/2, i+1/2
    VREAL_T left_flux [4][2]; // left_flux at i, i+1
    VREAL_T prim      [5][3]; // prim for i, i+1, i+2
};

void vstrip_prime(      vstrip_work *restrict sw,
                  const REAL_T *restrict rho,
                  const REAL_T *restrict rhou,
                  const REAL_T *restrict rhov,
                  const REAL_T *restrict E,
                  const hydro *restrict h,
                  const int stride,
                  const VREAL_T dtdx);

VREAL_T vstrip_stable(const hydro *restrict h,
                      REAL_T *restrict rho,
                      REAL_T *restrict rhou,
                      REAL_T *restrict rhov,
                      REAL_T *restrict E,
                      vstrip_work *restrict sw,
                      const int i,
                      const int stride,
                      const VREAL_T dtdx,
                      const VMASK_T write_mask,
                      const bool do_courant);

VREAL_T hstrip_stable(const hydro *restrict h,
                      REAL_T *restrict rho,
                      REAL_T *restrict rhou,
                      REAL_T *restrict rhov,
                      REAL_T *restrict E,
                      vstrip_work *restrict sw,
                      const int i,
                      const int stride,
                      const VREAL_T dtdx,
                      const VMASK_T write_mask,
                      const bool do_courant);

#endif /* __PCL_HYDRO_HPP__ */

-------------------------------------------------------------------------------- /examples/hydro2d/test.nml: -------------------------------------------------------------------------------- 1 | This namelist contains various input parameters for HYDRO runs 2 | 3 | &RUN 4 | tend=50 5 | #noutput=10 6 | nstepmax=100 7 | dtoutput=2.
8 | / 9 | 10 | &MESH 11 | nx=256 12 | ny=256 13 | nxystep=125 14 | prt=0 15 | dx=0.05 16 | boundary_left=1 17 | boundary_right=1 18 | boundary_down=1 19 | boundary_up=1 20 | testcase=1 21 | / 22 | 23 | &HYDRO 24 | courant_factor=0.8 25 | niter_riemann=10 26 | / 27 | -------------------------------------------------------------------------------- /examples/hydro2d/timeseries.cpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/timeseries.cpp : timeseries read/write code 2 | 3 | (C) Jason Sewall : Intel -- initial version 4 | */ 5 | /* 6 | This software is governed by the CeCILL license under French law and 7 | abiding by the rules of distribution of free software. You can use, 8 | modify and/ or redistribute the software under the terms of the CeCILL 9 | license as circulated by CEA, CNRS and INRIA at the following URL 10 | "http://www.cecill.info". 11 | 12 | As a counterpart to the access to the source code and rights to copy, 13 | modify and redistribute granted by the license, users are provided only 14 | with a limited warranty and the software's author, the holder of the 15 | economic rights, and the successive licensors have only limited 16 | liability. 17 | 18 | In this respect, the user's attention is drawn to the risks associated 19 | with loading, using, modifying and/or developing or reproducing the 20 | software by the user in light of its specific status of free software, 21 | that may mean that it is complicated to manipulate, and that also 22 | therefore means that it is reserved for developers and experienced 23 | professionals having in-depth computer knowledge. Users are therefore 24 | encouraged to load and test the software's suitability as regards their 25 | requirements in conditions enabling the security of their systems and/or 26 | data to be ensured and, more generally, to use and operate it in the 27 | same conditions as regards security. 
28 | 29 | The fact that you are presently reading this means that you have had 30 | knowledge of the CeCILL license and that you accept its terms. 31 | */ 32 | 33 | #include "timeseries.hpp" 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #undef EXTEND_ARRAY 44 | 45 | inline void xdie(const char *fmt, ...) 46 | { 47 | va_list val; 48 | va_start(val, fmt); 49 | vfprintf(stderr, fmt, val); 50 | va_end(val); 51 | exit(EXIT_FAILURE); 52 | } 53 | 54 | static void make_path(const char *str) 55 | { 56 | char buff[1024]; 57 | memset(buff, 0, sizeof(char)*1024); 58 | char *current = buff; 59 | while(*str) 60 | { 61 | *current = *str; 62 | ++current; 63 | if(current - buff >= 1023) 64 | xdie("Prefix path is too long (allow 1023, got %d)\n", current-buff); 65 | if(*str == '/') 66 | { 67 | *current = 0; 68 | int dirres = mkdir(buff, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); 69 | if(dirres == -1 && errno != EEXIST) 70 | { 71 | perror("mkdir"); 72 | xdie("mkdir failed!"); 73 | } 74 | } 75 | ++str; 76 | } 77 | } 78 | 79 | bool timeseries_writer::initialize(const char *in_prefix, size_t mps) 80 | { 81 | max_pack_size = mps; 82 | prefix = xstrdup(in_prefix, "prefix"); 83 | 84 | make_path(prefix); 85 | 86 | char buff[1024]; 87 | memset(buff, 0, sizeof(char)*1024); 88 | xsnprintf(buff, 1023, "%s.idx", prefix); 89 | 90 | index_file = xfopen_write(buff, "w"); 91 | if(!index_file) 92 | return false; 93 | 94 | current_pack = 0; 95 | current_pack_name = 0; 96 | pack_no = -1; 97 | 98 | current_entry_str[0] = 0; 99 | 100 | return true; 101 | } 102 | 103 | void timeseries_writer::finish() 104 | { 105 | if(current_pack) 106 | { 107 | fprintf(index_file, "%s %zu\n", current_entry_str, current_pack_size); 108 | fclose(current_pack); 109 | } 110 | fclose(index_file); 111 | 112 | free(prefix); 113 | free(current_pack_name); 114 | } 115 | 116 | static bool flush_frame(timeseries_writer *tw) 117 | { 118 | 
if(tw->current_entry_str[0]) 119 | { 120 | assert(tw->current_pack); 121 | fprintf(tw->index_file, "%s %zu\n", tw->current_entry_str, tw->current_pack_size); 122 | fflush(tw->index_file); 123 | tw->current_entry_str[0] = 0; 124 | return true; 125 | } 126 | return false; 127 | } 128 | 129 | bool timeseries_writer::comment(const char *str) 130 | { 131 | flush_frame(this); 132 | 133 | if(*str) 134 | fputs("# ", index_file); 135 | 136 | for(; *str; ++str) 137 | { 138 | fputc(*str, index_file); 139 | if(*str == '\n') 140 | { 141 | if(*(str + 1)) 142 | fputs("# ", index_file); 143 | else 144 | return true; 145 | } 146 | } 147 | fputc('\n', index_file); 148 | fflush(index_file); 149 | return true; 150 | } 151 | 152 | static bool check_file(timeseries_writer *tw, size_t size_hint) 153 | { 154 | flush_frame(tw); 155 | 156 | if(!tw->current_pack || (size_hint < tw->max_pack_size && tw->current_pack_size + size_hint > tw->max_pack_size)) 157 | { 158 | if(tw->current_pack) 159 | { 160 | fclose(tw->current_pack); 161 | free(tw->current_pack_name); 162 | } 163 | 164 | char buff[1024]; 165 | memset(buff, 0, sizeof(char)*1024); 166 | xsnprintf(buff, 1023, "%s%05d.pak", tw->prefix, ++tw->pack_no); 167 | tw->current_pack_name = xstrdup(basename(buff), "pack name"); 168 | tw->current_pack = xfopen_write(buff, "wb"); 169 | if(!tw->current_pack) 170 | return false; 171 | tw->current_pack_size = 0; 172 | } 173 | return true; 174 | } 175 | 176 | bool timeseries_writer::new_frame(double t, size_t size_hint) 177 | { 178 | if(!check_file(this, size_hint)) 179 | return false; 180 | 181 | assert(current_entry_str[0] == 0); 182 | xsnprintf(current_entry_str, 1023, "f %20.14lf %s %zu", t, current_pack_name, current_pack_size); 183 | return true; 184 | } 185 | 186 | bool timeseries_writer::new_static(const char *name, size_t size_hint) 187 | { 188 | if(!check_file(this, size_hint)) 189 | return false; 190 | 191 | assert(current_entry_str[0] == 0); 192 | xsnprintf(current_entry_str, 1023, "s %s 
%s %zu", name, current_pack_name, current_pack_size); 193 | return true; 194 | } 195 | 196 | size_t timeseries_writer::append(const void *data, size_t data_size) 197 | { 198 | if(current_entry_str[0] == 0) 199 | return 0; 200 | size_t wrote = fwrite(data, data_size, 1, current_pack); 201 | current_pack_size += wrote*data_size; 202 | return wrote*data_size; 203 | } 204 | 205 | bool timeseries_reader::load(const char *index_filename) 206 | { 207 | index_file = xfopen_read(index_filename, "r"); 208 | if(!index_file) 209 | return false; 210 | struct stat st; 211 | int stat_res = fstat(fileno(index_file), &st); 212 | if(stat_res != 0) 213 | return false; 214 | if(!(S_ISREG(st.st_mode) || S_ISLNK(st.st_mode))) 215 | return false; 216 | 217 | const char *back = strrchr(index_filename, '/'); 218 | prefix = back ? xstrndup(index_filename, back-index_filename+1, "prefix") : xstrdup("", "null"); 219 | 220 | frames = 0; 221 | frames_n = 0; 222 | frames_n_allocd = 0; 223 | 224 | statics = 0; 225 | statics_n = 0; 226 | statics_n_allocd = 0; 227 | 228 | files = 0; 229 | files_n = 0; 230 | files_n_allocd = 0; 231 | 232 | refresh(); 233 | 234 | return true; 235 | } 236 | 237 | #define EXTEND_ARRAY(name, num, n_allocd) \ 238 | if(name##_n + num >= n_allocd) \ 239 | { \ 240 | n_allocd = (name##_n + num)*2; \ 241 | void *m = realloc(name, sizeof(name[0])*n_allocd); \ 242 | name = (typeof(name)) m; \ 243 | } 244 | 245 | static bool read_frame(timeseries_reader *tsr, char *file, size_t *low_offset, size_t *high_offset) 246 | { 247 | EXTEND_ARRAY(tsr->frames, 1, tsr->frames_n_allocd); 248 | int num_read = fscanf(tsr->index_file, "%lf %1023s %zu %zu", &tsr->frames[tsr->frames_n].t, file, &tsr->frames[tsr->frames_n].start_offset, &tsr->frames[tsr->frames_n].end_offset); 249 | if(num_read == 4) 250 | { 251 | *low_offset = tsr->frames[tsr->frames_n].start_offset; 252 | *high_offset = tsr->frames[tsr->frames_n].end_offset; 253 | return true; 254 | } 255 | 256 | return false; 257 | } 258 | 259 
| static bool read_static(timeseries_reader *tsr, char *file, size_t *low_offset, size_t *high_offset) 260 | { 261 | EXTEND_ARRAY(tsr->statics, 1, tsr->statics_n_allocd); 262 | char buff[1024]; 263 | memset(buff, 0, sizeof(char)*1024); 264 | int num_read = fscanf(tsr->index_file, "%1023s %1023s %zu %zu", buff, file, &tsr->statics[tsr->statics_n].start_offset, &tsr->statics[tsr->statics_n].end_offset); 265 | if(num_read == 4) 266 | { 267 | tsr->statics[tsr->statics_n].name = xstrdup(buff, "static name"); 268 | *low_offset = tsr->statics[tsr->statics_n].start_offset; 269 | *high_offset = tsr->statics[tsr->statics_n].end_offset; 270 | return true; 271 | } 272 | 273 | return false; 274 | } 275 | /* Parse index entries from the current stream position, appending to frames/statics/files, and return how many entries were read. An incomplete trailing entry rewinds the stream to the start of that entry so it can be retried on a later call. */ 276 | int timeseries_reader::refresh() 277 | { 278 | char buff[1024]; 279 | memset(buff, 0, sizeof(char)*1024); 280 | int nread = 0; 281 | int back_file = files_n; /* the EOF indicator of a FILE* is sticky: clear it so a later refresh() can see entries appended since the previous call */ clearerr(index_file); 282 | while(!feof(index_file)) 283 | { 284 | off64_t last_offs = ftello64(index_file); 285 | 286 | size_t low_offset = 0; 287 | size_t high_offset = 0; 288 | static volatile int *file_no; 289 | char current = fgetc(index_file); 290 | bool reset = false; 291 | switch(current) 292 | { 293 | case '#': 294 | while(!feof(index_file) && current != '\n') 295 | current = fgetc(index_file); 296 | if(current == '\n') 297 | continue; 298 | reset = true; 299 | break; 300 | case 'f': 301 | { 302 | bool okay = read_frame(this, buff, &low_offset, &high_offset); 303 | if(okay) 304 | { 305 | file_no = &(frames[frames_n].file_no); 306 | ++frames_n; 307 | ++nread; 308 | } 309 | reset = !okay; 310 | } 311 | break; 312 | case 's': 313 | { 314 | bool okay = read_static(this, buff, &low_offset, &high_offset); 315 | if(okay) 316 | { 317 | file_no = &(statics[statics_n].file_no); 318 | ++statics_n; 319 | ++nread; 320 | } 321 | reset = !okay; 322 | } 323 | break; 324 | default: /* line separators, EOF and unknown tags carry no entry: skip the file bookkeeping below, which would otherwise reuse the stale name in buff and write through a stale or still-null file_no */ continue; } 325 | 326 | if(reset) 327 | { 328 | fseeko64(index_file, last_offs, SEEK_SET); 329 | break; 330 | } 331 | 332 | if(!files_n || strcmp(files[files_n-1].name, buff) != 0) 333 | 
{ 334 | EXTEND_ARRAY(files, 1, files_n_allocd); 335 | files[files_n].name = xstrdup(buff, "file name"); 336 | files[files_n].fp = 0; 337 | files[files_n].map_bytes = 0; 338 | files[files_n].map_base = 0; 339 | files[files_n].lowest_offset = low_offset; 340 | files[files_n].highest_offset = high_offset; 341 | ++files_n; 342 | } 343 | else 344 | { 345 | files[files_n-1].lowest_offset = std::min(files[files_n-1].lowest_offset, low_offset); 346 | files[files_n-1].highest_offset = std::max(files[files_n-1].highest_offset, high_offset); 347 | } 348 | // currently, we assume that files appear in strictly increasing order in the index file 349 | *file_no = files_n-1; 350 | } 351 | /* The last file known before this call may have grown: map it again at its new size. */ 352 | if(back_file) 353 | { 354 | if(files[back_file-1].map_bytes != files[back_file-1].highest_offset) 355 | { 356 | munmap(files[back_file-1].map_base, files[back_file-1].map_bytes); 357 | void *new_map = mmap(files[back_file-1].map_base, files[back_file-1].highest_offset, PROT_READ, MAP_PRIVATE, fileno(files[back_file-1].fp), 0); 358 | if(new_map == (void*)-1) 359 | { 360 | perror("mmap"); 361 | xdie("Couldn't mmap file: %s\n", files[back_file-1].name); 362 | } 363 | /* without MAP_FIXED the address argument is only a hint, so the mapping may have moved: record where it really is */ files[back_file-1].map_base = new_map; files[back_file-1].map_bytes = files[back_file-1].highest_offset; 364 | } 365 | } 366 | /* Open and map every file first seen during this call. */ 367 | for(int fi = back_file; fi < files_n; ++fi) 368 | { 369 | char buff[1024]; 370 | xsnprintf(buff, 1023, "%s%s", prefix, files[fi].name); 371 | files[fi].fp = xfopen_read(buff, "r"); 372 | if(!files[fi].fp) 373 | { 374 | perror("fopen"); 375 | xdie("Couldn't open file: %s\n", buff); 376 | } 377 | 378 | files[fi].map_bytes = files[fi].highest_offset; 379 | files[fi].map_base = mmap(0, files[fi].map_bytes, PROT_READ, MAP_PRIVATE, fileno(files[fi].fp), 0); 380 | if(files[fi].map_base == (void*)-1) 381 | { 382 | perror("mmap"); 383 | xdie("Couldn't mmap file: %s\n", buff); 384 | } 385 | } 386 | 387 | return nread; 388 | } 389 | 390 | const void *timeseries_reader::get_frame(int frameno, double *t, size_t *size) const 391 | { 392 | const 
frame_entry *fr = frames + frameno; 393 | const file_entry *fi = files + fr->file_no; 394 | *t = fr->t; 395 | *size = fr->end_offset - fr->start_offset; 396 | return (const char*)fi->map_base + fr->start_offset; 397 | } 398 | 399 | const void *timeseries_reader::get_static(int staticno, const char **name, size_t *size) const 400 | { 401 | const static_entry *st = statics + staticno; 402 | const file_entry *fi = files + st->file_no; 403 | *name = st->name; 404 | *size = st->end_offset - st->start_offset; 405 | return (const char*)fi->map_base + st->start_offset; 406 | } 407 | 408 | const void *timeseries_reader::get_static(const char *name, size_t *size) const 409 | { 410 | const char *outname; 411 | for(int i = 0; i < statics_n; ++i) 412 | if(strcmp(statics[i].name, name) == 0) 413 | return get_static(i, &outname, size); 414 | 415 | *size = 0; 416 | return 0; 417 | } 418 | -------------------------------------------------------------------------------- /examples/hydro2d/timeseries.hpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/timeseries.hpp : timeseries read/write code 2 | 3 | (C) Jason Sewall : Intel -- initial version 4 | */ 5 | /* 6 | This software is governed by the CeCILL license under French law and 7 | abiding by the rules of distribution of free software. You can use, 8 | modify and/ or redistribute the software under the terms of the CeCILL 9 | license as circulated by CEA, CNRS and INRIA at the following URL 10 | "http://www.cecill.info". 11 | 12 | As a counterpart to the access to the source code and rights to copy, 13 | modify and redistribute granted by the license, users are provided only 14 | with a limited warranty and the software's author, the holder of the 15 | economic rights, and the successive licensors have only limited 16 | liability. 
17 | 18 | In this respect, the user's attention is drawn to the risks associated 19 | with loading, using, modifying and/or developing or reproducing the 20 | software by the user in light of its specific status of free software, 21 | that may mean that it is complicated to manipulate, and that also 22 | therefore means that it is reserved for developers and experienced 23 | professionals having in-depth computer knowledge. Users are therefore 24 | encouraged to load and test the software's suitability as regards their 25 | requirements in conditions enabling the security of their systems and/or 26 | data to be ensured and, more generally, to use and operate it in the 27 | same conditions as regards security. 28 | 29 | The fact that you are presently reading this means that you have had 30 | knowledge of the CeCILL license and that you accept its terms. 31 | */ 32 | 33 | #ifndef __TIMESERIES_HPP__ 34 | #define __TIMESERIES_HPP__ 35 | 36 | #include "array-macros.hpp" 37 | 38 | struct timeseries_writer 39 | { 40 | bool initialize(const char *in_prefix, size_t max_pack_size); 41 | void finish(); 42 | size_t append(const void *data, size_t data_size); 43 | bool comment(const char *str); 44 | bool new_frame(double t, size_t size_hint); 45 | bool new_static(const char *name, size_t size_hint); 46 | 47 | char *prefix; 48 | size_t max_pack_size; 49 | 50 | FILE *index_file; 51 | 52 | FILE *current_pack; 53 | char *current_pack_name; 54 | size_t current_pack_size; 55 | int pack_no; 56 | 57 | char current_entry_str[1024]; 58 | }; 59 | 60 | struct file_entry 61 | { 62 | char *name; 63 | FILE *fp; 64 | size_t map_bytes; 65 | void *map_base; 66 | size_t lowest_offset; 67 | size_t highest_offset; 68 | }; 69 | 70 | struct frame_entry 71 | { 72 | double t; 73 | size_t start_offset; 74 | size_t end_offset; 75 | int file_no; 76 | }; 77 | 78 | struct static_entry 79 | { 80 | char *name; 81 | size_t start_offset; 82 | size_t end_offset; 83 | int file_no; 84 | }; 85 | 86 | struct 
timeseries_reader 87 | { 88 | bool load(const char *index_file); 89 | int refresh(); 90 | 91 | const void *get_frame(int frameno, double *t, size_t *size) const; 92 | const void *get_static(int staticno, const char **name, size_t *size) const; 93 | const void *get_static(const char *name, size_t *size) const; 94 | 95 | char *prefix; 96 | FILE *index_file; 97 | frame_entry *frames; 98 | int frames_n; 99 | int frames_n_allocd; 100 | 101 | static_entry *statics; 102 | int statics_n; 103 | int statics_n_allocd; 104 | file_entry *files; 105 | int files_n; 106 | int files_n_allocd; 107 | }; 108 | #endif 109 | -------------------------------------------------------------------------------- /examples/hydro2d/vtkfile.cpp: -------------------------------------------------------------------------------- 1 | /* examples/hydro2d/vtkfile.cpp : vtk output 2 | 3 | (C) Romain Teyssier : CEA/IRFU -- original F90 code 4 | (C) Pierre-Francois Lavallee : IDRIS -- original F90 code 5 | (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version 6 | (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86 7 | (C) John Pennycook : Intel -- augmentations to above version 8 | */ 9 | /* 10 | This software is governed by the CeCILL license under French law and 11 | abiding by the rules of distribution of free software. You can use, 12 | modify and/ or redistribute the software under the terms of the CeCILL 13 | license as circulated by CEA, CNRS and INRIA at the following URL 14 | "http://www.cecill.info". 15 | 16 | As a counterpart to the access to the source code and rights to copy, 17 | modify and redistribute granted by the license, users are provided only 18 | with a limited warranty and the software's author, the holder of the 19 | economic rights, and the successive licensors have only limited 20 | liability. 
21 | 22 | In this respect, the user's attention is drawn to the risks associated 23 | with loading, using, modifying and/or developing or reproducing the 24 | software by the user in light of its specific status of free software, 25 | that may mean that it is complicated to manipulate, and that also 26 | therefore means that it is reserved for developers and experienced 27 | professionals having in-depth computer knowledge. Users are therefore 28 | encouraged to load and test the software's suitability as regards their 29 | requirements in conditions enabling the security of their systems and/or 30 | data to be ensured and, more generally, to use and operate it in the 31 | same conditions as regards security. 32 | 33 | The fact that you are presently reading this means that you have had 34 | knowledge of the CeCILL license and that you accept its terms. 35 | */ 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include "arch.hpp" 42 | #include "array-macros.hpp" 43 | 44 | typedef unsigned char byte; 45 | 46 | static const char s_CharPlusSign = '+'; 47 | static const char s_CharSlash = '/'; 48 | 49 | static char SixBitToChar(byte b); 50 | static char *ToBase64(unsigned char *data, int length); 51 | 52 | static char SixBitToChar(byte b) { 53 | char c; 54 | if (b < 26) { 55 | c = (char) ((int) b + (int) 'A'); 56 | } else if (b < 52) { 57 | c = (char) ((int) b - 26 + (int) 'a'); 58 | } else if (b < 62) { 59 | c = (char) ((int) b - 52 + (int) '0'); 60 | } else if (b == 62) { 61 | c = s_CharPlusSign; 62 | } else { 63 | c = s_CharSlash; 64 | } 65 | return c; 66 | } 67 | 68 | static char *ToBase64(unsigned char *data, int length) { 69 | int padding = length % 3; 70 | int blocks = (length - 1) / 3 + 1; 71 | size_t lalloc; 72 | char *s; 73 | int i; 74 | 75 | if (length == 0) 76 | return NULL; 77 | 78 | if (padding > 0) 79 | padding = 3 - padding; 80 | 81 | // lalloc = (blocks * 4 + 1 + 16); 82 | lalloc = blocks; 83 | lalloc *= 4; 84 | lalloc += 17; 
85 | 86 | s = (char*)malloc(lalloc); 87 | if (s == NULL) { 88 | fprintf(stderr, "Length=%d, blocks=%d lalloc=%ld\n", length, blocks, lalloc); 89 | exit(1); 90 | } 91 | 92 | for (i = 0; i < blocks; i++) { 93 | bool finalBlock = i == blocks - 1; 94 | bool pad2 = false; 95 | bool pad1 = false; 96 | if (finalBlock) { 97 | pad2 = padding == 2; 98 | pad1 = padding > 0; 99 | } 100 | 101 | int index = i * 3; 102 | byte b1 = data[index]; 103 | byte b2 = pad2 ? (byte) 0 : data[index + 1]; 104 | byte b3 = pad1 ? (byte) 0 : data[index + 2]; 105 | 106 | byte temp1 = (byte) ((b1 & 0xFC) >> 2); 107 | 108 | byte temp = (byte) ((b1 & 0x03) << 4); 109 | byte temp2 = (byte) ((b2 & 0xF0) >> 4); 110 | temp2 += temp; 111 | 112 | temp = (byte) ((b2 & 0x0F) << 2); 113 | byte temp3 = (byte) ((b3 & 0xC0) >> 6); 114 | temp3 += temp; 115 | 116 | byte temp4 = (byte) (b3 & 0x3F); 117 | 118 | index = i * 4; 119 | s[index] = SixBitToChar(temp1); 120 | s[index + 1] = SixBitToChar(temp2); 121 | s[index + 2] = pad2 ? '=' : SixBitToChar(temp3); 122 | s[index + 3] = pad1 ? 
'=' : SixBitToChar(temp4); 123 | } 124 | s[blocks * 4] = (byte) 0; 125 | return s; 126 | } 127 | 128 | #define BINARY 1 129 | #undef MPI 130 | static void vtkwpvd(int nout, char *r) { 131 | char n[1024]; 132 | char vfname[1024]; 133 | int i; 134 | FILE *vf = NULL; 135 | char tmp[10]; 136 | 137 | vf = xfopen_write("Hydro.pvd", "w"); 138 | if(vf == NULL) 139 | { 140 | fprintf(stderr, "Can't write to Hydro.pvd\n"); 141 | exit(1); 142 | } 143 | 144 | fprintf(vf, "\n"); 145 | fprintf(vf, " \n"); 146 | fprintf(vf, " \n"); 147 | 148 | for (i = 1; i <= nout; i++) { 149 | xsnprintf(tmp, 9, "%06d", i); 150 | xsnprintf(n, 1023, "Dep/%c%c%c%c", tmp[0], tmp[1], tmp[2], tmp[3]); 151 | xsnprintf(n, 1023, "%s/%c%c", n, tmp[4], tmp[5]); 152 | xsnprintf(vfname, 1023, "%s/Hydro_%04d.pvtr", n, i); 153 | fprintf(vf, " \n", i, vfname); 154 | } 155 | 156 | fprintf(vf, " \n"); 157 | fprintf(vf, "\n"); 158 | fclose(vf); 159 | } 160 | 161 | static void vtknm(char *n, size_t len, int me, int nout) { 162 | char tmp[10]; 163 | 164 | xsnprintf(tmp, 9, "%06d", nout); 165 | xsnprintf(n, len, "Dep"); 166 | if (me == 0) { 167 | mkdir(n, 0777); 168 | } 169 | xsnprintf(n, len, "%s/%c%c%c%c", n, tmp[0], tmp[1], tmp[2], tmp[3]); 170 | if (me == 0) { 171 | mkdir(n, 0777); 172 | } 173 | xsnprintf(n, len, "%s/%c%c", n, tmp[4], tmp[5]); 174 | 175 | if (me == 0) { 176 | mkdir(n, 0777); 177 | } 178 | } 179 | 180 | void vtkfile(int step, const REAL_T *q, const int n[2], const int padding, const int ystride, const int varstride, const double dx) { 181 | char name[1024]; 182 | char vfrname[1024]; 183 | FILE *fic, *vf; 184 | int i, j, nv; 185 | 186 | enum {ID = 0, IU = 1, IV = 2, IP = 3}; 187 | 188 | // First step : create the directory structure ONLY using PE0 189 | #ifdef MPI 190 | if (H.nproc > 1) MPI_Barrier(MPI_COMM_WORLD); 191 | #endif 192 | vtknm(vfrname, 1023, 0, step); // create the directory structure 193 | // if (0 == 0) fprintf(stderr, "%s\n", vfrname); 194 | #ifdef MPI 195 | if (H.nproc > 1) 
MPI_Barrier(MPI_COMM_WORLD); 196 | #endif 197 | 198 | // Write a domain per PE 199 | xsnprintf(name, 1023, "%s/Hydro_%05d_%04d.vtr", vfrname, 0, step); 200 | fic = xfopen_write(name, "w"); 201 | if (fic == NULL) { 202 | fprintf(stderr, "Ouverture du fichier %s impossible\n", name); 203 | exit(1); 204 | } 205 | fprintf(fic, "\n"); 206 | fprintf(fic, "\n"); 207 | fprintf(fic, " \n", 208 | 0, n[0], 0, n[1], 0, 1); 209 | fprintf(fic, " \n", 210 | 0, n[0], 0, n[1], 0, 1); 211 | fprintf(fic, " \n"); 212 | 213 | fprintf(fic, " \n"); 214 | for (i = 0; i <= n[0]; i++) { 215 | fprintf(fic, "%f ", i * dx); 216 | } 217 | fprintf(fic, "\n"); 218 | fprintf(fic, " \n"); 219 | fprintf(fic, " \n"); 220 | for (j = 0; j <= n[1]; j++) { 221 | fprintf(fic, "%f ", j * dx); 222 | } 223 | fprintf(fic, "\n"); 224 | fprintf(fic, " \n"); 225 | fprintf(fic, " \n"); 226 | fprintf(fic, "%f %f\n", 0., 1. * dx); 227 | fprintf(fic, " \n"); 228 | fprintf(fic, " \n"); 229 | name[0] = 0; 230 | for (nv = 0; nv <= IP; nv++) { 231 | if (nv == ID) 232 | snprintf(name, 1023, "%s varID", name); 233 | if (nv == IU) 234 | snprintf(name, 1023, "%s varIU", name); 235 | if (nv == IV) 236 | snprintf(name, 1023, "%s varIV", name); 237 | if (nv == IP) 238 | snprintf(name, 1023, "%s varIP", name); 239 | } 240 | 241 | // declaration of the variable list 242 | fprintf(fic, " \n", name); 243 | name[0] = 0; 244 | for (nv = 0; nv <= IP; nv++) { 245 | if (nv == ID) 246 | snprintf(name, 1023, "varID"); 247 | if (nv == IU) 248 | snprintf(name, 1023, "varIU"); 249 | if (nv == IV) 250 | snprintf(name, 1023, "varIV"); 251 | if (nv == IP) 252 | snprintf(name, 1023, "varIP"); 253 | 254 | //Definition of the cell values 255 | #if BINARY == 1 256 | fprintf(fic, 257 | " \n", 258 | name); 259 | { 260 | // float tuold[h->net_n[0] * h->net_n[1]]; 261 | float *tuold = NULL; 262 | char *r64; 263 | size_t p = 0, lst; 264 | 265 | assert((n[0] * n[1]) > 0); 266 | tuold = (float *) calloc(n[0] * n[1] + 16, sizeof(float)); 267 | 
assert(tuold != NULL); 268 | 269 | for (j = 0; j < n[1]; j++) { 270 | for (i = 0; i < n[0]; i++) { 271 | tuold[p++] = (float) q[nv * varstride + (j + padding) * ystride + i + padding]; 272 | } 273 | } 274 | // Header = size of the following items 275 | assert(p <= n[0] * n[1]); 276 | 277 | p *= sizeof(float); 278 | r64 = ToBase64((byte *) & p, sizeof(int)); 279 | lst = strlen(r64); 280 | fwrite(r64, 1, lst, fic); 281 | free(r64); 282 | r64 = ToBase64((byte *) tuold, p); 283 | lst = strlen(r64); 284 | fwrite(r64, 1, lst, fic); 285 | free(r64); 286 | free(tuold); 287 | } 288 | #else 289 | fprintf(fic, " \n", name); 290 | 291 | // the image is the interior of the computed domain 292 | for (j = 0; j < n[1]; j++) { 293 | for (i = 0; i < n[0]; i++) { 294 | fprintf(fic, "%lf ", q[nv * (nt[0]*nt[1]) + (j + padding) * nt[0] + i + padding]); 295 | } 296 | fprintf(fic, "\n"); 297 | } 298 | #endif 299 | fprintf(fic, " \n"); 300 | } 301 | fprintf(fic, " \n"); 302 | fprintf(fic, " \n"); 303 | fprintf(fic, " \n"); 304 | fprintf(fic, "\n"); 305 | fclose(fic); 306 | 307 | // At this stage we can write VTK containers. Since only one file is 308 | // necessary even for multiple domains, it has to be written by one 309 | // PE only. 
310 | 311 | #ifdef MPI 312 | if (H.nproc > 1) MPI_Barrier(MPI_COMM_WORLD); 313 | #endif 314 | if (0 == 0) { 315 | xsnprintf(name, 1023, "outputvtk_%05d.pvtr", step); 316 | xsnprintf(name, 1023, "%s/Hydro_%04d.pvtr", vfrname, step); 317 | vf = xfopen_write(name, "w"); 318 | if (vf == NULL) { 319 | fprintf(stderr, "Ouverture du fichier %s impossible\n", name); 320 | exit(1); 321 | } 322 | fprintf(vf, "\n"); 323 | fprintf(vf, "\n"); 324 | fprintf(vf, "\n", n[0], n[1], 1); 325 | fprintf(vf, " \n"); 326 | for (nv = 0; nv <= IP; nv++) { 327 | name[0] = '\0'; 328 | if (nv == ID) 329 | xsnprintf(name, 1023, "varID"); 330 | if (nv == IU) 331 | xsnprintf(name, 1023, "varIU"); 332 | if (nv == IV) 333 | xsnprintf(name, 1023, "varIV"); 334 | if (nv == IP) 335 | xsnprintf(name, 1023, "varIP"); 336 | 337 | #if BINARY == 1 338 | fprintf(vf, 339 | " \n", 340 | name); 341 | #else 342 | fprintf(vf, " \n", name); 343 | #endif 344 | } 345 | fprintf(vf, " \n"); 346 | fprintf(vf, " \n"); 347 | fprintf(vf, " \n"); 348 | fprintf(vf, " \n"); 349 | fprintf(vf, " \n"); 350 | fprintf(vf, " \n"); 351 | for (i = 0; i < 1; i++) { 352 | // int box[8]; 353 | // memset(box, 0, 8 * sizeof(int)); 354 | // CalcSubSurface(0, H.n[0], 0, H.n[1], 0, H.nproc - 1, 0, box, i, 0); 355 | xsnprintf(name, 1023, "Hydro_%05d_%04d.vtr", i, step); 356 | // fprintf(vf, " \n", box[XMIN_BOX], 357 | // box[XMAX_BOX], box[YMIN_BOX], box[YMAX_BOX], 0, 1, name); 358 | fprintf(vf, " \n", 0, n[0], 0, n[1], 0, 1, name); 359 | 360 | } 361 | fprintf(vf, "\n"); 362 | fprintf(vf, "\n"); 363 | fclose(vf); 364 | 365 | // We make the time step available only now to ensure consistency 366 | vtkwpvd(step, "Dep"); 367 | } 368 | } 369 | -------------------------------------------------------------------------------- /examples/laplace5/.gitignore: -------------------------------------------------------------------------------- 1 | generated 2 | reference 3 | laplace-gen.hpp 4 | 
-------------------------------------------------------------------------------- /examples/laplace5/Makefile: -------------------------------------------------------------------------------- 1 | HFAV_DIR=../../ 2 | HFAVROOT?=$(HFAV_DIR)/hfav 3 | 4 | HFAV=$(HFAV_DIR)/hfav.py 5 | 6 | all: reference generated 7 | 8 | reference: laplace5-test.cpp 9 | icpc -o reference laplace5-test.cpp -fopenmp -restrict -std=c++11 -xHost 10 | 11 | laplace5-gen.hpp: $(HFAV) laplace5.yaml 12 | $(HFAV) laplace5.yaml 13 | 14 | generated: laplace5-test.cpp laplace5-gen.hpp 15 | icpc -o generated laplace5-test.cpp -fopenmp -restrict -DUSE_GEN -std=c++11 -xHost -I$(HFAVROOT)/include 16 | 17 | clean: 18 | rm -rf generated reference laplace5-gen.hpp 19 | -------------------------------------------------------------------------------- /examples/laplace5/laplace5-test.cpp: -------------------------------------------------------------------------------- 1 | // examples/laplace5-test/laplace5-test.cpp; 5-point laplace stencil codegen example 2 | 3 | // Copyright 2017 Intel Corporation 4 | // 5 | // GENERATED CODE EXEMPTION 6 | // 7 | // The output of this tool does not automatically import the Apache 8 | // 2.0 license, except the output will continue to be subject to the 9 | // limitation of liability clause in the Apache 2.0 license. Users may 10 | // license their output under any license they choose but the liability 11 | // of the authors of the tool for that output is governed by the 12 | // limitation of liability clause in the Apache 2.0 license. 13 | // 14 | // Licensed under the Apache License, Version 2.0 (the "License"); 15 | // you may not use this file except in compliance with the License. 
16 | // You may obtain a copy of the License at 17 | // 18 | // http://www.apache.org/licenses/LICENSE-2.0 19 | // 20 | // Unless required by applicable law or agreed to in writing, software 21 | // distributed under the License is distributed on an "AS IS" BASIS, 22 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | // See the License for the specific language governing permissions and 24 | // limitations under the License. 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | static int GD; 35 | 36 | static double h; 37 | static double h_inv; 38 | 39 | static void laplace5_resid(double rhs, double n, double ne, double e, double se, double s, double sw, double w, double nw, double self, double* out) 40 | { 41 | *out = rhs - h_inv*h_inv*(n + e + s + w - 4*self); 42 | } 43 | 44 | static double L2norm(const int GD, const double in[restrict][GD+2]) 45 | { 46 | double res = 0.0; 47 | for(int j = 1; j < GD+1; ++j) 48 | { 49 | for(int i = 1; i < GD+1; ++i) 50 | { 51 | double nv; 52 | laplace5_resid(0.0, in[j-1][i], in[j-1][i+1], in[j][i+1], in[j+1][i+1], in[j+1][i], in[j+1][i-1], in[j][i-1], in[j-1][i-1], in[j][i], &nv); 53 | 54 | res += nv*nv; 55 | } 56 | } 57 | return std::sqrt(res); 58 | } 59 | 60 | static double omega = 2.0/3.0; 61 | 62 | static void laplace5(double n, double e, double s, double w, double self, double* out) 63 | { 64 | *out = (1.0 - omega) * self + omega*h*h/4.0*(0.0 - h_inv*h_inv*(n + e + s + w)); 65 | } 66 | 67 | #ifdef USE_GEN 68 | #define VLEN 4 69 | #include "hfav/c99-rotate.h" 70 | #include "laplace5-gen.hpp" 71 | #endif 72 | 73 | static void test_compute(const int GD, const double in[restrict][GD+2], double out[restrict][GD+2]) 74 | { 75 | for(int j = 1; j < GD+1; ++j) 76 | { 77 | for(int i = 1; i < GD+1; ++i) 78 | { 79 | laplace5(in[j-1][i], in[j][i+1], in[j+1][i], in[j][i-1], in[j][i], &out[j][i]); 80 | } 81 | } 82 | } 83 | 84 | int main(int argc, char 
**argv) 85 | { 86 | if (argc != 3) 87 | { 88 | printf("Usage: %s [# iterations] [size]\n", argv[0]); 89 | exit(EXIT_FAILURE); 90 | } 91 | 92 | const int iterations = atoll(argv[1]); 93 | GD = atoll(argv[2]); /* h = 1.0/GD below, so reject a non-positive size up front */ if (GD <= 0) { printf("Size must be a positive integer\n"); exit(EXIT_FAILURE); } 94 | 95 | h = 1.0/GD; 96 | h_inv = 1.0/h; 97 | 98 | // pad by 1 on each side so we don't have to branch in the operator 99 | double *in = (double*) calloc((GD+2) * (GD+2), sizeof(double)); 100 | double *out = (double*) calloc((GD+2) * (GD+2), sizeof(double)); if (!in || !out) { printf("Could not allocate the grids\n"); exit(EXIT_FAILURE); } 101 | 102 | typedef double grid[GD+2][GD+2]; 103 | /* the interior is filled with drand48(), which is seeded by srand48(); srand() only seeds rand() */ 104 | srand48(12345); 105 | for(int j = 1; j < GD+1; ++j) 106 | { 107 | for(int i = 1; i < GD+1; ++i) 108 | { 109 | in[j*(GD+2) + i] = drand48()*h_inv*h_inv; 110 | } 111 | } 112 | 113 | for(int j = 0; j < GD+2; ++j) 114 | { 115 | in[j*(GD+2) + 0] = 1.0*h*h; 116 | out[j*(GD+2) + 0] = 1.0*h*h; 117 | in[j*(GD+2) + GD+1] = 1.0*h*h; 118 | out[j*(GD+2) + GD+1] = 1.0*h*h; 119 | } 120 | for(int i = 0; i < GD+2; ++i) 121 | { 122 | in [0*(GD+2) + i] = -1.0*h*h; 123 | out[0*(GD+2) + i] = -1.0*h*h; 124 | in [(GD+1)*(GD+2) + i] = -1.0*h*h; 125 | out[(GD+1)*(GD+2) + i] = -1.0*h*h; 126 | } 127 | in [0*(GD+2) + 0] = 0.0; 128 | out[0*(GD+2) + 0] = 0.0; 129 | in [0*(GD+2) + (GD+1)] = 0.0; 130 | out[0*(GD+2) + (GD+1)] = 0.0; 131 | 132 | in [(GD+1)*(GD+2) + 0] = 0.0; 133 | out[(GD+1)*(GD+2) + 0] = 0.0; 134 | in [(GD+1)*(GD+2) + (GD+1)] = 0.0; 135 | out[(GD+1)*(GD+2) + (GD+1)] = 0.0; 136 | 137 | printf("Initial: %30.20le\n", L2norm(GD, (double (*)[GD+2]) in)); 138 | 139 | double start = omp_get_wtime(); 140 | uint64_t start_c = _rdtsc(); 141 | for(int t = 0; t < iterations; ++t) 142 | { 143 | #ifdef USE_GEN 144 | inplace_laplace(GD, (double (*)[GD+2]) in, 1, GD+1, 1, GD+1); 145 | #else 146 | test_compute(GD, (double (*)[GD+2]) in, (double (*)[GD+2]) out); 147 | std::swap(in, out); 148 | #endif 149 | } 150 | 151 | double end = omp_get_wtime(); 152 | uint64_t end_c = _rdtsc(); 153 | printf("Took %le seconds\n", end-start); 154 | printf("Took %le cycles\n", (double)(end_c-start_c)); 
155 | 156 | const uint64_t total_cyc = end_c - start_c; 157 | printf("Took %le cycles/iter\n", (double)total_cyc/iterations); 158 | printf("Took %le cycles/iter/cell\n", (double)total_cyc/iterations/(GD*GD)); 159 | 160 | printf("Final %30.20le\n", L2norm(GD, (double (*)[GD+2]) in)); 161 | } 162 | -------------------------------------------------------------------------------- /examples/laplace5/laplace5.yaml: -------------------------------------------------------------------------------- 1 | # Example: laplace5.yaml 2 | # Demonstrates integration of generated laplace5 kernel into application code. 3 | 4 | kernels: 5 | 6 | laplace: 7 | declaration: laplace5(double n, double e, double s, double w, double self, double &out); 8 | inputs: | 9 | n : q?[j?-1][i?] 10 | e : q?[j?][i?+1] 11 | s : q?[j?+1][i?] 12 | w : q?[j?][i?-1] 13 | self : q?[j?][i?] 14 | outputs: | 15 | out : laplace(q?[j?][i?]) 16 | 17 | globals: 18 | 19 | inputs: | 20 | double g_cell[j?][i?] => cell[j?][i?] 21 | outputs: | 22 | laplace(cell[j][i]) => double g_cell[j][i] 23 | 24 | codegen options: 25 | header: | 26 | static void inplace_laplace(const int GD, double g_cell[restrict][GD+2], int istart, int iend, int jstart, int jend) 27 | { 28 | footer: | 29 | } 30 | loops: 31 | - 32 | iter_ident: i 33 | start: istart 34 | end: iend 35 | stride: 1 36 | - 37 | iter_ident: j 38 | start: jstart 39 | end: jend 40 | stride: 1 41 | loop order: [j, i] 42 | language : C99 43 | vector loop: i 44 | prefix : __hfav_ 45 | output file: laplace5-gen.hpp 46 | -------------------------------------------------------------------------------- /examples/literals.yaml: -------------------------------------------------------------------------------- 1 | # Example: literals.yaml 2 | # Demonstrates usage of literals to modify indices passed to functions. 3 | # This functionality has only been tested with very simple expressions, e.g. 
+/- 1 4 | 5 | kernels: 6 | 7 | update: 8 | declaration: update(int im1, int i, int ip1, double om1, double old, double op1, double &new); 9 | inputs: | 10 | im1: i?-1 11 | i : i? 12 | ip1: i?+1 13 | om1: old[j?][i?-1] 14 | old: old[j?][i?] 15 | op1: old[j?][i?+1] 16 | outputs: | 17 | new: new[j?][i?] 18 | 19 | globals: 20 | 21 | inputs: | 22 | double cell[j?][i?] => old[j?][i?] 23 | outputs: | 24 | new[j][i] => double cell[j][i] 25 | 26 | codegen options: 27 | 28 | loops: 29 | - 30 | iter_ident: i 31 | start: first_i 32 | end: last_i 33 | stride: 1 34 | - 35 | iter_ident: j 36 | start: first__j 37 | end: last__j 38 | stride: 1 39 | 40 | loop order: [j, i] 41 | 42 | language: C99 43 | prefix: __hfav_ 44 | vector loop: None 45 | -------------------------------------------------------------------------------- /examples/reduction.yaml: -------------------------------------------------------------------------------- 1 | # Example: reduction.yaml 2 | # Demonstrates usage of reduction(+:) syntax. 3 | 4 | kernels: 5 | 6 | sum: 7 | declaration: sum(double x, double &xSum); 8 | inputs: | 9 | x: x[i][j] 10 | outputs: | 11 | xSum: reduction(+:xSum) 12 | 13 | globals: 14 | 15 | inputs: | 16 | double x[i?][j?] 17 | 18 | outputs: | 19 | double xSum 20 | 21 | codegen options: 22 | 23 | loops: 24 | - 25 | iter_ident: i 26 | start: first_i 27 | end: last_i 28 | stride: 1 29 | - 30 | iter_ident: j 31 | start: first_j 32 | end: last_j 33 | stride: 1 34 | 35 | loop order: [i, j] 36 | 37 | language: C99 38 | prefix: __hfav_ 39 | vector loop: None 40 | -------------------------------------------------------------------------------- /examples/split-loops.yaml: -------------------------------------------------------------------------------- 1 | # Example: split-loops.yaml 2 | # Demonstrates a loop split occurring due to a reduction. 3 | # Such splits are automatically identified by hfav. 
 4 | 5 | kernels: 6 | 7 | sum: 8 | declaration: sum(double x, double &xSum); 9 | inputs: | 10 | x: x[i][j] 11 | outputs: | 12 | xSum: reduction(+:xSum) 13 | 14 | normalize: 15 | declaration: normalize(double& x, double& xSum); 16 | inputs: | 17 | x: x[i?][j?] 18 | xSum: xSum 19 | outputs: | 20 | x: normalized(x[i?][j?]) 21 | 22 | globals: 23 | 24 | inputs: | 25 | double x[i?][j?] 26 | 27 | outputs: | 28 | double xSum 29 | normalized(x[i][j]) => double x[i][j] 30 | 31 | codegen options: 32 | 33 | loops: 34 | - 35 | iter_ident: i 36 | start: first_i 37 | end: last_i 38 | stride: 1 39 | - 40 | iter_ident: j 41 | start: first_j 42 | end: last_j 43 | stride: 1 44 | 45 | loop order: [i, j] 46 | 47 | language: C99 48 | prefix: __hfav_ 49 | vector loop: None 50 | -------------------------------------------------------------------------------- /examples/uninitialized.yaml: -------------------------------------------------------------------------------- 1 | # Example: uninitialized.yaml 2 | # Demonstrates usage of uninitialized variables. 3 | 4 | kernels: 5 | 6 | set_to_zero: 7 | declaration: set_to_zero(double &x); 8 | outputs: | 9 | x: zero(q?) 10 | 11 | globals: 12 | 13 | outputs: | 14 | zero(x[i]) => double x[i] 15 | 16 | codegen options: 17 | 18 | loops: 19 | - 20 | iter_ident: i 21 | start: first_cell_x 22 | end: last_cell_x 23 | stride: 1 24 | 25 | loop order: [i] 26 | 27 | language: C99 28 | prefix: __hfav_ 29 | vector loop: None 30 | -------------------------------------------------------------------------------- /examples/vectorization-inner.yaml: -------------------------------------------------------------------------------- 1 | # Example: vectorization-inner.yaml 2 | # Demonstrates usage of "vector loop" to vectorize an inner loop. 3 | 4 | kernels: 5 | 6 | flux_x: 7 | declaration: flux(cell_t lc, cell_t rc, flux_t &fx); 8 | inputs: | 9 | lc : q?[j?-1][i?] 10 | rc : q?[j?][i?] 
11 | outputs: | 12 | fx : flux_x(q?[j?][i?]) 13 | 14 | integrate: 15 | declaration: integrate(flux_t lf, flux_t rf, cell_t &ic); 16 | inputs: | 17 | lf : flux_x(q?[j?][i?]) 18 | rf : flux_x(q?[j?+1][i?]) 19 | outputs: | 20 | ic : integrated(q?[j?][i?]) 21 | 22 | clamp: 23 | declaration: clamp(cell_t in, int &out); 24 | inputs: | 25 | in : q? 26 | outputs: | 27 | out : clamped(q?) 28 | 29 | globals: 30 | 31 | inputs: | 32 | double d_cell[j?][i?] => cell[j?][i?] 33 | outputs: | 34 | clamped(integrated(cell[j][i])) => int i_cell[j][i] 35 | 36 | codegen options: 37 | 38 | loops: 39 | - 40 | iter_ident: i 41 | start: first_i 42 | end: last_i 43 | stride: 1 44 | - 45 | iter_ident: j 46 | start: first_j 47 | end: last_j 48 | stride: 1 49 | 50 | loop order: [j, i] 51 | 52 | language : C99 53 | vector loop: i 54 | prefix : __hfav_ 55 | types: 56 | cell_t: float64 57 | flux_t: float 58 | clamp_t: int32 59 | -------------------------------------------------------------------------------- /examples/vectorization-outer.yaml: -------------------------------------------------------------------------------- 1 | # Example: vectorization-outer.yaml 2 | # Demonstrates usage of "vector loop" to vectorize an outer loop. 3 | 4 | kernels: 5 | 6 | flux_x: 7 | declaration: flux(cell_t lc, cell_t rc, flux_t &fx); 8 | inputs: | 9 | lc : q?[j?-1][i?] 10 | rc : q?[j?][i?] 11 | outputs: | 12 | fx : flux_x(q?[j?][i?]) 13 | 14 | integrate: 15 | declaration: integrate(flux_t lf, flux_t rf, cell_t &ic); 16 | inputs: | 17 | lf : flux_x(q?[j?][i?]) 18 | rf : flux_x(q?[j?+1][i?]) 19 | outputs: | 20 | ic : integrated(q?[j?][i?]) 21 | 22 | clamp: 23 | declaration: clamp(cell_t in, int &out); 24 | inputs: | 25 | in : q? 26 | outputs: | 27 | out : clamped(q?) 28 | 29 | globals: 30 | 31 | inputs: | 32 | double d_cell[j?][i?] => cell[j?][i?] 
33 | outputs: | 34 | clamped(integrated(cell[j][i])) => int i_cell[j][i] 35 | 36 | codegen options: 37 | 38 | loops: 39 | - 40 | iter_ident: i 41 | start: first_i 42 | end: last_i 43 | stride: 1 44 | - 45 | iter_ident: j 46 | start: first_j 47 | end: last_j 48 | stride: 1 49 | 50 | loop order: [i, j] 51 | 52 | language : C99 53 | vector loop: i 54 | prefix : __hfav_ 55 | types: 56 | cell_t: float64 57 | flux_t: float 58 | clamp_t: int32 59 | -------------------------------------------------------------------------------- /hfav.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # hfav.py; top-level invocation and parsing 4 | 5 | # Copyright 2017 Intel Corporation 6 | # 7 | # GENERATED CODE EXEMPTION 8 | # 9 | # The output of this tool does not automatically import the Apache 10 | # 2.0 license, except the output will continue to be subject to the 11 | # limitation of liability clause in the Apache 2.0 license. Users may 12 | # license their output under any license they choose but the liability 13 | # of the authors of the tool for that output is governed by the 14 | # limitation of liability clause in the Apache 2.0 license. 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. 
import logging
import sys
import re
import os
import argparse
import yaml
from hfav.infer import dag_chain, rule_arg, rule, axiom, goal, rule_group, ivar_axiom, codeblock, reduction_op, reduction_initializer, reduction_finalizer
from hfav.ispace import iteration_space
from hfav.term import symbolic_constant
from hfav.analyze import simple_generator, rolling_generator, rap_dual
from hfav.c99 import codegen, c99_generator
from hfav.cpp import cpp_generator
from hfav.inest import inest_dag
from hfav import parse


def parse_declaration(declaration):
    """Parse a kernel declaration such as ``void f(double a, double &b);``.

    Returns a ``(kname, types, positions)`` tuple where ``types`` maps each
    parameter name to its type string and ``positions`` maps each parameter
    name to its zero-based position.  A return type other than ``void`` is
    recorded under the empty name ``''`` with position ``-1``.

    Raises SyntaxError if the declaration does not match the expected
    ``[rtype] name(params);`` form or a parameter is not exactly
    ``<type> <name>`` once ``&``/``*`` have been removed.
    """
    m = re.match(r"(?:(\w+)[\s]+)?(\w+)\(([\w\d\s,&]*)\);", declaration)
    if m is None:
        raise SyntaxError("Malformed kernel declaration: %s" % (declaration))  # TODO: More information...

    rtype = m.group(1)
    kname = m.group(2)
    types = {}
    positions = {}

    varlist = m.group(3).replace("&", "").replace("*", "")
    # An empty parameter list is valid and simply declares no parameters.
    if varlist.strip() != "":
        for pos, v in enumerate(varlist.split(",")):
            # str.split() splits on runs of whitespace; re.split(r"\s*", ...)
            # would split between every character on Python >= 3.7.
            vsplit = v.split()
            if len(vsplit) != 2:
                raise SyntaxError("Can't tell if %s is a type or a variable name." % (v.strip()))
            vtype, vname = vsplit
            types[vname] = vtype
            positions[vname] = pos

    if rtype is not None and rtype != 'void':
        types[''] = rtype
        positions[''] = -1
    return (kname, types, positions)

# top level list
# [ iter level
#   itspace
# ]

# loop [prologue, steady, epilogue] ident

# ident parent, children (loops, raps)

# loop
#


def _write_dot(enabled, filename, suffix, what, render):
    """Write one Graphviz dump into the current directory, or log that it was skipped.

    The file name is the base name of ``filename`` without its extension,
    followed by ``suffix``.  ``render`` is only called when ``enabled`` is
    true, so the dump text is never computed unless it is written.
    """
    if enabled:
        (root, ext) = os.path.splitext(os.path.basename(filename))
        dagfile = root + suffix
        logging.info("Writing out %s to %s", what, dagfile)
        with open(dagfile, "w") as fi:
            fi.write("%s\n" % (render(),))
        logging.info("Done writing out %s.", what)
    else:
        logging.info("Skipping writing %s.", what)


def hfav_run_yaml():
    """Command-line entry point: read a YAML description and emit generated code.

    Steps, in order: parse the command line; load the YAML file; build rules
    from ``kernels`` and ``code blocks``; build axioms/goals from ``globals``;
    select the code generator from ``codegen options``; resolve the inference
    DAG; build and (unless ``--debug``) fuse the iteration-nest DAG; generate
    the listing and write it to the chosen output.  Exits the process with
    status 0 on success.

    Raises SyntaxError for malformed kernel/type/reduction specifications and
    ValueError for an unrecognized target language.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description="YAML front-end for High-performance Inference Fusion Into Vectorization (hfav)")
    parser.add_argument('-d', '--debug', dest='debug_output', action='store_true', default=False, help='enable debug output')
    parser.add_argument('-o', '--output', dest='output_location', action='store', default=False, help='override output location "-" gives stdout')
    parser.add_argument('-s', '--storage', dest='storage', action='store', default='stack', help='where to place temporary arrays (default: stack)')
    # type=int so that the "extra_output > 0" checks below compare numbers;
    # a string value such as '0' compares greater than 0 on Python 2.
    parser.add_argument('-v', '--verbosity', dest='verbosity', type=int, choices=[0, 1, 2], action='store', default=0, help='verbosity level')
    parser.add_argument('FILE', help='Input YAML file')
    args = parser.parse_args()
    debug_output = args.debug_output
    extra_output = args.verbosity
    filename = args.FILE
    storage = args.storage

    logging.info("Loading input file %s", filename)
    # NOTE(review): yaml.load can construct arbitrary Python objects; consider
    # yaml.safe_load if input files may come from untrusted sources.
    with open(filename, 'r') as config_file:
        config = yaml.load(config_file)
    kernels = []
    axioms = []
    goals = []

    if os.environ.get('HFAVROOT') is None:
        logging.warning("Please set HFAVROOT environment variable to your hfav directory...\n")
        hfavroot = "hfav"
    else:
        hfavroot = os.environ.get('HFAVROOT')

    # Read kernels
    for kname, kparams in config["kernels"].items():

        name, vtype, vpos = parse_declaration(kparams["declaration"])
        vrule = {}

        iargs = []
        if "inputs" in kparams.keys():
            for line in kparams["inputs"].splitlines():
                input_li = line.partition(":")
                vname = input_li[0].strip()
                if vname == '':
                    # The empty name denotes the kernel's return value.
                    raise SyntaxError("Return value cannot be used as an input! (%s)" % (name))
                vrule[vname] = input_li[2].strip()
                iargs.append(rule_arg(vpos[vname], vtype[vname], vrule[vname], "input"))

        oargs = []
        got_return = False
        if "outputs" in kparams.keys():
            for line in kparams["outputs"].splitlines():
                output_li = line.partition(":")
                vname = output_li[0].strip()
                if vname == '':
                    if got_return:
                        raise SyntaxError("Got multiple return values! (%s, %s)" % (vrule[vname], output_li[2].strip()))
                    got_return = True
                if vname in vrule.keys():
                    logging.warning("Parameter \"%s\" specified as an input and output to kernel \"%s\" -- here be dragons...", vname, kname)

                m = re.match(r"reduction\((.+):(.+)\)", output_li[2].strip())
                if m is not None:
                    opkey = m.group(1)
                    supported = reduction_op.supported()
                    if opkey not in supported.keys():
                        # Fail with a clear message instead of a bare KeyError.
                        message = "%s is not a recognized reduction, must be one of: %s" % (opkey, [str(k) for k in supported.keys()])
                        logging.error(message)
                        raise SyntaxError(message)
                    red_op = supported[opkey]
                    vrule[vname] = "_reduction(%s)" % m.group(2)
                    iargs.append(rule_arg(vpos[vname], vtype[vname], "_init(%s)" % m.group(2), "input"))
                    kernels.append(reduction_initializer(parse.parser(m.group(2)).expr(), red_op, vtype[vname]))
                    kernels.append(reduction_finalizer(parse.parser(m.group(2)).expr(), red_op, vtype[vname]))
                else:
                    vrule[vname] = output_li[2].strip()

                oargs.append(rule_arg(vpos[vname], vtype[vname], vrule[vname], "output"))

        # Parameters with no rule are treated as global inputs of the same name.
        for vname in vtype.keys():
            if vname not in vrule:
                logging.warning("No replacement rule for parameter \"%s\" passed to kernel \"%s\" was specified -- assuming a global input of the same name exists.", vname, kname)
                vrule[vname] = "%s" % (vname)
                iargs.append(rule_arg(vpos[vname], vtype[vname], vrule[vname], "input"))
                axioms.append(axiom.read(vrule[vname], vrule[vname], vtype[vname]))

        kernel = rule.read(name, iargs, oargs)
        kernels.append(kernel)

    # Read code blocks
    code_blocks = []
    try:
        cbs = config["code blocks"].items()
    except KeyError:
        cbs = []
        logging.warning("No code blocks specified; assuming no boundary conditions -- \"to infinity and beyond!\"")
    # Kept outside the try so a KeyError raised while reading a block is not
    # misreported as "no code blocks specified".
    for name, cb in cbs:
        code_blocks.append(codeblock.read(name, cb))

    # Read inputs
    if "inputs" in config["globals"]:
        for line in config["globals"]["inputs"].splitlines():
            input_li = line.partition("=>")
            # "<type> <identifier>": split once on the first run of whitespace.
            decl = input_li[0].strip().split(None, 1)
            if input_li[1] == "=>":
                axioms.append(axiom.read(decl[1], input_li[2].strip(), decl[0]))
            else:
                # this case makes sense, where the input is implicitly the same as the output
                axioms.append(axiom.read(decl[1], decl[1], decl[0]))
    else:
        logging.warning("No global inputs specified -- things are unlikely to work except in pathological cases.")

    # Read outputs
    if "outputs" in config["globals"]:
        for line in config["globals"]["outputs"].splitlines():
            output_li = line.partition("=>")
            if output_li[1] == "=>":
                decl = output_li[2].strip().split(None, 1)
                goals.append(goal.read(output_li[0].strip(), decl[1], decl[0]))
            else:
                decl = output_li[0].strip().split(None, 1)
                # this case makes sense, where the output is explicitly different to all inputs
                goals.append(goal.read(decl[1], decl[1], decl[0]))
    else:
        logging.error("No global outputs specified -- nothing to generate.")

    pg = rule_group()
    pg.rules += kernels
    pg.rules += code_blocks

    options = config["codegen options"]
    prefix = options.get("prefix", "__")
    language = options["language"]

    # The YAML value "None" is a string; it disables vectorization.
    vector_var = options.get("vector loop", "None")
    if vector_var == "None":
        vector_var = None
    else:
        vector_var = symbolic_constant(vector_var)

    if language == "C" or language == "C99":
        cgen_class = c99_generator
    elif language == "C++":
        cgen_class = cpp_generator
    else:
        message = "Unrecognized language: %s -- select one of C, C99 or C++" % (language,)
        logging.error(message)
        raise ValueError(message)

    if debug_output:
        # In debug mode the generator itself is not vectorizing; the loop
        # variable is only recorded on debug_vector_var.
        cgen = cgen_class(hfavroot, storage, None)
        cgen.debug_vector_var = vector_var
    else:
        cgen = cgen_class(hfavroot, storage, vector_var)

    default_typedict = cgen.typedict.copy()
    for k, v in options.get("types", {}).items():
        if k in cgen.typedict:
            logging.warning("%s already exists in dictionary -- overriding with %s.", k, v)
        m = re.match(r"([a-zA-Z]+)(\d+)?", v)
        if m is None:
            raise SyntaxError("Malformed type: %s -- expected <base type>[<width>], e.g. float64" % v)
        elif m.group(2) is None:
            if m.group(1) in default_typedict.keys():
                cgen.typedict[k] = list(default_typedict[m.group(1)])
            else:
                raise SyntaxError("Malformed type: %s -- width must be specified for all types not in %s" % (v, default_typedict.keys()))
        else:
            if m.group(1) not in ["int", "float"]:
                raise SyntaxError("Malformed type: %s -- base type must be 'int' or 'float'" % m.group(1))
            cgen.typedict[k] = [m.group(1), int(m.group(2))]
    logging.debug("Using type dictionary: %s", cgen.typedict)

    loops = iteration_space.from_yaml(config)

    # Decide where the code goes now, but open the file only once the code has
    # been generated, so a failed run neither truncates an existing output
    # file nor leaks an open handle.  output is None for stdout.
    if args.output_location:
        if args.output_location == '-':
            output = None
            logging.info("Generating code to stdout (overriden)")
        else:
            output = args.output_location
            logging.info("Generating code to %s (overriden)", output)
    elif "output file" in options:
        output = options["output file"]
        logging.info("Generating code into %s", output)
    else:
        output = None
        logging.info("Generating code to stdout")

    header = options.get("header")
    footer = options.get("footer")

    logging.info("Loaded input file")

    for iv in loops.loop_order:
        axioms.append(ivar_axiom(iv))

    logging.info("Chaining...")
    gr = dag_chain(pg, cgen.typedict, axioms).resolve(goals)
    logging.info("Chaining finished.")
    logging.info("IDAG has %s", gr.stats())
    logging.info(" Iteration space is over %s", [str(x) for x in gr.ivars()])

    rd = rap_dual.from_idag(gr)
    logging.info("Rap DUAL! %s ", rd.stats())
    order = rd.level_sort()
    for i, o in enumerate(order):
        logging.info("RD %d %s ", i, o.name())
    levels = rd.level_sort_levels()
    for i, l in enumerate(levels):
        logging.info("RD level %d %s ", i, [o.name() for o in l])
    rd.check_reductions()

    rap_loops = rd.topo_sort(lambda x: (len(x.rap_ivars()), x.rap_ivars()))
    for i, r in enumerate(rap_loops):
        logging.debug("%d %s %s", i, str(r), r.rap_ivars())

    dump_dags = extra_output > 0
    _write_dot(dump_dags, filename, "rapdual.dot", "rapdual dag",
               lambda: rd.dot(v_fmt=lambda x: "%s-%s" % (x.name(), [str(i) for i in x.rap_ivars()]), e_fmt=lambda x: ""))
    _write_dot(dump_dags, filename, ".dot", "inference dag", lambda: gr.dot())

    fusion = not debug_output
    logging.info("Rap dual super node fusion.")
    indag = inest_dag(rd, loops)
    _write_dot(dump_dags, filename, "_inest.dot", "inest dag",
               lambda: indag.dot(lambda v: str(v.inest), lambda v: ""))

    if not debug_output:
        logging.info("Fusing inest_dag")
        indag.topo_fuse()
        _write_dot(dump_dags, filename, "_inest_fused.dot", "fused dag",
                   lambda: indag.dot(lambda v: str(v.inest), lambda v: ""))
    else:
        logging.info("Not fusing inest_dag")

    rolling = True
    if rolling and fusion:
        logging.info("Preparing rolling generator")
        ig_class = rolling_generator
    else:
        logging.info("Preparing simple generator")
        ig_class = simple_generator

    ig = ig_class(indag, loops, cgen, prefix)
    logging.info("Generator initialized")
    lst = codegen.listing()
    cgen.header(lst, header)
    logging.info("Generating")
    ig.generate(lst)
    logging.info("Done generating")
    cgen.footer(lst, footer)

    code = lst.emit()
    if output is not None:
        logging.info("Writing code to %s", os.path.abspath(output))
        with open(output, "w") as of:
            of.write(code + "\n")
    else:
        logging.info("Writing code to stdout")
        sys.stdout.write(code + "\n")

    logging.info("Finished generating code.")
    logging.info("Done; exiting.")

    sys.exit(0)


if __name__ == '__main__':
    hfav_run_yaml()

# --------------------------------------------------------------------------------
# /hfav/__init__.py:
# --------------------------------------------------------------------------------
# hfav/__init__.py; module header file
#
# Copyright 2017 Intel Corporation
#
# GENERATED CODE EXEMPTION
#
# The output of this tool does not automatically import the Apache
# 2.0 license, except the output will continue to be subject to the
# limitation of liability clause in the Apache 2.0 license.
Users may 10 | # license their output under any license they choose but the liability 11 | # of the authors of the tool for that output is governed by the 12 | # limitation of liability clause in the Apache 2.0 license. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | 26 | global extra_output 27 | extra_output = 0 28 | -------------------------------------------------------------------------------- /hfav/c99.py: -------------------------------------------------------------------------------- 1 | # hfav/c99.py; code generation for c99 2 | 3 | # Copyright 2017 Intel Corporation 4 | # 5 | # GENERATED CODE EXEMPTION 6 | # 7 | # The output of this tool does not automatically import the Apache 8 | # 2.0 license, except the output will continue to be subject to the 9 | # limitation of liability clause in the Apache 2.0 license. Users may 10 | # license their output under any license they choose but the liability 11 | # of the authors of the tool for that output is governed by the 12 | # limitation of liability clause in the Apache 2.0 license. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import codegen
import os


class c99_generator(codegen.codegen):
    """C99 back-end: the base generator plus strip-mined loops over ``vector_var``.

    When a loop over ``vector_var`` includes the steady-state phase (1), it is
    emitted with a step of ``stride*VLEN`` up to a precomputed
    ``<itervar>_vbound``; a separate remainder loop covers what is left.
    ``VLEN`` is only ever emitted as text and must be defined by the code that
    includes the generated output.
    """

    def __init__(self, root, storage, vector_var):
        """Store the loop variable to vectorize (``None`` disables vectorization)."""
        super(c99_generator, self).__init__(root, storage)
        self.vector_var = vector_var
        self.remainder = False  # True while emitting inside a remainder loop

    def begin_vector_loop(self, lst):
        """Open the per-lane loop ``for (__hfav_vlane = 0; ... < VLEN; ...)``."""
        lst.append("#pragma simd assert\n")
        lst.append("for (int __hfav_vlane = 0; __hfav_vlane < VLEN; ++__hfav_vlane)\n")
        self.begin_scope(lst)

    def end_vector_loop(self, lst):
        """Close the scope opened by begin_vector_loop."""
        self.end_scope(lst)

    def begin_loop(self, lst, itervar, interval, phase):
        """Open a loop over ``itervar``; strip-mine it when it is the vector loop.

        Loops over other variables, and phases that do not include the steady
        state (1), are delegated to the base class unchanged.
        """
        if not (itervar == self.vector_var and 1 in phase):
            super(c99_generator, self).begin_loop(lst, itervar, interval, phase)
        else:
            stride = str(interval.stride)
            vstride = stride + "*VLEN"
            # Skip the first/last iteration when the prologue (0) / epilogue (2)
            # is not part of this phase.
            start = interval.start if 0 in phase else "%s+%s" % (interval.start, stride)
            end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
            # NOTE(review): the mask rounds the trip count down to a multiple of
            # VLEN, which requires VLEN to be a power of two and appears to
            # assume a unit stride -- confirm before using other strides.
            vbound = "%s + (((%s)-(%s)) & ~(VLEN-1))" % (start, end, start)
            lst.append("const int %s_vbound = %s;\n" % (itervar, vbound))
            lst.append("for (%s = %s; %s < %s_vbound; %s += %s)\n" % (itervar, start, itervar, itervar, itervar, vstride))
            self.vectorize = True
            self.begin_scope(lst)

    def end_loop(self, lst, itervar, interval, phase):
        """Close a loop, leaving vectorized mode if it was the vector loop."""
        if itervar == self.vector_var and 1 in phase:
            self.vectorize = False
        super(c99_generator, self).end_loop(lst, itervar, interval, phase)

    def begin_remainder_loop(self, lst, itervar, interval, phase):
        """Open the scalar loop running from ``<itervar>_vbound`` to the end bound.

        Relies on ``<itervar>_vbound`` having been declared by begin_loop.
        """
        stride = interval.stride
        end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
        lst.append("for (%s = %s_vbound; %s < %s; %s += %s)\n" % (itervar, itervar, itervar, end, itervar, stride))
        self.begin_scope(lst)
        self.remainder = True

    def end_remainder_loop(self, lst, itervar, interval, phase):
        """Close the scope opened by begin_remainder_loop."""
        self.end_scope(lst)
        self.remainder = False

    def rotate(self, type, ident, start, end, roll_var):
        """Return the call expression that rotates a rolling buffer.

        Rolling along the vector loop while vectorized rotates by ``VLEN``;
        rolling along another loop while a vector loop is configured uses the
        ``vrotate_*`` variant; every other case uses the base-class scalar
        rotate.
        """
        rolls_vector_loop = (roll_var == self.vector_var)
        if rolls_vector_loop and self.vectorize:
            return self.invoke("rotate_%s%s" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(start), str(end), "VLEN"])
        if not rolls_vector_loop and self.vector_var is not None:
            return self.invoke("vrotate_%s%s" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(start), str(end), "1"])
        return super(c99_generator, self).rotate(type, ident, start, end, roll_var)

    def rotate_ptr(self, type, ident, len, roll_var):
        """Return the call expression that rotates an array of row pointers.

        Raises NotImplementedError when rolling along the vector loop while
        vectorized.
        """
        if roll_var == self.vector_var and self.vectorize:
            raise NotImplementedError("rotate_ptr is not implemented for vector types")
        return super(c99_generator, self).rotate_ptr(type, ident, str(len), roll_var)

# --------------------------------------------------------------------------------
# /hfav/codegen.py:
# --------------------------------------------------------------------------------
# hfav/codegen.py; code generation base class
#
# Copyright 2017 Intel Corporation
#
# GENERATED CODE EXEMPTION
#
# The output of this tool does not automatically import the Apache
# 2.0 license,
# except the output will continue to be subject to the
# limitation of liability clause in the Apache 2.0 license. Users may
# license their output under any license they choose but the liability
# of the authors of the tool for that output is governed by the
# limitation of liability clause in the Apache 2.0 license.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from operator import attrgetter
import os
import logging
logger = logging.getLogger(__name__)


class codegen(object):
    """Base class that renders C-like source text into a ``listing``.

    Methods either return a source fragment as a string (``assign``,
    ``invoke``, ``rotate``, ...) or append complete lines to the listing
    passed as ``lst`` (``statement``, ``begin_loop``, ``comment``, ...).
    """

    def __init__(self, root, storage):
        """Record the hfav root and the storage mode for temporary arrays.

        ``storage == "stack"`` makes array_declaration emit plain arrays; any
        other value makes it emit ``_mm_malloc`` allocations.
        """
        self.root = root
        # type name -> [base kind, width in bits]; used to build rotate_* names
        self.typedict = {'char': ['int', 8], 'byte': ['int', 8], 'int': ['int', 32], 'long': ['int', 64], 'float': ['float', 32], 'double': ['float', 64]}
        self.vector_var = None
        self.vectorize = False
        self.hindent = 0  # indentation levels opened by the header
        self.debug_vector_var = None  # TODO: Sorry Jason
        self.storage = storage

    def byref(self, ident):
        """Return the expression used to pass ``ident`` by reference."""
        return "&" + ident

    def header(self, lst, h):
        """Append the rotate preamble and the user header ``h`` (may be None).

        The listing is indented once for every ``{`` in ``h`` that is not
        matched by a ``}``, so generated code lines up inside the scopes the
        header opens.
        """
        self.rotate_function(lst)
        if h is not None:
            lst.append(h)
            self.hindent = h.count('{') - h.count('}')
            for _ in range(0, self.hindent):
                lst.indent()

    def footer(self, lst, f):
        """Append the user footer ``f`` (may be None) at the pre-header indentation."""
        if f is not None:
            # Undo every level opened by header(), not just one; otherwise a
            # header opening two or more scopes leaves the footer indented.
            for _ in range(0, self.hindent):
                lst.deindent()
            lst.append(f)

    def ident_offset(self, ident, offset):
        """Return ``ident``, ``ident+N`` or ``ident-N`` for an integer offset."""
        if offset > 0:
            return ident + "+" + str(offset)
        elif offset == 0:
            return ident
        else:
            return ident + "-" + str(abs(offset))

    def prologue_gen(self):
        """Return the generator used for prologue code (this object)."""
        return self

    def epilogue_gen(self):
        """Return the generator used for epilogue code (none in the base class)."""
        return None

    def read_aref(self, ident, offset):
        """Return an array read ``ident[o0][o1]...`` for the subscripts in ``offset``."""
        return "%s%s" % (ident, "".join([("[%s]" % o) for o in offset]))

    def write_aref(self, ident, offset):
        """Return an array write target ``ident[o0][o1]...``."""
        return "%s%s" % (ident, "".join([("[%s]" % o) for o in offset]))

    def read_ref(self, ident):
        """Return a scalar read of ``ident``."""
        return "%s" % (ident,)

    def write_ref(self, ident):
        """Return a scalar write target for ``ident``."""
        return "%s" % (ident,)

    def assign(self, dst, src):
        """Return the assignment expression ``dst = src`` (no terminator)."""
        return "%s = %s" % (dst, src)

    def invoke(self, ident, args):
        """Return the call expression ``ident(arg0, arg1, ...)``."""
        return "%s(%s)" % (ident, ", ".join(args))

    def array_declaration(self, type, ident, size):
        """Return a declaration for a temporary of the given dimensions.

        With stack storage this is ``type ident[s0][s1]...``.  Otherwise a
        dimensionless temporary stays a plain scalar and anything else becomes
        a pointer-to-row initialised from ``_mm_malloc`` with 64-byte
        alignment.
        """
        if self.storage == "stack":
            return "%s %s%s" % (type, ident, "".join([("[%s]" % s) for s in size]))

        # len() rather than "== []" so an empty tuple is handled as well.
        if len(size) == 0:
            return "%s %s" % (type, ident)

        # All dimensions after the first are part of the pointed-to row type.
        unroll_str = "".join([("[%s]" % s) for s in size[1:]])
        decl = "%s (*%s)%s" % (type, ident, unroll_str)
        cast = "%s(*)%s" % (type, unroll_str)
        flatsize = "*".join([("(%s)" % s) for s in size])
        return "%s = (%s) _mm_malloc((%s)*sizeof(%s), 64)" % (decl, cast, flatsize, type)

    def array_free(self, type, ident, size):
        """Return the statement releasing a temporary, or None if nothing is needed."""
        if self.storage == "stack" or len(size) == 0:
            return None
        return "_mm_free(%s)" % (ident)

    def array_ptr_declaration(self, type, ptr_ident, src_ident, size):
        """Return a declaration of ``size[0]`` pointers into the rows of ``src_ident``.

        ``size[0]`` must be an integer because it is used as a Python range.
        """
        roll_str = "[%s]" % str(size[0])
        if len(size) > 2:
            unroll_str = "".join([("[%s]" % s) for s in size[2:]])
        else:
            unroll_str = ""
        dst = "%s (*%s%s)%s " % (type, ptr_ident, roll_str, unroll_str)
        srcs = [src_ident + ("[%s]" % str(r)) for r in range(size[0])]
        return self.assign(dst, "{" + ", ".join(srcs) + "}")

    def statement(self, lst, state):
        """Append ``state`` followed by ``;``; a None statement is ignored."""
        if state is not None:
            lst.append(state + ";\n")

    def init_iters(self, lst, loops):
        """Declare every loop variable in ``loops.loop_dict`` as an ``int``."""
        if len(loops.loop_dict.keys()) > 0:
            lst.append("int %s;\n" % (", ".join(map(attrgetter("ident"), loops.loop_dict.keys()))))

    def begin_scope(self, lst):
        """Append ``{`` and indent the listing."""
        lst.append("{\n")
        lst.indent()

    def end_scope(self, lst):
        """De-indent the listing and append ``}``."""
        lst.deindent()
        lst.append("}\n")

    def begin_loop(self, lst, itervar, interval, phase):
        """Open the construct for one loop phase and enter its scope.

        ``phase`` is a list drawn from 0 (prologue), 1 (steady state) and
        2 (epilogue).  A lone prologue or epilogue becomes a guarded single
        iteration; any phase including 1 becomes a ``for`` loop whose bounds
        skip the first/last iteration when 0/2 are absent.
        """
        stride = interval.stride
        if phase == [0]:
            lst.append("%s = %s;\n" % (itervar, interval.start))
            lst.append("if (%s < %s)\n" % (itervar, interval.end))
        elif 1 in phase:
            if self.debug_vector_var is not None and self.debug_vector_var == itervar:
                lst.append("#pragma simd assert\n")
            start = interval.start if 0 in phase else "%s+%s" % (interval.start, stride)
            end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
            lst.append("for (%s = %s; %s < %s; %s += %s)\n" % (itervar, start, itervar, end, itervar, stride))
        elif phase == [2]:
            lst.append("%s = %s-1;\n" % (itervar, interval.end))
            lst.append("if (%s > %s)\n" % (itervar, interval.start))
        self.begin_scope(lst)

    def end_loop(self, lst, itervar, interval, phase):
        """Close the scope opened by begin_loop."""
        self.end_scope(lst)

    def rotate_header(self):
        """Return the preamble emitted ahead of the header (currently a blank line)."""
        return "\n"  # TODO: Decide if we should remove this completely.
        # return "#include \"hfav/c99-rotate.h\"\n"

    def rotate_function(self, lst):
        """Append the rotate preamble to the listing."""
        lst.append(self.rotate_header())

    def rotate(self, type, ident, start, end, roll_var):
        """Return a ``rotate_<kind><width>(ident, start, end, 1)`` call for ``type``."""
        return self.invoke("rotate_%s%s" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(start), str(end), "1"])

    def rotate_ptr(self, type, ident, len, roll_var):
        """Return a ``rotate_<kind><width>_ptr(ident, len)`` call for ``type``."""
        return self.invoke("rotate_%s%s_ptr" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(len)])

    def comment(self, lst, lines):
        """Append ``lines`` as a comment.

        More than two lines are emitted as one ``/* ... */`` block; otherwise
        each line gets its own ``//`` comment.
        """
        if len(lines) > 2:
            lst.append("/* " + lines[0] + "\n")
            for li in [" " + z + "\n" for z in lines[1:-1]]:
                lst.append(li)
            lst.append(" " + lines[-1] + "*/\n")
        else:
            for li in ["// " + z + "\n" for z in lines]:
                lst.append(li)


class listing(object):
    """Accumulates output text with a current indentation level."""

    def __init__(self):
        self.indent_level = 0
        self.lines = []
        self.indent_width = 4  # spaces per indentation level

    def indent(self):
        """Increase the indentation applied to subsequently appended text."""
        self.indent_level += 1

    def deindent(self):
        """Decrease the indentation applied to subsequently appended text."""
        self.indent_level -= 1

    def append(self, string):
        """Append ``string``, which must end in a newline.

        Only the start of ``string`` is indented; newlines embedded in it are
        emitted as-is.
        """
        # endswith() so an empty string fails the assertion, not an IndexError.
        assert string.endswith('\n')
        self.lines.append(self.indent_level * self.indent_width * " " + string)

    def emit(self):
        """Return everything appended so far as a single string."""
        return "".join(self.lines)

# --------------------------------------------------------------------------------
# /hfav/cpp.py:
# --------------------------------------------------------------------------------
# hfav/cpp.py; C++ code generation
#
# Copyright 2017 Intel Corporation
#
# GENERATED CODE EXEMPTION
#
# The output of this tool does not automatically import the Apache
# 2.0 license, except the output will continue to be subject to the
# limitation of liability clause in the Apache 2.0 license.
class cpp_generator(codegen.codegen):
    """C++ code generator with optional explicit vectorization of one loop.

    A loop over `vector_var` whose phase contains 1 is emitted in strips of
    VLEN iterations; every other loop is delegated to the base generator.
    VLEN appears by name in the generated text and must be defined by
    whatever compiles that text.
    """

    def __init__(self, root, storage, vector_var):
        """Store the vectorized loop variable on top of the base generator state.

        Args:
            root, storage: forwarded unchanged to codegen.codegen.
            vector_var: loop variable to vectorize over, or None.
        """
        super(cpp_generator, self).__init__(root, storage)
        self.vector_var = vector_var
        self.remainder = False
        # rotate()/rotate_ptr() read this flag; give it a defined value so
        # they work before the first vectorized loop has been opened.
        self.vectorize = False

    @staticmethod
    def _peeled_bounds(interval, phase):
        """Return (start, end) text for a loop, peeling a stride when phase lacks 0 / 2."""
        stride = interval.stride
        start = interval.start if 0 in phase else "%s+%s" % (interval.start, stride)
        end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
        return start, end

    def byref(self, ident):
        """Return `ident` unchanged."""
        return ident

    def begin_vector_loop(self, lst):
        """Open the per-lane loop (`__hfav_vlane` from 0 to VLEN) and its scope."""
        lst.append("#pragma simd assert\n")
        lst.append("for (int __hfav_vlane = 0; __hfav_vlane < VLEN; ++__hfav_vlane)\n")
        self.begin_scope(lst)

    def end_vector_loop(self, lst):
        """Close the scope opened by begin_vector_loop."""
        self.end_scope(lst)

    def begin_loop(self, lst, itervar, interval, phase):
        """Open a loop over `itervar`, strip-mined by VLEN when it is the vector loop.

        For the vector loop this declares `<itervar>_vbound`, the start plus
        the extent masked with ~(VLEN-1), and steps by stride*VLEN up to it.
        NOTE(review): the mask rounds the extent to a multiple of VLEN, not
        of stride*VLEN, and assumes VLEN is a power of two -- confirm this
        is intended for strides other than 1.
        """
        if not (itervar == self.vector_var and 1 in phase):
            super(cpp_generator, self).begin_loop(lst, itervar, interval, phase)
            return

        start, end = self._peeled_bounds(interval, phase)
        vstride = str(interval.stride) + "*VLEN"
        vbound = "%s + (((%s)-(%s)) & ~(VLEN-1))" % (start, end, start)
        lst.append("const int %s_vbound = %s;\n" % (itervar, vbound))
        lst.append("for (%s = %s; %s < %s_vbound; %s += %s)\n" % (itervar, start, itervar, itervar, itervar, vstride))
        self.vectorize = True
        self.begin_scope(lst)

    def end_loop(self, lst, itervar, interval, phase):
        """Close a loop; leaving the vector loop clears the vectorize flag."""
        if itervar == self.vector_var and 1 in phase:
            self.vectorize = False
        super(cpp_generator, self).end_loop(lst, itervar, interval, phase)

    def begin_remainder_loop(self, lst, itervar, interval, phase):
        """Open the scalar loop that finishes what the vector loop left over.

        It starts at `<itervar>_vbound`, which the generated code must have
        declared already (begin_loop does so for the vector loop).
        """
        _, end = self._peeled_bounds(interval, phase)
        lst.append("for (%s = %s_vbound; %s < %s; %s += %s)\n" % (itervar, itervar, itervar, end, itervar, interval.stride))
        self.begin_scope(lst)
        self.remainder = True

    def end_remainder_loop(self, lst, itervar, interval, phase):
        """Close the remainder loop and clear the remainder flag."""
        self.end_scope(lst)
        self.remainder = False

    def rotate(self, type, ident, start, end, roll_var):
        """Return the rotate call for `ident`.

        * rolling along the vector variable: hfav::rotate, shifting by VLEN
          inside the vectorized loop and by 1 outside it;
        * rolling along another variable while a vector variable is set:
          hfav::vrotate with a shift of 1;
        * no vector variable: hfav::rotate with a shift of 1.
        """
        if roll_var == self.vector_var:
            helper = "hfav::rotate"
            shift = "VLEN" if self.vectorize else "1"
        elif self.vector_var is not None:
            helper, shift = "hfav::vrotate", "1"
        else:
            helper, shift = "hfav::rotate", "1"
        return self.invoke(helper, [ident, str(start), str(end), shift])

    def rotate_ptr(self, type, ident, len, roll_var):
        """Return the hfav::rotate_ptr call for `ident`.

        Raises:
            NotImplementedError: when rolling along the vector variable
                inside the vectorized loop.
        """
        if roll_var == self.vector_var and self.vectorize:
            raise NotImplementedError("hfav::rotate_ptr is not implemented for vector types")
        return self.invoke("hfav::rotate_ptr", [ident, str(len)])
class dot_generator(codegen.codegen):
    """Generator that renders the computation as a graphviz 'dot' digraph."""

    def __init__(self):
        # NOTE(review): the base-class initializer is not invoked here;
        # confirm that is intended.
        self.lines = []
        self.indent = 0

    def header(self, lst):
        """Open the digraph and emit the fixed graph/node attributes."""
        lst.append("digraph\n")
        lst.append("{\n")
        lst.indent()
        lst.append("size=\"20,20\";\n")
        lst.append("ratio=fill;\n")
        lst.append("node [shape=box];\n")

    def footer(self, lst):
        """Close the digraph opened by header()."""
        # De-indent first so the closing brace lines up with the opening one.
        lst.deindent()
        lst.append("}\n")

    def offset_string(self, offset):
        """Return `offset` as identifier-safe text: '+' becomes 'p', '-' becomes 'm'."""
        return str(offset).replace("+", "p").replace("-", "m")

    def read_aref(self, ident, offset):
        """Return the node name for reading `ident` at `offset`."""
        return "%s_%s" % (ident, self.offset_string(offset))

    def write_aref(self, ident, offset):
        """Return the node name for writing `ident` at `offset`."""
        return "%s_%s" % (ident, self.offset_string(offset))

    def assign(self, dst, src):
        """Return the edge `src -> dst`."""
        return "%s -> %s" % (src, dst)

    def invoke(self, ident, outputs, inputs):
        """Return one edge per (input, output) pair, newline-separated.

        `ident` is not used. No trailing newline or terminator is added, so
        the last edge is returned intact.
        """
        edges = [self.assign(o, i) for i in inputs for o in outputs]
        return "\n".join(edges)

    def array_declaration(self, ident, size):
        """Return one labelled node declaration per element of `ident`.

        The final character (the last ';') is dropped from the result.
        NOTE(review): presumably the caller re-adds the terminator --
        confirm against the code that consumes this string.
        """
        pad = self.indent * " "
        nodes = [pad + "%s_%s [label=\"%s[%s]\"];" % (ident, self.offset_string(k), ident, k)
                 for k in range(0, size)]
        return "\n".join(nodes)[:-1]

    def begin_loop(self, lst, loopi):
        """Loops have no representation in the graph output."""
        pass

    def end_loop(self, lst):
        """Loops have no representation in the graph output."""
        pass
#ifndef _HFAV_C99_ROTATE_H_
#define _HFAV_C99_ROTATE_H_

/* NOTE(review): the original has an #include directive at this point whose
   target is not visible in this view; restore it from version control.
   Nothing defined below refers to a name from another header. */

/* Function-like max/min; each argument may be evaluated twice. */
#define __hfav_max(x, y) (((x) > (y)) ? (x) : (y))
#define __hfav_min(x, y) (((x) < (y)) ? (x) : (y))

/* Type names used as suffixes of the helpers generated below.
   NOTE(review): the widths implied by the names depend on the platform's
   int/long -- confirm for the targets in use. */
typedef int int32;
typedef long int64;
typedef float float32;
typedef double float64;

/* Defines rotate_<T>(v, start, end, s): for each i in [start, end),
   v[i] = v[i+s]. Element v[end-1+s] is read, so it must exist. */
#define ROTATE(T) \
static inline void rotate_##T(T v[], int start, int end, int s) \
{ \
    for (int i = start; i < end; ++i) \
    { \
        v[i] = v[i+s]; \
    } \
}

/* Defines vrotate_<T>(v, start, end, s): the same shift applied to rows of
   VLEN elements. VLEN is not defined in this header and must be visible
   before the instantiations below. */
#define VROTATE(T) \
static inline void vrotate_##T(T v[][VLEN], int start, int end, int s) \
{ \
    for (int i = start; i < end; ++i) \
    { \
        _Pragma("simd assert") \
        for (int j = 0; j < VLEN; ++j) \
        { \
            v[i][j] = v[i+s][j]; \
        } \
    } \
}

/* Defines rotate_<T>_ptr(v, len): moves v[0] to the back of an array of
   `len` pointers, shifting the others down by one. */
#define ROTATE_PTR(T) \
static inline void rotate_##T##_ptr(T* v[], int len) \
{ \
    T* temp = v[0]; \
    for (int i = 0; i < len-1; ++i) \
    { \
        v[i] = v[i+1]; \
    } \
    v[len-1] = temp; \
}

ROTATE(int32)
ROTATE(int64)
ROTATE(float32)
ROTATE(float64)

VROTATE(int32)
VROTATE(int64)
VROTATE(float32)
VROTATE(float64)

ROTATE_PTR(int32)
ROTATE_PTR(int64)
ROTATE_PTR(float32)
ROTATE_PTR(float64)

#endif /* _HFAV_C99_ROTATE_H_ */
static inline void rotate(T v[], int start, int end, int s) 9 | { 10 | for (int i = start; i < end; ++i) 11 | { 12 | v[i] = v[i+s]; 13 | } 14 | } 15 | 16 | template 17 | static inline void vrotate(T v[][VLEN], int start, int end, int s) 18 | { 19 | for (int i = start; i < end; ++i) 20 | { 21 | #pragma simd assert 22 | for (int j = 0; j < VLEN; ++j) 23 | { 24 | v[i][j] = v[i+s][j]; 25 | } 26 | } 27 | } 28 | 29 | template 30 | static inline void rotate_ptr(T v[], int len) 31 | { 32 | const T temp = v[0]; 33 | for(int i = 0; i < len-1; ++i) 34 | { 35 | v[i] = v[i+1]; 36 | } 37 | v[len-1] = temp; 38 | } 39 | 40 | } 41 | 42 | #endif /* _HFAV_CPP_ROTATE_H_ */ 43 | -------------------------------------------------------------------------------- /hfav/ispace.py: -------------------------------------------------------------------------------- 1 | # hfav/ispace.py; iteration space manipulation tools 2 | 3 | # Copyright 2017 Intel Corporation 4 | # 5 | # GENERATED CODE EXEMPTION 6 | # 7 | # The output of this tool does not automatically import the Apache 8 | # 2.0 license, except the output will continue to be subject to the 9 | # limitation of liability clause in the Apache 2.0 license. Users may 10 | # license their output under any license they choose but the liability 11 | # of the authors of the tool for that output is governed by the 12 | # limitation of liability clause in the Apache 2.0 license. 13 | # 14 | # Licensed under the Apache License, Version 2.0 (the "License"); 15 | # you may not use this file except in compliance with the License. 16 | # You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class strided_interval(object):
    """A half-open loop range [start:end:stride].

    `start` and `end` are stored as text, so they may be numbers or
    symbolic expressions; `stride` is stored as given.
    """

    def __init__(self, start, end, stride=1):
        self.start = str(start)
        self.end = str(end)
        self.stride = stride

    def __str__(self):
        return "[%s:%s:%s]" % (self.start, self.end, self.stride)

    @classmethod
    def num(cls, n):
        """Return the interval holding the single value `n`, i.e. [n:n+1:1]."""
        return cls(n, n + 1)

    def sweep(self, n):
        """Return a new interval with `n` appended to both bounds.

        The bounds are text, so `n` is concatenated, not added numerically.
        NOTE(review): presumably `n` is a text fragment such as "+1";
        confirm against callers.
        """
        return strided_interval(self.start + n, self.end + n, self.stride)

    def explicit(self):
        """Return the concrete range of values covered by this interval.

        Raises:
            ValueError: if either bound is not an integer literal.
        """
        # The bounds are stored as text; range() needs integers.
        return range(int(self.start), int(self.end), self.stride)


class iteration_space(object):
    """A set of loops (iterator -> strided_interval) and their ordering."""

    def __init__(self, loop_dict, loop_order):
        self.loop_dict = loop_dict
        self.loop_order = loop_order

    def dim(self):
        """Return the number of ordered loops."""
        return len(self.loop_order)

    def strides(self):
        """Return (iterator, stride) pairs in loop order."""
        return [(k, self.loop_dict[k].stride) for k in self.loop_order]

    def copy(self):
        """Return a new space with its own dict and order list (intervals are shared)."""
        return iteration_space(self.loop_dict.copy(), list(self.loop_order))

    def map_offset(self, iter_var, offs, roll_var=None):
        """Map an offset along `iter_var` to an index.

        Along `roll_var` the offset is returned unchanged. Otherwise the
        result is text: the iterator's distance from the loop start, divided
        by the stride when that is not 1, with a non-zero `offs` appended.
        """
        if roll_var is not None and iter_var == roll_var:
            return offs
        loop = self.loop_dict[iter_var]
        if loop.stride != 1:
            res_str = "(%s-%s)/%s" % (iter_var, loop.start, loop.stride)
        else:
            res_str = "%s-%s" % (iter_var, loop.start)
        if offs != 0:
            return res_str + "+%d" % offs
        return res_str

    def interval(self, ivar):
        """Return the strided_interval registered for `ivar`."""
        return self.loop_dict[ivar]

    def subspace(self, ivars):
        """Return the space restricted to `ivars`, preserving loop order."""
        loop_dict = dict((iv, self.loop_dict[iv]) for iv in ivars)
        loop_order = [iv for iv in self.loop_order if iv in ivars]
        return iteration_space(loop_dict, loop_order)

    def is_iter(self, var):
        """Return True if `var`, as a symbolic constant, is a loop iterator of this space."""
        return term.symbolic_constant(var) in self.loop_dict

    @classmethod
    def from_yaml(cls, config):
        """Build an iteration space from config["codegen options"].

        "loops" is one mapping or a list of mappings with keys "iter_ident",
        "start", "end" and optionally "stride" (default 1, matching
        strided_interval). "loop order", when present, lists iterator names;
        it is stored reversed. Without it, the reversed order of the loops
        is used.
        """
        options = config["codegen options"]
        loop_stuff = options["loops"]
        if not isinstance(loop_stuff, list):
            loop_stuff = [loop_stuff]

        loops = dict((term.symbolic_constant(x["iter_ident"]),
                      strided_interval(x["start"], x["end"], x.get("stride", 1)))
                     for x in loop_stuff)

        try:
            names = options["loop order"]
        except KeyError:
            loop_order = list(reversed(list(loops)))
        else:
            loop_order = list(reversed([term.symbolic_constant(x) for x in names]))

        # Build a list explicitly: formatting a map object would only log its repr.
        logging.debug("loop_order: %s", [str(x) for x in loop_order])
        logging.debug("loops: %s", loops)
        return cls(loops, loop_order)
radius = 0.1  # vertex circle radius, also the gap left at each end of an arrow
hl = 0.1      # arrow head length


def arrow(s, e, estyle):
    """Return a FancyArrow from point `s` to point `e`, clear of the vertex circles.

    The arrow starts `radius` away from `s` and is shortened so its head
    stops `radius` short of `e`. Only the first two coordinates are used.

    Raises:
        ZeroDivisionError: if `s` and `e` coincide in those coordinates.
    """
    dx = e[0] - s[0]
    dy = e[1] - s[1]
    length = math.sqrt(dx * dx + dy * dy)
    ux, uy = dx / length, dy / length
    trim = hl + 2 * radius
    return mpatches.FancyArrow(s[0] + ux * radius, s[1] + uy * radius,
                               dx - ux * trim, dy - uy * trim,
                               head_length=hl, head_width=0.05, **estyle)


def iter_plot_start():
    """Create an 8x8 figure and return its single axes."""
    fig = pylab.figure(figsize=(8, 8))
    return fig.add_subplot(111)


def iter_plot(ax, dag, vstyle=None, estyle=None):
    """Draw the vertices and edges of `dag` on `ax`.

    Args:
        ax: axes to draw on.
        dag: object with `edges` (keys are (source, target) pairs) and
            `vertices` mappings; every vertex provides plotting_point().
        vstyle: keyword arguments for each vertex circle (default: none).
        estyle: keyword arguments for each edge arrow (default: none).

    Edges whose endpoints share a plotting point are skipped, since an arrow
    of zero length cannot be drawn. Tick marks are set only when there is at
    least one vertex.
    """
    vstyle = {} if vstyle is None else vstyle
    estyle = {} if estyle is None else estyle

    for s, e in dag.edges.keys():
        ps = s.plotting_point()
        pe = e.plotting_point()
        if ps[0] == pe[0] and ps[1] == pe[1]:
            continue
        ax.add_patch(arrow(ps, pe, estyle))

    points = [v.plotting_point() for v in dag.vertices.keys()]
    for p in points:
        ax.add_patch(mpatches.Circle(p, radius, **vstyle))

    if points:
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        # One tick of margin on every side of the occupied region.
        xticks = range(min(xs) - 1, max(xs) + 2)
        yticks = range(min(ys) - 1, max(ys) + 2)
        pylab.xticks(xticks, xticks)
        pylab.yticks(yticks, yticks)

    ax.xaxis.grid(True)
    ax.yaxis.grid(True)

    ax.set_aspect('equal')


def iter_plot_finish(fp):
    """Save the current figure to `fp`."""
    pylab.savefig(fp)
class cpp_generator(c99.c99_generator):
    """Unmaintained C++ generator: the C99 generator with template-style rotate helpers."""

    def __init__(self, root):
        super(cpp_generator, self).__init__(root)

    def byref(self, ident):
        """Return `ident` unchanged."""
        return ident

    def rotate_header(self):
        """Return the text emitted in place of the rotate-helper include (a blank line)."""
        # return "#include \"hfav/cpp-rotate.hpp\"\n"
        return "\n"

    def rotate(self, type, ident, start, end, roll_var):
        """Return a `rotate` call with a shift of 1; `type` and `roll_var` are unused."""
        return self.invoke("rotate", [ident, str(start), str(end), "1"])

    def rotate_ptr(self, type, ident, len, roll_var):
        """Return a `rotate_ptr` call; `type` and `roll_var` are unused."""
        return self.invoke("rotate_ptr", [ident, str(len)])


class cpp_autovec_generator(cpp_generator):
    """Unmaintained variant that picks rotate helpers based on a vector loop variable."""

    def __init__(self, root, vector_var):
        # The parent initializer accepts only `root`; the vector state read
        # by rotate()/rotate_ptr() is kept on this class.
        super(cpp_autovec_generator, self).__init__(root)
        self.vector_var = vector_var
        self.vectorize = False

    def rotate(self, type, ident, start, end, roll_var):
        """Return the rotate call for `ident`.

        * rolling along the vector variable while vectorizing: `rotate`
          shifting by VLEN;
        * rolling along another variable while a vector variable is set:
          `vrotate` with a shift of 1;
        * otherwise: the parent's scalar rotate.
        """
        if roll_var == self.vector_var:
            if self.vectorize:
                return self.invoke("rotate", [ident, str(start), str(end), "VLEN"])
        elif self.vector_var is not None:
            return self.invoke("vrotate", [ident, str(start), str(end), "1"])
        return super(cpp_autovec_generator, self).rotate(type, ident, start, end, roll_var)

    def rotate_ptr(self, type, ident, len, roll_var):
        """Return the parent's rotate_ptr call.

        Raises:
            NotImplementedError: when rolling along the vector variable
                while vectorizing.
        """
        if roll_var == self.vector_var and self.vectorize:
            raise NotImplementedError("rotate_ptr is not implemented for vector types")
        return super(cpp_autovec_generator, self).rotate_ptr(type, ident, len, roll_var)
class parse_error(ValueError):
    """Recoverable failure: the current rule did not match, so callers may backtrack."""
    pass


class fatal_parse_error(ValueError):
    """Unrecoverable failure; it is not a parse_error, so backtracking handlers let it propagate."""
    pass


"""grammar (as implemented by the parser class below; <alpha> includes '_'):
<numeric_literal>     := <digit>+
<symbolic_const>      := <alpha>[<alpha>|<digit>]*['!']
<const>               := [<symbolic_const>|<numeric_literal>]
<variable>            := <alpha>[<alpha>|<digit>]*'?'
<function_identifier> := <alpha>[<alpha>|<digit>]*
<function>            := <function_identifier>'('<expr_list>')'
<term>                := [<function>|<variable>|<const>]
<prefix_op>           := ['-'|'+']
<infix_op>            := ['-'|'+']
<suffix_op>           := '['<expr>']'
<expr_list>           := [<expr>?|<expr>','<expr_list>]

The natural rule
<expr>      := [<term>|<prefix_op><expr>|<expr><infix_op><expr>|<expr><suffix_op>]
is left-recursive, so it is implemented as:
<expr>      := [<term><tail_expr>|<prefix_op><expr>]
<tail_expr> := [<infix_op><expr><tail_expr>|<suffix_op><tail_expr>|<empty>]
"""


class parser(object):

    """Each parser either consumes some of string (advances pos) and returns something or resets pos and throws a parse_error"""

    def __init__(self, string):
        self.string = string
        self.pos = 0

    def digit(self):
        """Consume and return one decimal digit."""
        c = self.string[self.pos:self.pos + 1]
        if c.isdigit():
            self.pos += 1
            return c
        raise parse_error("Not a digit")

    def alpha(self):
        """Consume and return one letter or underscore."""
        c = self.string[self.pos:self.pos + 1]
        if c.isalpha() or c == '_':
            self.pos += 1
            return c
        raise parse_error("Not an alpha character")

    def alpha_digit(self):
        """Consume and return one letter, underscore or digit."""
        c = self.string[self.pos:self.pos + 1]
        if c.isalpha() or c == '_' or c.isdigit():
            self.pos += 1
            return c
        raise parse_error("Not an alpha or digit character")

    def match(self, char):
        """Consume and return `char` if it is the next character."""
        c = self.string[self.pos:self.pos + 1]
        if c == char:
            self.pos += 1
            return c
        raise parse_error("Not a %s" % (char,))

    def whitespace(self):
        """Skip any whitespace at the current position; never fails."""
        while self.pos < len(self.string) and self.string[self.pos].isspace():
            self.pos += 1

    def _identifier_chars(self):
        """Consume <alpha>[<alpha>|<digit>]* and return its characters as a list.

        Raises parse_error, without consuming anything, when the next
        character cannot start an identifier.
        """
        chars = [self.alpha()]
        try:
            while True:
                chars.append(self.alpha_digit())
        except parse_error:
            return chars

    def numeric_literal(self):
        """<numeric_literal> := <digit>+"""
        pos = self.pos
        res = []
        try:
            while True:
                res.append(self.digit())
        except parse_error:
            if len(res) < 1:
                self.pos = pos
                raise parse_error("Not a number")
        return term.numeric_constant(int(''.join(res))).canonize()

    def symbolic_const(self):
        """<symbolic_const> := <alpha>[<alpha>|<digit>]*['!']"""
        res = self._identifier_chars()
        try:
            res.append(self.match('!'))
        except parse_error:
            pass  # the '!' suffix is optional
        return term.symbolic_constant(''.join(res)).canonize()

    def const(self):
        """<const> := [<symbolic_const>|<numeric_literal>]"""
        pos = self.pos
        try:
            try:
                return self.symbolic_const()
            except parse_error:
                self.pos = pos
                return self.numeric_literal()
        except parse_error:
            self.pos = pos
            raise parse_error("Not a const")

    def variable(self):
        """<variable> := <alpha>[<alpha>|<digit>]*'?'"""
        pos = self.pos
        try:
            res = self._identifier_chars()
            res.append(self.match('?'))
            return term.variable(''.join(res)).canonize()
        except parse_error:
            self.pos = pos
            raise parse_error("Not a variable")

    def function_identifier(self):
        """<function_identifier> := <alpha>[<alpha>|<digit>]*"""
        pos = self.pos
        try:
            return ''.join(self._identifier_chars())
        except parse_error:
            self.pos = pos
            raise parse_error("Not a function identifier")

    def function(self):
        """<function> := <function_identifier>'('<expr_list>')'

        Once the opening parenthesis has been seen, a malformed argument
        list raises fatal_parse_error instead of backtracking.
        """
        pos = self.pos
        try:
            fi = self.function_identifier()
            self.whitespace()
            self.match('(')
            try:
                li = self.expr_list()
                self.whitespace()
                self.match(')')
            except parse_error:
                raise fatal_parse_error("Not a valid expression list.")
            return term.function(fi, li).canonize()
        except parse_error:
            self.pos = pos
            raise parse_error("Not a function")

    def expr_list(self):
        """<expr_list> := [<expr>?|<expr>','<expr_list>]

        Returns the list of parsed expressions (possibly empty); never fails.
        """
        res = []
        try:
            res.append(self.expr())
            while True:
                self.whitespace()
                self.match(',')
                res.append(self.expr())
        except parse_error:
            return res

    def term(self):
        """<term> := [<function>|<variable>|<const>]"""
        pos = self.pos
        self.whitespace()
        for alternative in (self.function, self.variable, self.const):
            try:
                return alternative()
            except parse_error:
                pass

        self.pos = pos
        raise parse_error("Not a term")

    def prefix_op(self):
        """<prefix_op> := ['-'|'+']; returns the term constructor for the operator."""
        try:
            self.match('-')
            return term.neg
        except parse_error:
            try:
                self.match('+')
                return term.add
            except parse_error:
                raise parse_error("not a prefix op")

    def infix_op(self):
        """<infix_op> := ['-'|'+']; returns the term constructor for the operator."""
        try:
            self.match('+')
            return term.add
        except parse_error:
            pass
        try:
            self.match('-')
            # NOTE(review): tail_expr applies this constructor to two
            # operands -- confirm term.neg is what binary '-' should build.
            return term.neg
        except parse_error:
            raise parse_error("not an infix op")

    def suffix_op(self, left):
        """<suffix_op> := '['<expr>']'; returns `left` indexed by the expression."""
        pos = self.pos
        try:
            self.whitespace()
            self.match('[')
            expr1 = self.expr()
            self.match(']')
            return term.at(left, expr1).canonize()
        except parse_error:
            self.pos = pos
            raise parse_error("Not a suffix op")

    def tail_expr(self, left):
        """<tail_expr> := [<infix_op><expr><tail_expr>|<suffix_op><tail_expr>|<empty>]

        Extends `left` with any operators that follow; never fails. When
        nothing follows, `left` is returned and no input is consumed.
        """
        pos = self.pos
        try:
            self.whitespace()
            op = self.infix_op()
            expr0 = self.expr()
            newleft = op([left, expr0])
            return self.tail_expr(newleft)
        except parse_error:
            # Rewind so an operator without a right-hand side is not
            # swallowed before the suffix alternative is tried.
            self.pos = pos
        try:
            self.whitespace()
            op = self.suffix_op(left)
            return self.tail_expr(op)
        except parse_error:
            self.pos = pos
            return left

    def expr(self):
        """<expr> := [<term><tail_expr>|<prefix_op><expr>]"""
        pos = self.pos
        try:
            self.whitespace()
            a = self.term()
            self.whitespace()
            return self.tail_expr(a)
        except parse_error:
            self.pos = pos
        try:
            self.whitespace()
            a = self.prefix_op()
            b = self.expr()
            return a([self.tail_expr(b)]).canonize()
        except parse_error:
            # Restore the position: a prefix operator may already have
            # been consumed.
            self.pos = pos
            raise parse_error("no expression found")

if __name__ == '__main__':
    print(parser(" a (x? , a(q![i!-1] ) [ 1 + 1 ], -1 )").expr())
# Run ./hfav.py once per line of regress-results/test-examples.txt using
# GNU parallel, then print the job log: header first, jobs sorted
# numerically on column 7.
# NOTE(review): column 7 of the GNU parallel job log is presumably the exit
# value, so failures sort last -- confirm against the installed version.

jobfile=regress-results/jobs.txt

# Without this check a missing tool only shows up as errors on the job log.
if ! command -v parallel > /dev/null 2>&1; then
    echo "regress.sh: GNU parallel is required" >&2
    exit 1
fi

# Per-job output is stored under regress-results by --results; parallel's
# own output is discarded.
parallel -a regress-results/test-examples.txt --joblog "$jobfile" --results regress-results ./hfav.py > /dev/null 2>&1

head -n 1 "$jobfile"
tail -n +2 "$jobfile" | sort -k7 -n