├── .gitignore
├── LICENSE
├── README.md
├── examples
    ├── boundary-conditions.yaml
    ├── broadcast.yaml
    ├── constants.yaml
    ├── hydro2d
    │   ├── .gitignore
    │   ├── BUILD-HOST-GEN
    │   ├── GIT-VERSION-GEN
    │   ├── LICENSE
    │   ├── Makefile
    │   ├── arch.hpp
    │   ├── array-macros.hpp
    │   ├── compare.cpp
    │   ├── config.c
    │   ├── hydro2d-x.yaml
    │   ├── hydro2d-y.yaml
    │   ├── pcl-hydro-core.cpp
    │   ├── pcl-hydro-params.cpp
    │   ├── pcl-hydro-util.cpp
    │   ├── pcl-hydro-vcore.cpp
    │   ├── pcl-hydro.hpp
    │   ├── run-tile.cpp
    │   ├── test.nml
    │   ├── timeseries.cpp
    │   ├── timeseries.hpp
    │   └── vtkfile.cpp
    ├── laplace5
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── laplace5-test.cpp
    │   └── laplace5.yaml
    ├── literals.yaml
    ├── reduction.yaml
    ├── split-loops.yaml
    ├── uninitialized.yaml
    ├── vectorization-inner.yaml
    └── vectorization-outer.yaml
├── hfav.py
├── hfav
    ├── __init__.py
    ├── analyze.py
    ├── c99.py
    ├── codegen.py
    ├── cpp.py
    ├── dag.py
    ├── dot.py
    ├── include
    │   ├── cpp-rotate.hpp
    │   └── hfav
    │   │   ├── c99-rotate.h
    │   │   └── cpp-rotate.hpp
    ├── inest.py
    ├── infer.py
    ├── ispace.py
    ├── iter_plot.py
    ├── old_cpp.py
    ├── parse.py
    └── term.py
├── regress-results
    └── test-examples.txt
└── regress.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.dot
3 | *.png
4 | *.asm
5 | *.optrpt
6 | *.o
7 | *.mod
8 | *.out
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                               Apache License
  2 |                         Version 2.0, January 2004
  3 |                      http://www.apache.org/licenses/
  4 | 
  5 | GENERATED CODE EXEMPTION
  6 | 
  7 |    The output of this tool does not automatically import the Apache
  8 |    2.0 license, except the output will continue to be subject to the
  9 |    limitation of liability clause in the Apache 2.0 license. Users may
 10 |    license their output under any license they choose but the liability
 11 |    of the authors of the tool for that output is governed by the
 12 |    limitation of liability clause in the Apache 2.0 license.
 13 | 
 14 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 15 | 
 16 | 1. Definitions.
 17 | 
 18 |    "License" shall mean the terms and conditions for use, reproduction,
 19 |    and distribution as defined by Sections 1 through 9 of this document.
 20 | 
 21 |    "Licensor" shall mean the copyright owner or entity authorized by
 22 |    the copyright owner that is granting the License.
 23 | 
 24 |    "Legal Entity" shall mean the union of the acting entity and all
 25 |    other entities that control, are controlled by, or are under common
 26 |    control with that entity. For the purposes of this definition,
 27 |    "control" means (i) the power, direct or indirect, to cause the
 28 |    direction or management of such entity, whether by contract or
 29 |    otherwise, or (ii) ownership of fifty percent (50%) or more of the
 30 |    outstanding shares, or (iii) beneficial ownership of such entity.
 31 | 
 32 |    "You" (or "Your") shall mean an individual or Legal Entity
 33 |    exercising permissions granted by this License.
 34 | 
 35 |    "Source" form shall mean the preferred form for making modifications,
 36 |    including but not limited to software source code, documentation
 37 |    source, and configuration files.
 38 | 
 39 |    "Object" form shall mean any form resulting from mechanical
 40 |    transformation or translation of a Source form, including but
 41 |    not limited to compiled object code, generated documentation,
 42 |    and conversions to other media types.
 43 | 
 44 |    "Work" shall mean the work of authorship, whether in Source or
 45 |    Object form, made available under the License, as indicated by a
 46 |    copyright notice that is included in or attached to the work
 47 |    (an example is provided in the Appendix below).
 48 | 
 49 |    "Derivative Works" shall mean any work, whether in Source or Object
 50 |    form, that is based on (or derived from) the Work and for which the
 51 |    editorial revisions, annotations, elaborations, or other modifications
 52 |    represent, as a whole, an original work of authorship. For the purposes
 53 |    of this License, Derivative Works shall not include works that remain
 54 |    separable from, or merely link (or bind by name) to the interfaces of,
 55 |    the Work and Derivative Works thereof.
 56 | 
 57 |    "Contribution" shall mean any work of authorship, including
 58 |    the original version of the Work and any modifications or additions
 59 |    to that Work or Derivative Works thereof, that is intentionally
 60 |    submitted to Licensor for inclusion in the Work by the copyright owner
 61 |    or by an individual or Legal Entity authorized to submit on behalf of
 62 |    the copyright owner. For the purposes of this definition, "submitted"
 63 |    means any form of electronic, verbal, or written communication sent
 64 |    to the Licensor or its representatives, including but not limited to
 65 |    communication on electronic mailing lists, source code control systems,
 66 |    and issue tracking systems that are managed by, or on behalf of, the
 67 |    Licensor for the purpose of discussing and improving the Work, but
 68 |    excluding communication that is conspicuously marked or otherwise
 69 |    designated in writing by the copyright owner as "Not a Contribution."
 70 | 
 71 |    "Contributor" shall mean Licensor and any individual or Legal Entity
 72 |    on behalf of whom a Contribution has been received by Licensor and
 73 |    subsequently incorporated within the Work.
 74 | 
 75 | 2. Grant of Copyright License. Subject to the terms and conditions of
 76 |    this License, each Contributor hereby grants to You a perpetual,
 77 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 78 |    copyright license to reproduce, prepare Derivative Works of,
 79 |    publicly display, publicly perform, sublicense, and distribute the
 80 |    Work and such Derivative Works in Source or Object form.
 81 | 
 82 | 3. Grant of Patent License. Subject to the terms and conditions of
 83 |    this License, each Contributor hereby grants to You a perpetual,
 84 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 85 |    (except as stated in this section) patent license to make, have made,
 86 |    use, offer to sell, sell, import, and otherwise transfer the Work,
 87 |    where such license applies only to those patent claims licensable
 88 |    by such Contributor that are necessarily infringed by their
 89 |    Contribution(s) alone or by combination of their Contribution(s)
 90 |    with the Work to which such Contribution(s) was submitted. If You
 91 |    institute patent litigation against any entity (including a
 92 |    cross-claim or counterclaim in a lawsuit) alleging that the Work
 93 |    or a Contribution incorporated within the Work constitutes direct
 94 |    or contributory patent infringement, then any patent licenses
 95 |    granted to You under this License for that Work shall terminate
 96 |    as of the date such litigation is filed.
 97 | 
 98 | 4. Redistribution. You may reproduce and distribute copies of the
 99 |    Work or Derivative Works thereof in any medium, with or without
100 |    modifications, and in Source or Object form, provided that You
101 |    meet the following conditions:
102 | 
103 |    (a) You must give any other recipients of the Work or
104 |        Derivative Works a copy of this License; and
105 | 
106 |    (b) You must cause any modified files to carry prominent notices
107 |        stating that You changed the files; and
108 | 
109 |    (c) You must retain, in the Source form of any Derivative Works
110 |        that You distribute, all copyright, patent, trademark, and
111 |        attribution notices from the Source form of the Work,
112 |        excluding those notices that do not pertain to any part of
113 |        the Derivative Works; and
114 | 
115 |    (d) If the Work includes a "NOTICE" text file as part of its
116 |        distribution, then any Derivative Works that You distribute must
117 |        include a readable copy of the attribution notices contained
118 |        within such NOTICE file, excluding those notices that do not
119 |        pertain to any part of the Derivative Works, in at least one
120 |        of the following places: within a NOTICE text file distributed
121 |        as part of the Derivative Works; within the Source form or
122 |        documentation, if provided along with the Derivative Works; or,
123 |        within a display generated by the Derivative Works, if and
124 |        wherever such third-party notices normally appear. The contents
125 |        of the NOTICE file are for informational purposes only and
126 |        do not modify the License. You may add Your own attribution
127 |        notices within Derivative Works that You distribute, alongside
128 |        or as an addendum to the NOTICE text from the Work, provided
129 |        that such additional attribution notices cannot be construed
130 |        as modifying the License.
131 | 
132 |    You may add Your own copyright statement to Your modifications and
133 |    may provide additional or different license terms and conditions
134 |    for use, reproduction, or distribution of Your modifications, or
135 |    for any such Derivative Works as a whole, provided Your use,
136 |    reproduction, and distribution of the Work otherwise complies with
137 |    the conditions stated in this License.
138 | 
139 | 5. Submission of Contributions. Unless You explicitly state otherwise,
140 |    any Contribution intentionally submitted for inclusion in the Work
141 |    by You to the Licensor shall be under the terms and conditions of
142 |    this License, without any additional terms or conditions.
143 |    Notwithstanding the above, nothing herein shall supersede or modify
144 |    the terms of any separate license agreement you may have executed
145 |    with Licensor regarding such Contributions.
146 | 
147 | 6. Trademarks. This License does not grant permission to use the trade
148 |    names, trademarks, service marks, or product names of the Licensor,
149 |    except as required for reasonable and customary use in describing the
150 |    origin of the Work and reproducing the content of the NOTICE file.
151 | 
152 | 7. Disclaimer of Warranty. Unless required by applicable law or
153 |    agreed to in writing, Licensor provides the Work (and each
154 |    Contributor provides its Contributions) on an "AS IS" BASIS,
155 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
156 |    implied, including, without limitation, any warranties or conditions
157 |    of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
158 |    PARTICULAR PURPOSE. You are solely responsible for determining the
159 |    appropriateness of using or redistributing the Work and assume any
160 |    risks associated with Your exercise of permissions under this License.
161 | 
162 | 8. Limitation of Liability. In no event and under no legal theory,
163 |    whether in tort (including negligence), contract, or otherwise,
164 |    unless required by applicable law (such as deliberate and grossly
165 |    negligent acts) or agreed to in writing, shall any Contributor be
166 |    liable to You for damages, including any direct, indirect, special,
167 |    incidental, or consequential damages of any character arising as a
168 |    result of this License or out of the use or inability to use the
169 |    Work (including but not limited to damages for loss of goodwill,
170 |    work stoppage, computer failure or malfunction, or any and all
171 |    other commercial damages or losses), even if such Contributor
172 |    has been advised of the possibility of such damages.
173 | 
174 | 9. Accepting Warranty or Additional Liability. While redistributing
175 |    the Work or Derivative Works thereof, You may choose to offer,
176 |    and charge a fee for, acceptance of support, warranty, indemnity,
177 |    or other liability obligations and/or rights consistent with this
178 |    License. However, in accepting such obligations, You may act only
179 |    on Your own behalf and on Your sole responsibility, not on behalf
180 |    of any other Contributor, and only if You agree to indemnify,
181 |    defend, and hold each Contributor harmless for any liability
182 |    incurred by, or claims asserted against, such Contributor by reason
183 |    of your accepting any such warranty or additional liability.
184 | 
185 | END OF TERMS AND CONDITIONS
186 | 
187 | APPENDIX: How to apply the Apache License to your work.
188 | 
189 |    To apply the Apache License to your work, attach the following
190 |    boilerplate notice, with the fields enclosed by brackets "{}"
191 |    replaced with your own identifying information. (Don't include
192 |    the brackets!)  The text should be enclosed in the appropriate
193 |    comment syntax for the file format. We also recommend that a
194 |    file or class name and description of purpose be included on the
195 |    same "printed page" as the copyright notice for easier
196 |    identification within third-party archives.
197 | 
198 | Copyright {yyyy} {name of copyright owner}
199 | 
200 | Licensed under the Apache License, Version 2.0 (the "License");
201 | you may not use this file except in compliance with the License.
202 | You may obtain a copy of the License at
203 | 
204 |     http://www.apache.org/licenses/LICENSE-2.0
205 | 
206 | Unless required by applicable law or agreed to in writing, software
207 | distributed under the License is distributed on an "AS IS" BASIS,
208 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
209 | See the License for the specific language governing permissions and
210 | limitations under the License.
211 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | DISCONTINUATION OF PROJECT.
 2 | 
 3 | This project will no longer be maintained by Intel.
 4 | 
 5 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project. 
 6 | 
 7 | Intel no longer accepts patches to this project.
 8 | 
 9 | If you have an ongoing need to use this project, are interested in independently developing it, or would like to maintain patches for the open source software community, please create your own fork of this project. 
10 | HFAV
11 | ====
12 | 
13 | High-performance Fusion And Vectorization (formerly "Rolling Thunder")
14 | 
15 | Overview
16 | --------
17 | 
18 | This is a prototype that demonstrates how certain code transformation techniques may be automatically applied to a suitable input; in particular, it aims to automatically fuse and vectorize kernels while minimizing intermediate storage. For computations where *pure* kernels are applied to regular grids, particularly where kernels pass information to one another, hfav may provide speedup.
19 | 
20 | hfav accepts a declarative input file that specifies the function prototype for each kernel along with information about each parameter and the iteration space that the kernel should be applied to. Terminal conditions (*axioms* and *goals*) are supplied, along with options about code generation and output. The resulting output is intended to be linked into the original code, preferably in a fashion that enables inlining (which is necessary for auto-vectorization).
21 | 
22 | License
23 | -------
24 | 
25 | This software and all but one example is distributed with a modified Apache License 2.0. See LICENSE for details; the modification is an exception that code generated with this software is only subject to the limited liability clauses of the Apache 2.0 license (in particular, we don't retain copyright on generated code).
26 | 
27 | The hydro2d example is subject to the CeCILL license; see examples/hydro2d/LICENSE for details.
28 | 
29 | Usage
30 | -----
31 | 
32 | hfav.py is the top-level interface to hfav. It is invoked as:
33 | 
34 |     hfav.py [-h] [-d] [-o OUTPUT_LOCATION] [-s STORAGE] [-v {0,1,2}] FILE
35 | 
36 | ### Options
37 | 
38 | - `FILE`: input YAML file (*mandatory*)
39 | - `-h, --help`: show help message and exit
40 | - `-d, --debug`: enable debug output
41 | - `-o OUTPUT_LOCATION, --output OUTPUT_LOCATION`: override output location; "-" gives stdout
42 | - `-s STORAGE, --storage STORAGE`: where to place temporary arrays (default: stack)
43 | - `-v {0,1,2}, --verbosity {0,1,2}`: level of verbosity while processing
44 | 
45 | It can be useful to export the environment variable `HFAVROOT` to the `hfav/` directory contained in this source distribution.
46 | 
47 | Examples
48 | --------
49 | 
50 | The YAML format accepted by hfav is best understood by looking at examples. See the `examples/` directory for more detail.
51 | 
52 | The `hydro2d/` directory contains a more comprehensive example complete with Makefile integration.
53 | 
54 | More information
55 | ----------------
56 | 
57 | A paper on the ideas behind HFAV will be presented at the Seventh International Workshop on Domain-Specific Languages and High-Level Frameworks for High Performance Computing (WOLFHPC) at ACM/IEEE Supercomputing in Denver in November 2017.
58 | 
59 |     Jason D. Sewall and Simon J. Pennycook. 2017. High-Performance Code Generation though Fusion and Vectorization. To be presented at WOLFHPC 2017, Denver. November 2017.
60 | 
61 | A preprint is available at arXiv: [https://arxiv.org/abs/1710.08774](https://arxiv.org/abs/1710.08774).
62 | 
63 | Contributors
64 | ------------
65 | 
66 | - John Pennycook (john.pennycook@intel.com)
67 | - Jason Sewall (jason.sewall@intel.com)
68 | 


--------------------------------------------------------------------------------
/examples/boundary-conditions.yaml:
--------------------------------------------------------------------------------
 1 | # Example: boundary-conditions.yaml
 2 | # Demonstrates usage of "code blocks" to implement boundary conditions.
 3 | # The "code blocks" functionality is very brittle, and may break unexpectedly.
 4 | 
 5 | kernels:
 6 | 
 7 |     flux_x:
 8 |         declaration: flux(cell_t lc, cell_t rc, flux_t &fx);
 9 |         inputs: |
10 |             lc : cell[j?-1][i?]
11 |             rc : cell[j?][i?]
12 |         outputs: |
13 |             fx : flux_x[j?][i?]
14 | 
15 |     integrate:
16 |         declaration: integrate(flux_t lf, flux_t rf, cell_t &ic);
17 |         inputs: |
18 |             lf : boundary(flux_x[j?][i?])
19 |             rf : boundary(flux_x[j?+1][i?])
20 |         outputs: |
21 |             ic : integrated(cell[j?][i?])
22 | 
23 | # Code blocks contain arbitrary user code (e.g. MPI)
24 | # Inputs and outputs can contain wildcards and ranges, and can optionally redirect to a global variable.
25 | # The user is responsible for ensuring that:
26 | #   1) References to hfav temporaries use appropriately mangled names; and/or
27 | #   2) Appropriate rules are in place to move data between hfav temporaries and global variables.
28 | code blocks:
29 |     exchange_fluxes:
30 |         code: |
31 |             exchange_fluxes(__hfav_flux_x, __hfav_boundary_flux_x);
32 |         inputs: |
33 |             flux_t flux_x[j*][i*]
34 |         outputs: |
35 |             flux_t boundary(flux_x[j?-1:+1][i?])
36 | 
37 | globals:
38 | 
39 |     inputs: |
40 |         cell_t d_cell[j?][i?] => cell[j?][i?]
41 | 
42 |     outputs: |
43 |         integrated(cell[j][i]) => cell_t d_cell[j][i]
44 | 
45 | codegen options:
46 | 
47 |     loops:
48 |     -
49 |       iter_ident: i
50 |       start: first_i
51 |       end: last_i
52 |       stride: 1
53 |     -
54 |       iter_ident: j
55 |       start: first_j
56 |       end: last_j
57 |       stride: 1
58 | 
59 |     loop order: [j, i]
60 | 
61 |     language   : C99
62 |     vector loop: None
63 |     prefix     : __hfav_
64 |     types:
65 |         cell_t: float64
66 |         flux_t: float
67 |         clamp_t: int32
68 | 


--------------------------------------------------------------------------------
/examples/broadcast.yaml:
--------------------------------------------------------------------------------
 1 | # Example: broadcast.yaml
 2 | # Demonstrates re-use of lower-dimensionality quantities across dimensions.
 3 | 
 4 | kernels:
 5 | 
 6 |     compute_slice:
 7 |         declaration: compute_slice(double in, double& out);
 8 |         inputs: |
 9 |             in: input[i?][j?]
10 |         outputs: |
11 |             out: slice[i?][j?]
12 | 
13 |     broadcast:
14 |         declaration: broadcast(double slice, double& cube);
15 |         inputs: |
16 |             slice: slice[i?][j?]
17 |         outputs: |
18 |             cube: cube[i?][j?][k?]
19 | 
20 | globals:
21 |     inputs: |
22 |         double input[i?][j?]
23 |     outputs: |
24 |         cube[i][j][k] => double output[i][j][k]
25 | 
26 | codegen options:
27 | 
28 |     loops:
29 |     -
30 |         iter_ident: i
31 |         start: first_i
32 |         end: last_i
33 |         stride: 1
34 |     -
35 |         iter_ident: j
36 |         start: first_j
37 |         end: last_j
38 |         stride: 1
39 |     -
40 |         iter_ident: k
41 |         start: first_k
42 |         end: last_k
43 |         stride: 1
44 | 
45 |     loop order: [i, j, k]
46 | 
47 |     language: C99
48 |     prefix: __hfav_
49 |     vector loop: None
50 | 


--------------------------------------------------------------------------------
/examples/constants.yaml:
--------------------------------------------------------------------------------
 1 | # Example: constants.yaml
 2 | # Demonstrates usage of scalar constants (dt).
 3 | 
 4 | kernels:
 5 | 
 6 |     update:
 7 |         declaration: update(double dt, double om1, double old, double op1, double &new);
 8 |         inputs: |
 9 |             dt: dt
10 |             om1: old[j?][i?-1]
11 |             old: old[j?][i?]
12 |             op1: old[j?][i?+1]
13 |         outputs: |
14 |             new: new[j?][i?]
15 | 
16 | globals:
17 | 
18 |     inputs: |
19 |         double dt
20 |         double cell[j?][i?] => old[j?][i?]
21 |     outputs: |
22 |         new[j][i] => double cell[j][i]
23 | 
24 | codegen options:
25 | 
26 |     loops:
27 |     -
28 |         iter_ident: i
29 |         start: first_i
30 |         end: last_i
31 |         stride: 1
32 |     -
33 |         iter_ident: j
34 |         start: first_j
35 |         end: last_j
36 |         stride: 1
37 | 
38 |     loop order: [j, i]
39 | 
40 |     language: C99
41 |     prefix: __hfav_
42 |     vector loop: None
43 | 


--------------------------------------------------------------------------------
/examples/hydro2d/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | run-tile
 3 | run-gen
 4 | compare
 5 | version
 6 | Dep/
 7 | Hydro.pvd
 8 | *.idx
 9 | *.pak
10 | hydro2d-x-gen.hpp
11 | hydro2d-y-gen.hpp
12 | 


--------------------------------------------------------------------------------
/examples/hydro2d/BUILD-HOST-GEN:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Print a build-host tag of the form <node>-<machine>-<kernel>-<release>.
3 | node=$(uname -n)
4 | machine=$(uname -m)
5 | kernel=$(uname -s)
6 | release=$(uname -r)
7 | 
8 | printf '%s\n' "$node-$machine-$kernel-$release"
9 | 


--------------------------------------------------------------------------------
/examples/hydro2d/GIT-VERSION-GEN:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage: GIT-VERSION-GEN [GITDIR [OUTDIR]] -- print the version string and cache it in OUTDIR/version.
 3 | LF='
 4 | '
 5 | 
 6 | GITDIR="$PWD"
 7 | OUTDIR="$PWD"
 8 | if  test x"$2" != x""
 9 | then
10 |     OUTDIR="$2"
11 | fi
12 | if test x"$1" != x""
13 | then
14 |     GITDIR="$1"
15 | fi
16 | 
17 | # Inside a git work tree, derive the version from git-describe (falling back
18 | # to the short hash); otherwise reuse an existing version file, else "unknown".
19 | if test x"$(git -C "$GITDIR" rev-parse --git-dir 2>/dev/null)" != x""
20 | then
21 |     VN=$(git -C "$GITDIR" describe --abbrev=4 HEAD 2>/dev/null)
22 |     if test x"$VN" == x"" -o x"$VN" == x"$LF"
23 |     then
24 |         VN=$(git -C "$GITDIR" log --pretty=format:"%h" -n 1)
25 |     fi
26 |     git -C "$GITDIR" update-index -q --refresh >/dev/null 2>/dev/null
27 |     test -z "$(git -C "$GITDIR" diff-index --name-only HEAD --)" ||
28 |     VN="$VN.dirty"
29 |     echo "$VN"> "$OUTDIR/version.cand"
30 |     if test ! -f "$OUTDIR/version" -o -n "$(diff -q "$OUTDIR/version.cand" "$OUTDIR/version" 2>/dev/null)"
31 |     then
32 |         echo "$VN"> "$OUTDIR/version"
33 |     fi
34 |     rm -rf "$OUTDIR/version.cand"
35 | else
36 |     if test -f "$OUTDIR/version"
37 |     then
38 |         VN=$(cat "$OUTDIR/version")
39 |     else
40 |         VN="unknown"
41 |     fi
42 | fi
43 | # Drop any leading "v" characters from the version before printing it.
44 | echo $(expr "$VN" : v*'\(.*\)')
45 | 


--------------------------------------------------------------------------------
/examples/hydro2d/Makefile:
--------------------------------------------------------------------------------
 1 | CXX=icpc
 2 | 
 3 | GIT_VERSION:=$(shell sh -c './GIT-VERSION-GEN')
 4 | COMPILER_VERSION:="$(CXX)-$(shell $(CXX) --version | head -n1 | cut -d' ' -f4)"
 5 | BUILD_HOST=$(shell sh -c './BUILD-HOST-GEN')
 6 | 
 7 | HFAV_DIR=../../
 8 | HFAVROOT?=$(HFAV_DIR)/hfav
 9 | 
10 | HFAV=$(HFAV_DIR)/hfav.py
11 | 
12 | SSE_CXXFLAGS=-DSSE -xSSE4.2
13 | KNC_CXXFLAGS=-DKNC -mmic
14 | AVX_CXXFLAGS=-DAVX -xAVX
15 | AVX2_CXXFLAGS=-DAVX -xCORE-AVX2
16 | KNL_CXXFLAGS=-DAVX3 -xMIC-AVX512
17 | SKL_CXXFLAGS=-DAVX3 -xCORE-AVX512
18 | 
19 | COMPILER_INC?=/opt/intel/compilers_and_libraries/linux/include/mic
20 | ARCH_CXXFLAGS=
21 | 
22 | ifeq ($(ARCH),SSE)
23 | 	ARCH_CXXFLAGS=$(SSE_CXXFLAGS)
24 | endif
25 | 
26 | ifeq ($(ARCH),KNC)
27 | 	ARCH_CXXFLAGS=$(KNC_CXXFLAGS)
28 | endif
29 | 
30 | ifeq ($(ARCH),AVX)
31 | 	ARCH_CXXFLAGS=$(AVX_CXXFLAGS)
32 | endif
33 | 
34 | ifeq ($(ARCH),AVX2)
35 | 	ARCH_CXXFLAGS=$(AVX2_CXXFLAGS)
36 | endif
37 | 
38 | ifeq ($(ARCH),KNL)
39 | 	ARCH_CXXFLAGS=$(KNL_CXXFLAGS) -I$(COMPILER_INC)
40 | endif
41 | 
42 | ifeq ($(ARCH),SKL)  # use the SKL flag set defined above (-xCORE-AVX512), not the KNL one
43 | 	ARCH_CXXFLAGS=$(SKL_CXXFLAGS)
44 | endif
45 | 
46 | 
47 | BASE_CXXFLAGS=-DGIT_VERSION=\"$(GIT_VERSION)\" -DCOMPILER_VERSION=\"$(COMPILER_VERSION)\" \
48 | 	      -DBUILD_HOST=\"$(BUILD_HOST)\" \
49 | 	      -DDOUBLE \
50 | 	      -Wall -wd167 -ggdb \
51 | 	      -qopenmp \
52 | 	      -lnuma \
53 |           -O3 -ipo -restrict -qopt-report=5 -inline-forceinline \
54 |           -no-prec-div -no-prec-sqrt -fimf-precision=low -fimf-domain-exclusion=15 \
55 | 	      -I$(HFAVROOT)/include
56 | 
57 | all: run-tile run-gen compare  # default goal: first rule in the file
58 | 
59 | run-tile: run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp pcl-hydro.hpp arch.hpp config.c Makefile
60 | 	$(CXX) -o $@ run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp config.c $(BASE_CXXFLAGS) $(ARCH_CXXFLAGS)
61 | 
62 | hydro2d-x-gen.hpp: hydro2d-x.yaml $(HFAV) $(HFAV_DIR)/hfav/infer.py $(HFAV_DIR)/hfav/dag.py $(HFAV_DIR)/hfav/analyze.py $(HFAV_DIR)/hfav/codegen.py $(HFAV_DIR)/hfav/term.py $(HFAV_DIR)/hfav/c99.py $(HFAV_DIR)/hfav/parse.py
63 | 	$(HFAV) $(HFAV_FLAGS) hydro2d-x.yaml
64 | 
65 | hydro2d-y-gen.hpp: hydro2d-y.yaml $(HFAV) $(HFAV_DIR)/hfav/infer.py $(HFAV_DIR)/hfav/dag.py $(HFAV_DIR)/hfav/analyze.py $(HFAV_DIR)/hfav/codegen.py $(HFAV_DIR)/hfav/term.py $(HFAV_DIR)/hfav/c99.py $(HFAV_DIR)/hfav/parse.py
66 | 	$(HFAV) $(HFAV_FLAGS) hydro2d-y.yaml
67 | 
68 | run-gen: run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp pcl-hydro.hpp config.c arch.hpp Makefile hydro2d-x-gen.hpp hydro2d-y-gen.hpp
69 | 	$(CXX) -o $@ run-tile.cpp pcl-hydro-core.cpp pcl-hydro-vcore.cpp pcl-hydro-util.cpp pcl-hydro-params.cpp vtkfile.cpp timeseries.cpp config.c -I $(HFAVROOT)/include/ $(BASE_CXXFLAGS) $(ARCH_CXXFLAGS) -DUSE_GEN_X -DUSE_GEN_Y -qopt-report=5
70 | 
71 | compare: compare.cpp timeseries.cpp Makefile
72 | 	$(CXX) -o $@ compare.cpp timeseries.cpp $(BASE_CXXFLAGS)
73 | 
74 | clean:  # remove the built binaries, generated headers and optimization reports
75 | 	rm -rf run-tile run-gen compare hydro2d-x-gen.hpp hydro2d-y-gen.hpp *.optrpt
76 | 


--------------------------------------------------------------------------------
/examples/hydro2d/array-macros.hpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/array-macros.hpp: utilities for HPC codes
  2 | 
  3 |    (C) Jason Sewall : Intel -- initial version
  4 |    (C) John Pennycook : Intel -- augmentations to above version
  5 | */
  6 | /*
  7 |   This software is governed by the CeCILL license under French law and
  8 |   abiding by the rules of distribution of free software.  You can  use,
  9 |   modify and/ or redistribute the software under the terms of the CeCILL
 10 |   license as circulated by CEA, CNRS and INRIA at the following URL
 11 |   "http://www.cecill.info".
 12 | 
 13 |   As a counterpart to the access to the source code and  rights to copy,
 14 |   modify and redistribute granted by the license, users are provided only
 15 |   with a limited warranty  and the software's author,  the holder of the
 16 |   economic rights,  and the successive licensors  have only  limited
 17 |   liability.
 18 | 
 19 |   In this respect, the user's attention is drawn to the risks associated
 20 |   with loading,  using,  modifying and/or developing or reproducing the
 21 |   software by the user in light of its specific status of free software,
 22 |   that may mean  that it is complicated to manipulate,  and  that  also
 23 |   therefore means  that it is reserved for developers  and  experienced
 24 |   professionals having in-depth computer knowledge. Users are therefore
 25 |   encouraged to load and test the software's suitability as regards their
 26 |   requirements in conditions enabling the security of their systems and/or
 27 |   data to be ensured and,  more generally, to use and operate it in the
 28 |   same conditions as regards security.
 29 | 
 30 |   The fact that you are presently reading this means that you have had
 31 |   knowledge of the CeCILL license and that you accept its terms.
 32 | */
 33 | 
 34 | #ifndef _ARRAY_MACROS_HPP__
 35 | #define _ARRAY_MACROS_HPP__
 36 | 
 37 | #include <cstdarg>
 38 | #include <cstdio>
 39 | #include <cstdlib>
 40 | #include <algorithm>
 41 | #include <cstring>
 42 | #include <cmath>
 43 | #include <fcntl.h>
 44 | 
 45 | // Print a printf-style message to stderr, then terminate with EXIT_FAILURE.
 46 | __attribute__((noreturn)) static inline void die(const char *fmt, ...)
 47 | {
 48 |     va_list args;
 49 |     va_start(args, fmt);
 50 |     std::vfprintf(stderr, fmt, args);
 51 |     va_end(args);
 52 |     std::exit(EXIT_FAILURE);
 53 | }
 54 | 
 55 | typedef unsigned long long u64;
 56 | 
 57 | #define CACHE_LINE_BYTES 64
 58 | 
 59 | // Split nitems into nchunks contiguous ranges; chunk chunkno covers [*start, *end).
 60 | inline void divvy(u64 *start, u64 *end, const u64 nitems, u64 chunkno, u64 nchunks)
 61 | {
 62 |     const u64 base  = nitems / nchunks;   // items every chunk receives
 63 |     const u64 extra = nitems % nchunks;   // the first `extra` chunks get one more
 64 |     *start = chunkno * base + std::min(chunkno, extra);
 65 |     *end   = (chunkno + 1) * base + std::min(chunkno + 1, extra);
 66 | }
 67 | 
 68 | // Round x up to a multiple of alignment using bit masks (assumes alignment is a power of two -- TODO confirm with callers).
 69 | inline unsigned long long round_to_alignment(unsigned long long x, int alignment)
 70 | {
 71 |     const unsigned long long low_bits = x & (alignment-1);
 72 |     return low_bits ? (x & ~(alignment-1)) + alignment : x;
 73 | }
 74 | 
 75 | 
 76 | inline void *aligned_malloc(size_t bytes)
 77 | {
 78 |     void *ptr;
 79 |     if(posix_memalign(&ptr, CACHE_LINE_BYTES, bytes))
 80 |         return 0;
 81 |     return ptr;
 82 | }
 83 | 
 84 | inline void aligned_free(void *ptr)
 85 | {
 86 |     free(ptr);
 87 | }
 88 | 
 89 | static void *xmalloc(size_t sze, const char *name)
 90 | {
 91 |     void *res = malloc(sze);
 92 |     if(!res)
 93 |         die("Failed to allocate %zub for %s!\n", sze, name);
 94 |     return res;
 95 | }
 96 | 
 97 | static void xmalloc_free(void *p)
 98 | {
 99 |     free(p);
100 | }
101 | 
102 | static void *xaligned_malloc(size_t sze, const char *name)
103 | {
104 |     void *res = aligned_malloc(sze);
105 |     if(!res)
106 |         die("Failed to allocate %zub for %s!\n", sze, name);
107 |     return res;
108 | }
109 | 
/* Release a block obtained from xaligned_malloc(); forwards to free(). */
static void xaligned_malloc_free(void *p)
{
    std::free(p);
}
114 | 
115 | static void *xrealloc(void *ptr, size_t sze, const char *name)
116 | {
117 |     void *res = realloc(ptr, sze);
118 |     if(!res)
119 |         die("Failed to allocate %zub for %s!\n", sze, name);
120 |     return res;
121 | }
122 | 
123 | static char *xstrdup(const char *ptr, const char *name)
124 | {
125 |     char *res = strdup(ptr);
126 |     if(!res)
127 |         die("Failed to strdup %zub for %s!\n", strlen(ptr), name);
128 |     return res;
129 | }
130 | 
/* Release a string obtained from xstrdup(); forwards to free(). */
static void xstrdup_free(void *ptr)
{
    std::free(ptr);
}
135 | 
136 | static char *xstrndup(const char *ptr, size_t n, const char *name)
137 | {
138 |     char *res = strndup(ptr,n );
139 |     if(!res)
140 |         die("Failed to strndup %zub for %s!\n", strlen(ptr), name);
141 |     return res;
142 | }
143 | 
144 | static int xsnprintf(char *str, size_t n, const char *fmt, ...)
145 | {
146 |     va_list val;
147 |     va_start(val, fmt);
148 |     int wanted_out = vsnprintf(str, n, fmt, val);
149 |     va_end(val);
150 |     if(wanted_out > n)
151 |     {
152 |         die("Ran out of buffer space for output string!\n");
153 |     }
154 |     return wanted_out;
155 | }
156 | 
/* Create (or truncate) the file `str` with user/group read+write permission
   bits and wrap the descriptor in a stdio stream opened with `mode`.

   Returns the stream, or a null pointer if either creat() or fdopen() fails.
   When fdopen() fails the descriptor is closed here, so no descriptor is
   leaked on the error path. */
static FILE *xfopen_write(const char *str, char *mode)
{
    const int fd = creat(str, S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP);
    if(fd == -1)
        return 0;

    FILE *file = fdopen(fd, mode);
    if(!file)
        close(fd);
    return file;
}
166 | 
/* Open the existing file `str` read-only and wrap the descriptor in a stdio
   stream opened with `mode`.

   Returns the stream, or a null pointer if either open() or fdopen() fails.
   When fdopen() fails the descriptor is closed here, so no descriptor is
   leaked on the error path. */
static FILE *xfopen_read(const char *str, char *mode)
{
    const int fd = open(str, O_RDONLY);
    if(fd == -1)
        return 0;

    FILE *file = fdopen(fd, mode);
    if(!file)
        close(fd);
    return file;
}
176 | 
177 | static char *human_format(double in)
178 | {
179 |     static const char cf_chars[]              = {'t', 'g', 'm', 'k', 0};
180 |     static const unsigned long long cf_vals[] = {
181 |         1ULL << 40,
182 |         1ULL << 30,
183 |         1ULL << 20,
184 |         1ULL << 10,
185 |         0ULL,
186 |     };
187 |     const double     ain     = std::abs(in);
188 | 
189 |     int i;
190 |     for(i = 0; ain < cf_vals[i]; ++i);
191 | 
192 |     double v = in/std::max(cf_vals[i], 1ULL);
193 |     char buff[1024];
194 |     xsnprintf(buff, 1023, "%.1lf%c", v, cf_chars[i]);
195 |     return strdup(buff);
196 | }
197 | 
/* Parse a number followed by zero or more single-letter multipliers.

   The leading number is read with strtod().  Each following character scales
   the running value: 't'/'T' by `nthreads`, 'k' by 1024, 'K' by 1000,
   'm' by 1024^2, 'M' by 10^6, 'g' by 1024^3, 'G' by 10^9.  Parsing stops at
   the first character that is not one of these suffixes.  The accumulated
   double is converted to long long on return. */
static long long suffixed_atoll(const char *nptr, int nthreads)
{
    char  *cursor;
    double value = strtod(nptr, &cursor);

    for(; *cursor != '\0'; ++cursor)
    {
        double factor;
        switch(*cursor)
        {
        case 't':
        case 'T':
            factor = nthreads;
            break;
        case 'k':
            factor = 1024;
            break;
        case 'K':
            factor = 1000;
            break;
        case 'm':
            factor = 1024*1024;
            break;
        case 'M':
            factor = 1000000;
            break;
        case 'g':
            factor = 1024*1024*1024;
            break;
        case 'G':
            factor = 1000000000;
            break;
        default:
            /* Unknown suffix: return what has been accumulated so far. */
            return value;
        }
        value *= factor;
    }
    return value;
}
237 | 
/* Growable-array helper macros.

   An array called `name` is represented by three variables:
     name            - pointer to the elements
     name##_n        - number of elements in use
     name##_n_allocd - number of elements allocated

   The `size` and `num` arguments are parenthesized in every expansion so
   that expression arguments (e.g. `1 << 2` or `a ? b : c`) keep their
   meaning.  `__typeof__` is used instead of `typeof` so the macros also
   work when GNU keyword extensions are disabled.

   Most of these macros expand to several statements (and the EXTEND_*
   macros to a bare `if` block), so they must not be used as the unbraced
   body of an if/else/loop. */

/* Declare the three variables backing an array of `type` called `name`. */
#define DECLARE_ARRAY_ALL(type, name)       \
    int name##_n;                               \
    int name##_n_allocd;                        \
    type* name

/* Start `name` empty with room for `size` elements, allocated by xmalloc(). */
#define INIT_ARRAY(name, size)                  \
    name##_n = 0;                               \
    name##_n_allocd = (size);                   \
    name     = (__typeof__(name)) xmalloc(sizeof(name[0])*name##_n_allocd, #name);

/* Same as INIT_ARRAY, but the storage comes from xaligned_malloc(). */
#define INIT_ARRAY_ALIGNED(name, size)          \
    name##_n        = 0;                        \
    name##_n_allocd = (size);                   \
    name            = (__typeof__(name)) xaligned_malloc(sizeof(name[0])*name##_n_allocd, #name);

/* Make room for `num` more elements: when name##_n + num reaches the
   allocated count, capacity becomes twice that sum via xrealloc(). */
#define EXTEND_ARRAY(name, num)                 \
    if(name##_n + (num) >= name##_n_allocd)     \
    {                                           \
        name##_n_allocd = (name##_n + (num))*2; \
        void *m         = xrealloc(name, sizeof(name[0])*name##_n_allocd, #name); \
        name            = (__typeof__(name)) m;                         \
    }

/* Aligned variant of EXTEND_ARRAY: allocates a new aligned block, copies the
   name##_n elements in use, and frees the old block. */
#define EXTEND_ARRAY_ALIGNED(name, num)         \
    if(name##_n + (num) >= name##_n_allocd)     \
    {                                           \
        name##_n_allocd = (name##_n + (num))*2; \
        void *m         = xaligned_malloc(sizeof(name[0])*name##_n_allocd, #name); \
        memcpy(m, name, sizeof(name[0])*name##_n);      \
        aligned_free(name);                             \
        name            = (__typeof__(name)) m;         \
    }

/* Free the storage and reset all three variables to zero. */
#define FREE_ARRAY_ALL(name)             \
    name##_n        = 0;                 \
    name##_n_allocd = 0;                 \
    free(name);                          \
    name            = 0;

/* Free the storage and reset the in-use count and the pointer
   (name##_n_allocd is left untouched). */
#define FREE_ARRAY(name)                 \
    name##_n = 0;                        \
    free(name);                          \
    name     = 0;

/* Aligned variant of FREE_ARRAY; releases through aligned_free(). */
#define FREE_ARRAY_ALIGNED(name)                \
    name##_n = 0;                               \
    aligned_free(name);                         \
    name     = 0;
286 | 
287 | #endif
288 | 


--------------------------------------------------------------------------------
/examples/hydro2d/compare.cpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/compare.cpp: compare timeseries from hydro code
  2 | 
  3 |    (C) Jason Sewall : Intel -- initial version
  4 |    (C) John Pennycook : Intel -- augmentations to above version
  5 | */
  6 | /*
  7 |   This software is governed by the CeCILL license under French law and
  8 |   abiding by the rules of distribution of free software.  You can  use,
  9 |   modify and/ or redistribute the software under the terms of the CeCILL
 10 |   license as circulated by CEA, CNRS and INRIA at the following URL
 11 |   "http://www.cecill.info".
 12 | 
 13 |   As a counterpart to the access to the source code and  rights to copy,
 14 |   modify and redistribute granted by the license, users are provided only
 15 |   with a limited warranty  and the software's author,  the holder of the
 16 |   economic rights,  and the successive licensors  have only  limited
 17 |   liability.
 18 | 
 19 |   In this respect, the user's attention is drawn to the risks associated
 20 |   with loading,  using,  modifying and/or developing or reproducing the
 21 |   software by the user in light of its specific status of free software,
 22 |   that may mean  that it is complicated to manipulate,  and  that  also
 23 |   therefore means  that it is reserved for developers  and  experienced
 24 |   professionals having in-depth computer knowledge. Users are therefore
 25 |   encouraged to load and test the software's suitability as regards their
 26 |   requirements in conditions enabling the security of their systems and/or
 27 |   data to be ensured and,  more generally, to use and operate it in the
 28 |   same conditions as regards security.
 29 | 
 30 |   The fact that you are presently reading this means that you have had
 31 |   knowledge of the CeCILL license and that you accept its terms.
 32 | */
 33 | 
 34 | #include <cstdarg>
 35 | #include <cstdlib>
 36 | #include <cstdio>
 37 | #include <cmath>
 38 | #include <algorithm>
 39 | #include <cstring>
 40 | #include <getopt.h>
 41 | #include "timeseries.hpp"
 42 | #include <unistd.h>
 43 | static const char usage_str[] = "USAGE:\t%s [-s start] [-e end] [-l] [-h] idxfile1 idxfile2\n";
 44 | 
 45 | static void usage(const char *name)
 46 | {
 47 |     die(usage_str, basename(name));
 48 | }
 49 | 
 50 | static void help(const char *name)
 51 | {
 52 |     fprintf(stderr, usage_str, name);
 53 |     fprintf(stderr, "DESCRIPTION\n"
 54 |             "\t Compare timseries results with vortex particles\n");
 55 |     fprintf(stderr, "OPTIONS\n"
 56 |             "\t-s,--start <frame>\n\t    Start at frame <frame> (default 1)\n"
 57 |             "\t-e,--end <frame>\n\t    Run up to <frame> (not inclusive, defaults to end of shorter)>\n"
 58 |             "\t-l,--last-only\n\t    Only test last common frame in inputs (ignores -s and -e options)\n"
 59 |             "\t-h,--help\n\t    print this help message\n"
 60 |             );
 61 | }
 62 | 
 63 | bool compare(int nx,
 64 |              int ny,
 65 |              double *l2,
 66 |              double *linf,
 67 |              int    *linfarg,
 68 |              int numstream,
 69 |              int frameno,
 70 |              const void *frame1, const void *frame2)
 71 | {
 72 |     const int stride = nx*ny;
 73 | 
 74 |     for(int s = 0; s < numstream; ++s)
 75 |     {
 76 |         l2[s]               = 0.0;
 77 |         linf[s]             = 0.0;
 78 |         linfarg[s]          = -1;
 79 |         const double *base1 = ((const double*)frame1) + s*stride;
 80 |         const double *base2 = ((const double*)frame2) + s*stride;
 81 | 
 82 |         for(int i = 0; i < stride; ++i)
 83 |         {
 84 |             l2[s]   += (base1[i] - base2[i])*(base1[i] - base2[i]);
 85 |             if(linf[s] < std::abs(base1[i] - base2[i]))
 86 |             {
 87 |                 linf[s]    = std::abs(base1[i] - base2[i]);
 88 |                 linfarg[s] = i;
 89 |             }
 90 |         }
 91 |     }
 92 |     return true;
 93 | }
 94 | 
 95 | int main(int argc, char *argv[])
 96 | {
 97 |     int  start_frame = 0;
 98 |     int  end_frame   = -1;
 99 |     bool last_only   = false;
100 |     option opts[]    =
101 |     {
102 |         {"start",              required_argument, 0, 's'},
103 |         {"end",                required_argument, 0, 'e'},
104 |         {"last-only",          required_argument, 0, 'l'},
105 |         {"help",               false,             0, 'h'},
106 |         {0,                    0,                 0,   0},
107 |     };
108 | 
109 |     int opt;
110 |     while((opt = getopt_long(argc, argv, "s:e:lh", opts, 0)) != -1)
111 |     {
112 |         switch(opt)
113 |         {
114 |         case 0:
115 |             break;
116 |         case 's':
117 |             start_frame = atoi(optarg);
118 |             if(start_frame < 0)
119 |                 die("--[s]start is %d, must be >= 0\n", start_frame);
120 |             break;
121 |         case 'e':
122 |             end_frame = atoi(optarg);
123 |             if(end_frame < 0)
124 |                 die("--[e]nd is %d, must be >= 0\n", end_frame);
125 |             break;
126 |         case 'l':
127 |             last_only = true;
128 |             break;
129 |         case 'h':
130 |             help(argv[0]);
131 |             exit(0);
132 |         default:
133 |             usage(argv[0]);
134 |         }
135 |     }
136 | 
137 |     if(optind >= argc + 1)
138 |         die("Expected 2 arguments (index files) after options\n");
139 | 
140 |     timeseries_reader idx1;
141 | 
142 |     if(!idx1.load(argv[optind]))
143 |         die("Can't load (first) index file %s\n", argv[optind]);
144 | 
145 |     timeseries_reader idx2;
146 | 
147 |     if(!idx2.load(argv[optind + 1]))
148 |         die("Can't load (second) index file %s\n", argv[optind + 1]);
149 | 
150 |     int last_frame = std::min(idx1.frames_n, idx2.frames_n);
151 |     if(end_frame != -1)
152 |         last_frame = std::min(last_frame, end_frame);
153 | 
154 |     if(last_only)
155 |         start_frame = last_frame-1;
156 |     for(int current_frame = start_frame; current_frame < last_frame; ++current_frame)
157 |     {
158 |         double time1;
159 |         size_t size1;
160 |         const void *fr1 = idx1.get_frame(current_frame, &time1, &size1);
161 |         if(!fr1)
162 |             die("Woah, couldn't get frame %d from idx1", current_frame);
163 | 
164 |         double time2;
165 |         size_t size2;
166 |         const void *fr2 = idx2.get_frame(current_frame, &time2, &size2);
167 |         if(!fr2)
168 |             die("Woah, couldn't get frame %d from idx2", current_frame);
169 | 
170 |         if(time1 != time2)
171 |             die("Frame %d: times differ! (First = %le, second = %le, first-second = %le)\n", current_frame, time1, time2, time1-time2);
172 | 
173 |         if(size1 != size2)
174 |             die("Frame %d: sizes differ! (First = %zu, second = %zu)\n", current_frame, size1, size2);
175 | 
176 |         double l2[4];
177 |         double linf[4];
178 |         int    linfarg[4];
179 | 
180 |         size_t size;
181 |         const int nx1 = ((const int*)(idx1.get_static("nx", &size)))[0];
182 |         const int ny1 = ((const int*)(idx1.get_static("ny", &size)))[0];
183 | 
184 |         const int nx2 = ((const int*)(idx2.get_static("nx", &size)))[0];
185 |         const int ny2 = ((const int*)(idx2.get_static("ny", &size)))[0];
186 | 
187 |         if(nx1 != nx2)
188 |             die("Differing grid x dimensions! (First = %d, second = %d)\n", nx1, nx2);
189 |         if(ny1 != ny2)
190 |             die("Differing grid y dimensions! (First = %d, second = %d)\n", ny1, ny2);
191 | 
192 |         compare(nx1, ny1, l2, linf, linfarg, 4, current_frame, fr1, fr2);
193 |         fprintf(stderr, "Frame %d\n", current_frame);
194 |         for(int s = 0; s < 4; ++s)
195 |             fprintf(stderr, "          %d err:l2 = %le linf = %le (inf @ %d)\n", s, std::sqrt(l2[s]), linf[s], linfarg[s]);
196 | 
197 |     }
198 |     if(idx1.frames_n - last_frame > 0)
199 |         fprintf(stderr, "[warning] First has %d more frames unchecked\n", idx1.frames_n - last_frame);
200 | 
201 |     if(idx2.frames_n - last_frame > 0)
202 |         fprintf(stderr, "[warning] Second has %d more frames unchecked\n", idx2.frames_n - last_frame);
203 | 
204 |     return EXIT_SUCCESS;
205 | }
206 | 


--------------------------------------------------------------------------------
/examples/hydro2d/config.c:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/config.c: print out configuration of software and hardware
  2 | 
  3 |    (C) Jason Sewall : Intel -- initial version
  4 |    (C) John Pennycook : Intel -- augmentations to above version
  5 | */
  6 | /*
  7 |   This software is governed by the CeCILL license under French law and
  8 |   abiding by the rules of distribution of free software.  You can  use,
  9 |   modify and/ or redistribute the software under the terms of the CeCILL
 10 |   license as circulated by CEA, CNRS and INRIA at the following URL
 11 |   "http://www.cecill.info".
 12 | 
 13 |   As a counterpart to the access to the source code and  rights to copy,
 14 |   modify and redistribute granted by the license, users are provided only
 15 |   with a limited warranty  and the software's author,  the holder of the
 16 |   economic rights,  and the successive licensors  have only  limited
 17 |   liability.
 18 | 
 19 |   In this respect, the user's attention is drawn to the risks associated
 20 |   with loading,  using,  modifying and/or developing or reproducing the
 21 |   software by the user in light of its specific status of free software,
 22 |   that may mean  that it is complicated to manipulate,  and  that  also
 23 |   therefore means  that it is reserved for developers  and  experienced
 24 |   professionals having in-depth computer knowledge. Users are therefore
 25 |   encouraged to load and test the software's suitability as regards their
 26 |   requirements in conditions enabling the security of their systems and/or
 27 |   data to be ensured and,  more generally, to use and operate it in the
 28 |   same conditions as regards security.
 29 | 
 30 |   The fact that you are presently reading this means that you have had
 31 |   knowledge of the CeCILL license and that you accept its terms.
 32 | */
 33 | 
 34 | #define USE_OMP
 35 | #define USE_NUMACTL
 36 | #include <gnu/libc-version.h>
 37 | #include <unistd.h>
 38 | #include <error.h>
 39 | #include <string.h>
 40 | #ifdef USE_MPI
 41 | #include <mpi.h>
 42 | #endif
 43 | #ifdef USE_MKL
 44 | #include "mkl.h"
 45 | #endif
 46 | #ifdef USE_OMP
 47 | #include <omp.h>
 48 | #endif
 49 | #ifdef USE_NUMACTL
 50 | #include <numa.h>
 51 | #endif
 52 | #include <sys/utsname.h>
 53 | #include <stdio.h>
 54 | #include <stdlib.h>
 55 | #include <cstdarg>
 56 | 
 57 | static char config_null[] = "<undefined>";
 58 | 
 59 | static const char *xgetenv_name(const char *str)
 60 | {
 61 |     const char *res = getenv(str);
 62 |     if(res == 0)
 63 |         return config_null;
 64 |     else
 65 |         return res;
 66 | }
 67 | 
 68 | #ifdef USE_NUMACTL
/* Per-NUMA-node record filled in by readnodes() and findmem(). */
struct node
{
    /* CPU mask of this node, allocated and filled in readnodes(). */
    struct bitmask *cpus;
    /* numa_bitmask_weight(cpus); 0 is treated as "memory-only" by findmem(). */
    int has_cpu;
    /* Index of the closest node with has_cpu == 0, or -1 if none was found. */
    int nearest_memonly;
};
 75 | 
 76 | static void readnodes(struct node *nodes, int n)
 77 | {
 78 |     int i;
 79 |     for (i = 0; i < n; ++i)
 80 |     {
 81 |         nodes[i].cpus = numa_allocate_cpumask();
 82 |         if (nodes[i].cpus == 0)
 83 |             perror("allocate cpu bitmask");
 84 |         int ret = numa_node_to_cpus(i, nodes[i].cpus);
 85 |         if (ret != 0)
 86 |             perror("numa_node_to_cpus");
 87 |         nodes[i].has_cpu = numa_bitmask_weight(nodes[i].cpus);
 88 |         nodes[i].nearest_memonly = -1;
 89 |     }
 90 | }
 91 | 
 92 | static void findmem(struct node *nodes, int n)
 93 | {
 94 |     int i;
 95 |     for (i = 0; i < n; ++i)
 96 |     {
 97 |         if (nodes[i].has_cpu == 0)
 98 |             continue;
 99 |         // look for a memory-only node with closest distance
100 |         int memidx = -1;
101 |         int distance = 0x7FFFFFFF;
102 |         int j;
103 |         for (j = 0; j < n; ++j)
104 |         {
105 |             if (nodes[j].has_cpu != 0)
106 |                 continue;
107 |             int d = numa_distance(i, j);
108 |             if (d < distance)
109 |             {
110 |                 distance = d;
111 |                 memidx = j;
112 |             }
113 |         }
114 |         nodes[i].nearest_memonly = memidx;
115 |     }
116 | }
117 | 
/* snprintf() wrapper that refuses to truncate.

   str : destination buffer.
   n   : size of the destination buffer in bytes, including the terminator.
   fmt : printf-style format followed by its arguments.

   Returns the number of characters written (excluding the terminator).
   vsnprintf() returns the length the full output would have had, so the
   output was truncated whenever that value is >= n; a negative value means
   vsnprintf() itself failed.  In either case a message is printed to stderr
   and the process exits with status 1. */
static int xsnprintf(char *str, size_t n, const char *fmt, ...)
{
    va_list val;
    va_start(val, fmt);
    int wanted_out = vsnprintf(str, n, fmt, val);
    va_end(val);
    if(wanted_out < 0 || (size_t)wanted_out >= n)
    {
        fprintf(stderr, "Ran out of buffer space for output string!\n");
        exit(1);
    }
    return wanted_out;
}
131 | 
132 | static char *mem_nodes(struct node *nodes, struct bitmask *mynodes, int nnodes)
133 | {
134 |     int i;
135 |     char  temp[1024];
136 |     memset(temp, 0, sizeof(char)*1024);
137 |     char *curr = temp;
138 |     *curr = 0;
139 |     for (i = 0; i < nnodes; ++i)
140 |     {
141 |         if (numa_bitmask_isbitset(mynodes, i) &&
142 |             nodes[i].nearest_memonly > 0)
143 |         {
144 |             if (curr != temp)
145 |             {
146 |                 curr += snprintf(curr, 1023-(curr-temp), ",");
147 |             }
148 |             curr += snprintf(curr, 1023-(curr-temp), "%d", nodes[i].nearest_memonly);
149 |         }
150 |     }
151 |     char *res = strdup(temp);
152 |     if(!res)
153 |         return config_null;
154 |     return res;
155 | }
156 | 
157 | static char *cpu_nodes(struct bitmask *mynodes, int nnodes)
158 | {
159 |     int i;
160 |     char  temp[1024];
161 |     memset(temp, 0, sizeof(char)*1024);
162 |     char *curr = temp;
163 |     for (i = 0; i < nnodes; ++i)
164 |     {
165 |         if (numa_bitmask_isbitset(mynodes, i))
166 |         {
167 |             if (curr != temp)
168 |             {
169 |                 curr += snprintf(curr, 1023-(curr-temp), ",");
170 |             }
171 |             curr += snprintf(curr, 1023-(curr-temp), "%d", i);
172 |         }
173 |     }
174 |     char *res = strdup(temp);
175 |     if(!res)
176 |         return config_null;
177 |     return res;
178 | }
179 | #endif
180 | 
181 | static char *format_uname()
182 | {
183 |     struct utsname un;
184 |     if(uname(&un) == -1)
185 |     {
186 |         perror("uname");
187 |         exit(1);
188 |     }
189 |     char buff[1024];
190 |     snprintf(buff, 1023, "%s-%s-%s-%s", un.nodename, un.machine, un.sysname, un.release);
191 |     char *res = strdup(buff);
192 |     if(!res)
193 |         return config_null;
194 |     return res;
195 | }
196 | 
/* Execute the x86 CPUID instruction with `info` loaded into EAX and store
   the resulting EAX/EBX/ECX/EDX values through the four output pointers.
   Only EAX is an input operand; ECX is not set by this wrapper before the
   instruction runs. */
static void cpuid(const unsigned int info, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
{
    __asm__("cpuid;"
            :"=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
            :"a" (info));
}
203 | 
204 | static void vendorid(char id[13])
205 | {
206 |     unsigned int temp;
207 |     cpuid(0, &temp, (unsigned int*)id, (unsigned int*)(id+8), (unsigned int*)(id+4));
208 |     id[12] = 0;
209 | }
210 | 
211 | static void proc_brand(char str[49])
212 | {
213 |     unsigned int i;
214 |     static char nope[] = "Unknown";
215 |     unsigned int okay;
216 |     cpuid(0x80000000, &okay, (unsigned int*)str, (unsigned int*)(str+4), (unsigned int*)(str+8));
217 |     if(okay < 0x80000004)
218 |     {
219 |         strcpy(str, nope);
220 |     }
221 |     else
222 |     {
223 |         for(i = 0; i < 3; ++i)
224 |         {
225 |             cpuid(0x80000002+i, (unsigned int*)(str+16*i), (unsigned int*)(str+16*i+4), (unsigned int*)(str+16*i+8), (unsigned int*)(str+16*i+12));
226 |         }
227 |     }
228 |     str[48] = 0;
229 | }
230 | 
/* Decoded processor signature.

   cpu_info() writes the EAX value of CPUID leaf 1 over the start of this
   struct, so the bitfields below must add up to exactly 32 bits.  The two
   trailing members are computed by cpu_info(). */
typedef struct cpuinfo
{
    unsigned stepping : 4;           /* bits  3:0  */
    unsigned model : 4;              /* bits  7:4  */
    unsigned family_id : 4;          /* bits 11:8  */
    unsigned proc_type : 2;          /* bits 13:12 */
    unsigned nothing : 2;            /* bits 15:14, unused */
    unsigned extended_model_id : 4;  /* bits 19:16 */
    unsigned extended_family_id : 8; /* bits 27:20 */
    unsigned nothing2 : 4;           /* bits 31:28, unused */
    unsigned display_family;         /* set by cpu_info() */
    unsigned display_model;          /* set by cpu_info() */
} cpuinfo;
244 | 
245 | static void cpu_info(struct cpuinfo *ci)
246 | {
247 |     unsigned int b, c, d;
248 |     cpuid(0x1, (unsigned int*)ci, &b, &c, &d);
249 |     if(ci->family_id == 0x0F)
250 |     {
251 |         ci->display_family = ci->extended_family_id + ci->family_id;
252 |     }
253 |     else
254 |     {
255 |         ci->display_family = ci->family_id;
256 |     }
257 |     if(ci->family_id == 0x0F || ci->family_id == 0x06)
258 |     {
259 |         ci->display_model = (ci->extended_model_id << 4) + ci->model;
260 |     }
261 |     else
262 |     {
263 |         ci->display_model = ci->model;
264 |     }
265 | }
266 | 
267 | void print_config(FILE *fp)
268 | {
269 |     fprintf(fp, "%20s = %s\n", "GIT_VERSION", GIT_VERSION);
270 |     fprintf(fp, "%20s = %s\n", "BUILD_HOST", BUILD_HOST);
271 |     fprintf(fp, "%20s = %s\n", "COMPILER_VERSION", COMPILER_VERSION);
272 |     fprintf(fp, "%20s = %s\n", "GLIBC_VERSION", gnu_get_libc_version ());
273 | #ifdef USE_MKL
274 |     char mkl_version[1024];
275 |     mkl_get_version_string(mkl_version, 1024);
276 |     fprintf(fp, "%20s = %s\n", "MKL_VERSION", mkl_version);
277 | #endif
278 | #ifdef USE_MPI
279 |     int mpi_major,  mpi_minor;
280 |     MPI_Get_version(&mpi_major, &mpi_minor);
281 |     fprintf(fp, "%20s = %d.%d\n", "MPI_VERSION", mpi_major, mpi_minor);
282 |     int mpi_len;
283 |     char mpi_library_version[MPI_MAX_LIBRARY_VERSION_STRING];
284 |     MPI_Get_library_version(mpi_library_version, &mpi_len);
285 |     char *mpi_newline = strchr(mpi_library_version, '\n');
286 |     if(mpi_newline)
287 |         *mpi_newline = 0;
288 |     fprintf(fp, "%20s = %s\n", "MPI_LIBRARY_VERSION", mpi_library_version);
289 | #endif
290 |     fprintf(fp, "%20s = %s %s\n", "BUILD_DATE", __DATE__, __TIME__);
291 |     fprintf(fp, "\n");
292 |     char *host = format_uname();
293 |     fprintf(fp, "%20s = %s\n", "HOST", host);
294 |     if(host != config_null)
295 |         free(host);
296 |     char vid[13];
297 |     vendorid(vid);
298 |     char pb[49];
299 |     proc_brand(pb);
300 |     fprintf(fp, "%20s = %s %s\n", "CPU", vid, pb);
301 |     cpuinfo ci;
302 |     cpu_info(&ci);
303 |     fprintf(fp, "%20s = %s\n", "LD_PRELOAD", xgetenv_name("LD_PRELOAD"));
304 |     fprintf(fp, "%20s = Family %u Model %u Stepping %u\n", "CPUINFO", ci.display_family, ci.display_model, ci.stepping);
305 |     fprintf(fp, "\n");
306 |     #ifdef USE_OMP
307 |     fprintf(fp, "%20s = %d\n", "NTHREADS", omp_get_max_threads());
308 |     fprintf(fp, "%20s = %s\n", "KMP_AFFINITY", xgetenv_name("KMP_AFFINITY"));
309 |     fprintf(fp, "%20s = %s\n", "KMP_PLACE_THREADS", xgetenv_name("KMP_PLACE_THREADS"));
310 |     fprintf(fp, "%20s = %s\n", "KMP_BLOCKTIME", xgetenv_name("KMP_BLOCKTIME"));
311 |     fprintf(fp, "\n");
312 | #endif
313 | #ifdef USE_NUMACTL
314 |     if(numa_available() != -1)
315 |     {
316 |         fprintf(fp, "%20s = %s\n", "NUMA_AVAILABLE", "YES");
317 |         const int nnodes = numa_max_node()+1;
318 |         fprintf(fp, "%20s = %d\n", "NUMA_NODES", nnodes);
319 |         struct node nodes[nnodes];
320 |         readnodes(nodes, nnodes);
321 |         findmem(nodes, nnodes);
322 |         struct bitmask *mynodes = numa_allocate_nodemask();
323 |         struct bitmask *mycpus = numa_allocate_cpumask();
324 |         int ret = numa_sched_getaffinity(0, mycpus);
325 |         if (ret <= 0)       // returns # bytes copied
326 |             perror("numa_sched_getaffinity");
327 |         const int ncpus = numa_num_possible_cpus();
328 |         int i;
329 |         for (i = 0; i < nnodes; ++i)
330 |         {
331 |             // check if there is any intersection with this node
332 |             int j;
333 |             for (j = 0; j < ncpus; ++j)
334 |                 if (numa_bitmask_isbitset(mycpus, j) &&
335 |                     numa_bitmask_isbitset(nodes[i].cpus, j))
336 |                     numa_bitmask_setbit(mynodes, i);
337 |         }
338 |         numa_free_cpumask(mycpus);
339 |         char *mem_node_str = mem_nodes(nodes, mynodes, nnodes);
340 |         fprintf(fp, "%20s = %s\n", "NUMA_MEM_NODES", mem_node_str);
341 |         if(mem_node_str != config_null)
342 |             free(mem_node_str);
343 |         char *cpu_node_str = cpu_nodes(mynodes, nnodes);
344 |         fprintf(fp, "%20s = %s\n", "NUMA_CPU_NODES", cpu_node_str);
345 |         if(cpu_node_str != config_null)
346 |             free(cpu_node_str);
347 |     }
348 |     else
349 |     {
350 |         fprintf(fp, "%20s = %s\n", "NUMA_AVAILABLE", "NO");
351 |     }
352 |     fprintf(fp, "\n");
353 | #endif
354 |     fprintf(fp, "\n");
355 | }
356 | 
357 | #ifdef TEST_PROG
358 | int main()
359 | {
360 |     print_config(stderr);
361 |     return 0;
362 | }
363 | #endif
364 | 


--------------------------------------------------------------------------------
/examples/hydro2d/hydro2d-x.yaml:
--------------------------------------------------------------------------------
  1 | kernels:
  2 | 
  3 |   rcp:
  4 |     declaration: REAL_T rcp(REAL_T x);
  5 |     inputs: |
  6 |       x : q?
  7 |     outputs: |
  8 |       <return> : rcp(q?)
  9 | 
 10 |   constoprim:
 11 |     declaration: conservative_to_primitive(REAL_T &prim_rho, REAL_T &inv_prim_rho, REAL_T& prim_u, REAL_T &prim_v, REAL_T &E_internal, REAL_T cons_rho, REAL_T cons_rhou, REAL_T cons_rhov, REAL_T cons_E);
 12 |     inputs: |
 13 |       cons_rho  : rho[j?][i?]              # cell (x0, x1)
 14 |       cons_rhou : rhou[j?][i?]             # cell (x0, x1)
 15 |       cons_rhov : rhov[j?][i?]             # cell (x0, x1)
 16 |       cons_E    : E[j?][i?]                # cell (x0, x1)
 17 |     outputs: |
 18 |       prim_rho     : prim_rho[j?][i?]      # cell (x0, x1)
 19 |       inv_prim_rho : inv_prim_rho[j?][i?]  # cell (x0, x1)
 20 |       prim_u       : prim_u[j?][i?]        # cell (x0, x1)
 21 |       prim_v       : prim_v[j?][i?]        # cell (x0, x1)
 22 |       E_internal   : Einternal[j?][i?]     # cell (x0, x1)
 23 | 
 24 |   new_constoprim:
 25 |     declaration: conservative_to_primitive(REAL_T &prim_rho, REAL_T &inv_prim_rho, REAL_T& prim_u, REAL_T &prim_v, REAL_T &E_internal, REAL_T cons_rho, REAL_T cons_rhou, REAL_T cons_rhov, REAL_T cons_E);
 26 |     inputs: |
 27 |       cons_rho  : new(rho[j?][i?])              # cell (x0, x1)
 28 |       cons_rhou : new(rhou[j?][i?])             # cell (x0, x1)
 29 |       cons_rhov : new(rhov[j?][i?])             # cell (x0, x1)
 30 |       cons_E    : new(E[j?][i?])                # cell (x0, x1)
 31 |     outputs: |
 32 |       prim_rho     : new(prim_rho[j?][i?])      # cell (x0, x1)
 33 |       inv_prim_rho : new(inv_prim_rho[j?][i?])  # cell (x0, x1)
 34 |       prim_u       : new(prim_u[j?][i?])        # cell (x0, x1)
 35 |       prim_v       : new(prim_v[j?][i?])        # cell (x0, x1)
 36 |       E_internal   : new(Einternal[j?][i?])     # cell (x0, x1)
 37 | 
 38 |   eqstate:
 39 |     declaration: REAL_T equation_of_state(REAL_T rho, REAL_T Einternal);
 40 |     inputs: |
 41 |       rho       : prim_rho[j?][i?]   # cell (x0, x1)
 42 |       Einternal : Einternal[j?][i?]  # cell (x0, x1)
 43 |     outputs: |
 44 |       <return> : prim_p[j?][i?]     # cell (x0, x1)
 45 | 
 46 |   new_eqstate:
 47 |     declaration: REAL_T equation_of_state(REAL_T rho, REAL_T Einternal);
 48 |     inputs: |
 49 |       rho       : new(prim_rho[j?][i?])   # cell (x0, x1)
 50 |       Einternal : new(Einternal[j?][i?])  # cell (x0, x1)
 51 |     outputs: |
 52 |       <return> : new(prim_p[j?][i?])     # cell (x0, x1)
 53 | 
 54 |   sound:
 55 |     declaration: REAL_T speed_of_sound(REAL_T inv_rho, REAL_T p);
 56 |     inputs: |
 57 |       inv_rho : inv_prim_rho[j?][i?] # cell (x0, x1)
 58 |       p       : prim_p[j?][i?]       # cell (x0, x1)
 59 |     outputs: |
 60 |       <return> : prim_c[j?][i?]       # cell (x0, x1)
 61 | 
 62 |   new_sound:
 63 |     declaration: REAL_T speed_of_sound(REAL_T inv_rho, REAL_T p);
 64 |     inputs: |
 65 |       inv_rho : new(inv_prim_rho[j?][i?]) # cell (x0, x1)
 66 |       p       : new(prim_p[j?][i?])       # cell (x0, x1)
 67 |     outputs: |
 68 |       <return> : new(prim_c[j?][i?])       # cell (x0, x1)
 69 | 
 70 | 
 71 |   # Slope is computed for v = 0, 1, 2, 3 (prim_rho, prim_u, prim_v, prim_p)
 72 |   slope:
 73 |     declaration: REAL_T slope(REAL_T qm1, REAL_T q0, REAL_T qp1, REAL_T slope_type, REAL_T inv_slope_type);
 74 |     inputs: |
 75 |       qm1 : q?[j?][i?-1]      # cell (x-1, x0)
 76 |       q0  : q?[j?][i?  ]      # cell (x0, x1)
 77 |       qp1 : q?[j?][i?+1]      # cell (x1, x2)
 78 |     outputs: |
 79 |       <return> : delta(q?[j?][i?])  # cell (x0, x1)
 80 | 
 81 |   # Handling of rcp(c) is terrible here.
 82 |   rtrace:
 83 |     declaration: rtrace(REAL_T& flux_rho_p, REAL_T& flux_u_p, REAL_T& flux_v_p, REAL_T& flux_p_p, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx);
 84 |     inputs: |
 85 |       rho : prim_rho[j?][i?]            # cell (x0, x1)
 86 |       inv_rho : inv_prim_rho[j?][i?]    # cell (x0, x1)
 87 |       u : prim_u[j?][i?]                # cell (x0, x1)
 88 |       v : prim_v[j?][i?]                # cell (x0, x1)
 89 |       p : prim_p[j?][i?]                # cell (x0, x1)
 90 |       drho : delta(prim_rho[j?][i?])    # cell (x0, x1)
 91 |       du : delta(prim_u[j?][i?])        # cell (x0, x1)
 92 |       dv : delta(prim_v[j?][i?])        # cell (x0, x1)
 93 |       dp : delta(prim_p[j?][i?])        # cell (x0, x1)
 94 |       c : prim_c[j?][i?]                # cell (x0, x1)
 95 |       inv_c : rcp(prim_c[j?][i?])       # cell (x0, x1)
 96 |     outputs: |
 97 |       flux_rho_p : rflux(rho[j?][i?])   # interface x0
 98 |       flux_u_p   : rflux(u[j?][i?])     # interface x0
 99 |       flux_v_p   : rflux(v[j?][i?])     # interface x0
100 |       flux_p_p   : rflux(p[j?][i?])     # interface x0
101 | 
102 |   ltrace:
103 |     declaration: ltrace(REAL_T& flux_rho_m, REAL_T& flux_u_m, REAL_T& flux_v_m, REAL_T& flux_p_m, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx);
104 |     inputs: |
105 |       rho : prim_rho[j?][i?-1]          # cell (x-1, x0)
106 |       inv_rho : inv_prim_rho[j?][i?-1]  # cell (x-1, x0)
107 |       u : prim_u[j?][i?-1]              # cell (x-1, x0)
108 |       v : prim_v[j?][i?-1]              # cell (x-1, x0)
109 |       p : prim_p[j?][i?-1]              # cell (x-1, x0)
110 |       drho : delta(prim_rho[j?][i?-1])  # cell (x-1, x0)
111 |       du : delta(prim_u[j?][i?-1])      # cell (x-1, x0)
112 |       dv : delta(prim_v[j?][i?-1])      # cell (x-1, x0)
113 |       dp : delta(prim_p[j?][i?-1])      # cell (x-1, x0)
114 |       c : prim_c[j?][i?-1]              # cell (x-1, x0)
115 |       inv_c : rcp(prim_c[j?][i?-1])     # cell (x-1, x0)
116 |     outputs: |
117 |       flux_rho_m : lflux(rho[j?][i?]) # interface x0
118 |       flux_u_m   : lflux(u[j?][i?])   # interface x0
119 |       flux_v_m   : lflux(v[j?][i?])   # interface x0
120 |       flux_p_m   : lflux(p[j?][i?])   # interface x0
121 | 
122 |   riemann:
123 |     declaration: riemann(REAL_T& gdnv_rho, REAL_T& gdnv_u, REAL_T& gdnv_v, REAL_T& gdnv_p, REAL_T in_left_rho, REAL_T in_left_u, REAL_T in_left_v, REAL_T in_left_p, REAL_T in_right_rho, REAL_T in_right_u, REAL_T in_right_v, REAL_T in_right_p);
124 |     inputs: |
125 |       in_left_rho  : lflux(rho[j?][i?])  # interface x0
126 |       in_left_u    : lflux(u[j?][i?])    # interface x0
127 |       in_left_v    : lflux(v[j?][i?])    # interface x0
128 |       in_left_p    : lflux(p[j?][i?])    # interface x0
129 |       in_right_rho : rflux(rho[j?][i?])  # interface x0
130 |       in_right_u   : rflux(u[j?][i?])    # interface x0
131 |       in_right_v   : rflux(v[j?][i?])    # interface x0
132 |       in_right_p   : rflux(p[j?][i?])    # interface x0
133 |     outputs: |
134 |       gdnv_rho : gdnv(rho[j?][i?])      # interface x0
135 |       gdnv_u   : gdnv(u[j?][i?])        # interface x0
136 |       gdnv_v   : gdnv(v[j?][i?])        # interface x0
137 |       gdnv_p   : gdnv(p[j?][i?])        # interface x0
138 | 
139 |   cmpflx:
140 |     declaration: cmpflx(REAL_T& flux_rho, REAL_T& flux_rhou, REAL_T& flux_rhov, REAL_T& flux_E, REAL_T gdnv_rho, REAL_T gdnv_u, REAL_T gdnv_v, REAL_T gdnv_p);
141 |     inputs: |
142 |       gdnv_rho : gdnv(rho[j?][i?])     # interface x0
143 |       gdnv_u   : gdnv(u[j?][i?])       # interface x0
144 |       gdnv_v   : gdnv(v[j?][i?])       # interface x0
145 |       gdnv_p   : gdnv(p[j?][i?])       # interface x0
146 |     outputs: |
147 |       flux_rho  : flux(rho[j?][i?])    # interface x0
148 |       flux_rhou : flux(rhou[j?][i?])   # interface x0
149 |       flux_rhov : flux(rhov[j?][i?])   # interface x0
150 |       flux_E    : flux(E[j?][i?])      # interface x0
151 | 
152 |   update:
153 |     declaration: REAL_T update(REAL_T in, REAL_T flux_left, REAL_T flux_right, REAL_T dtdx);
154 |     inputs: |
155 |       in : q?[j?][i?]                  # cell (x0, x1)
156 |       flux_left  : flux(q?[j?][i?])    # interface x0
157 |       flux_right : flux(q?[j?][i?+1])  # interface x1
158 |     outputs: |
159 |       <return> : new(q?[j?][i?])            # cell (x0, x1)
160 | 
161 |   courant:
162 |     declaration: hfav_courant(REAL_T& cfl, REAL_T u, REAL_T v, REAL_T c);
163 |     inputs: |
164 |       u : new(prim_u[j?][i?])
165 |       v : new(prim_v[j?][i?])
166 |       c : new(prim_c[j?][i?])
167 |     outputs: |
168 |       cfl : cfl[j?][i?]
169 | 
170 |   max_courant:
171 |     declaration: max_courant(REAL_T cfl, REAL_T& courantv);
172 |     inputs: |
173 |       cfl: cfl[j][i]
174 |     outputs: |
175 |       courantv: reduction(max:courantv)
176 | 
177 | globals:
178 |     inputs: |
179 |       REAL_T rho[j?][i?]
180 |       REAL_T rhou[j?][i?]
181 |       REAL_T rhov[j?][i?]
182 |       REAL_T E[j?][i?]
183 | 
184 |     outputs: |
185 |       new(rho[j][i])  => REAL_T rho[j][i]
186 |       new(rhou[j][i]) => REAL_T rhou[j][i]
187 |       new(rhov[j][i]) => REAL_T rhov[j][i]
188 |       new(E[j][i])    => REAL_T E[j][i]
189 |       courantv => REAL_T courantv
190 | 
191 | codegen options:
192 |     header: |
193 |       static void gen_xstrip(REAL_T &courantv, const int ystride, const int istart, const int iend, const int jstart, const int jend, REAL_T (*rho)[ystride], REAL_T (*rhou)[ystride], REAL_T (*rhov)[ystride], REAL_T (*E)[ystride], const REAL_T slope_type, const REAL_T inv_slope_type, const REAL_T dtdx)
194 |       {
195 |     footer: |
196 |       }
197 |     loops:
198 |     -
199 |       iter_ident: i
200 |       start: istart
201 |       end: iend
202 |       stride: 1
203 |     -
204 |       iter_ident: j
205 |       start: jstart
206 |       end: jend
207 |       stride: 1
208 |     loop order : [j, i]
209 | 
210 |     vector loop  : i
211 | 
212 |     language     : C99
213 |     types:
214 |       REAL_T : double
215 | 
216 |     prefix       : __hfav_
217 |     output file  : hydro2d-x-gen.hpp
218 | 


--------------------------------------------------------------------------------
/examples/hydro2d/hydro2d-y.yaml:
--------------------------------------------------------------------------------
  1 | kernels:
  2 | 
  3 |   rcp:
  4 |     declaration: REAL_T rcp(REAL_T x);
  5 |     inputs: |
  6 |       x : q?
  7 |     outputs: |
  8 |       <return> : rcp(q?)
  9 | 
 10 |   constoprim:
 11 |     declaration: conservative_to_primitive(REAL_T &prim_rho, REAL_T &inv_prim_rho, REAL_T& prim_u, REAL_T &prim_v, REAL_T &E_internal, REAL_T cons_rho, REAL_T cons_rhou, REAL_T cons_rhov, REAL_T cons_E);
 12 |     inputs: |
 13 |       cons_rho  : rho[j?][i?]              # cell (x0, x1)
 14 |       cons_rhou : rhou[j?][i?]             # cell (x0, x1)
 15 |       cons_rhov : rhov[j?][i?]             # cell (x0, x1)
 16 |       cons_E    : E[j?][i?]                # cell (x0, x1)
 17 |     outputs: |
 18 |       prim_rho     : prim_rho[j?][i?]      # cell (x0, x1)
 19 |       inv_prim_rho : inv_prim_rho[j?][i?]  # cell (x0, x1)
 20 |       prim_u       : prim_u[j?][i?]        # cell (x0, x1)
 21 |       prim_v       : prim_v[j?][i?]        # cell (x0, x1)
 22 |       E_internal   : Einternal[j?][i?]     # cell (x0, x1)
 23 | 
 24 |   eqstate:
 25 |     declaration: REAL_T equation_of_state(REAL_T rho, REAL_T Einternal);
 26 |     inputs: |
 27 |       rho       : prim_rho[j?][i?]   # cell (x0, x1)
 28 |       Einternal : Einternal[j?][i?]  # cell (x0, x1)
 29 |     outputs: |
 30 |       <return> : prim_p[j?][i?]     # cell (x0, x1)
 31 | 
 32 |   sound:
 33 |     declaration: REAL_T speed_of_sound(REAL_T inv_rho, REAL_T p);
 34 |     inputs: |
 35 |       inv_rho : inv_prim_rho[j?][i?] # cell (x0, x1)
 36 |       p       : prim_p[j?][i?]       # cell (x0, x1)
 37 |     outputs: |
 38 |       <return> : prim_c[j?][i?]       # cell (x0, x1)
 39 | 
 40 |   # Slope is computed for v = 0, 1, 2, 3 (prim_rho, prim_u, prim_v, prim_p)
 41 |   slope:
 42 |     declaration: REAL_T slope(REAL_T qm1, REAL_T q0, REAL_T qp1, REAL_T slope_type, REAL_T inv_slope_type);
 43 |     inputs: |
 44 |       qm1 : q?[j?-1][i?]      # cell (x-1, x0)
 45 |       q0  : q?[j?  ][i?]      # cell (x0, x1)
 46 |       qp1 : q?[j?+1][i?]      # cell (x1, x2)
 47 |     outputs: |
 48 |       <return> : delta(q?[j?][i?])  # cell (x0, x1)
 49 | 
 50 |   # Handling of rcp(c) is terrible here.
 51 |   ltrace:
 52 |     declaration: ltrace(REAL_T& flux_rho_m, REAL_T& flux_u_m, REAL_T& flux_v_m, REAL_T& flux_p_m, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx);
 53 |     inputs: |
 54 |       rho : prim_rho[j?][i?]            # cell (x0, x1)
 55 |       inv_rho : inv_prim_rho[j?][i?]    # cell (x0, x1)
 56 |       u : prim_u[j?][i?]                # cell (x0, x1)
 57 |       v : prim_v[j?][i?]                # cell (x0, x1)
 58 |       p : prim_p[j?][i?]                # cell (x0, x1)
 59 |       drho : delta(prim_rho[j?][i?])    # cell (x0, x1)
 60 |       du : delta(prim_u[j?][i?])        # cell (x0, x1)
 61 |       dv : delta(prim_v[j?][i?])        # cell (x0, x1)
 62 |       dp : delta(prim_p[j?][i?])        # cell (x0, x1)
 63 |       c : prim_c[j?][i?]                # cell (x0, x1)
 64 |       inv_c : rcp(prim_c[j?][i?])       # cell (x0, x1)
 65 |     outputs: |
 66 |       flux_rho_m : lflux(rho[j?+1][i?]) # interface x1
 67 |       flux_u_m   : lflux(u[j?+1][i?])   # interface x1
 68 |       flux_v_m   : lflux(v[j?+1][i?])   # interface x1
 69 |       flux_p_m   : lflux(p[j?+1][i?])   # interface x1
 70 | 
 71 |   rtrace:
 72 |     declaration: rtrace(REAL_T& flux_rho_p, REAL_T& flux_u_p, REAL_T& flux_v_p, REAL_T& flux_p_p, REAL_T rho, REAL_T inv_rho, REAL_T u, REAL_T v, REAL_T p, REAL_T drho, REAL_T du, REAL_T dv, REAL_T dp, REAL_T c, REAL_T inv_c, REAL_T dtdx);
 73 |     inputs: |
 74 |       rho : prim_rho[j?][i?]            # cell (x0, x1)
 75 |       inv_rho : inv_prim_rho[j?][i?]    # cell (x0, x1)
 76 |       u : prim_u[j?][i?]                # cell (x0, x1)
 77 |       v : prim_v[j?][i?]                # cell (x0, x1)
 78 |       p : prim_p[j?][i?]                # cell (x0, x1)
 79 |       drho : delta(prim_rho[j?][i?])    # cell (x0, x1)
 80 |       du : delta(prim_u[j?][i?])        # cell (x0, x1)
 81 |       dv : delta(prim_v[j?][i?])        # cell (x0, x1)
 82 |       dp : delta(prim_p[j?][i?])        # cell (x0, x1)
 83 |       c : prim_c[j?][i?]                # cell (x0, x1)
 84 |       inv_c : rcp(prim_c[j?][i?])       # cell (x0, x1)
 85 |     outputs: |
 86 |       flux_rho_p : rflux(rho[j?][i?])   # interface x0
 87 |       flux_u_p   : rflux(u[j?][i?])     # interface x0
 88 |       flux_v_p   : rflux(v[j?][i?])     # interface x0
 89 |       flux_p_p   : rflux(p[j?][i?])     # interface x0
 90 | 
 91 |   riemann:
 92 |     declaration: riemann(REAL_T& gdnv_rho, REAL_T& gdnv_u, REAL_T& gdnv_v, REAL_T& gdnv_p, REAL_T in_left_rho, REAL_T in_left_u, REAL_T in_left_v, REAL_T in_left_p, REAL_T in_right_rho, REAL_T in_right_u, REAL_T in_right_v, REAL_T in_right_p);
 93 |     inputs: |
 94 |       in_left_rho  : lflux(rho[j?][i?])  # interface x0
 95 |       in_left_u    : lflux(u[j?][i?])    # interface x0
 96 |       in_left_v    : lflux(v[j?][i?])    # interface x0
 97 |       in_left_p    : lflux(p[j?][i?])    # interface x0
 98 |       in_right_rho : rflux(rho[j?][i?])  # interface x0
 99 |       in_right_u   : rflux(u[j?][i?])    # interface x0
100 |       in_right_v   : rflux(v[j?][i?])    # interface x0
101 |       in_right_p   : rflux(p[j?][i?])    # interface x0
102 |     outputs: |
103 |       gdnv_rho : gdnv(rho[j?][i?])      # interface x0
104 |       gdnv_u   : gdnv(u[j?][i?])        # interface x0
105 |       gdnv_v   : gdnv(v[j?][i?])        # interface x0
106 |       gdnv_p   : gdnv(p[j?][i?])        # interface x0
107 | 
108 |   cmpflx:
109 |     declaration: cmpflx(REAL_T& flux_rho, REAL_T& flux_rhou, REAL_T& flux_rhov, REAL_T& flux_E, REAL_T gdnv_rho, REAL_T gdnv_u, REAL_T gdnv_v, REAL_T gdnv_p);
110 |     inputs: |
111 |       gdnv_rho : gdnv(rho[j?][i?])     # interface x0
112 |       gdnv_u   : gdnv(u[j?][i?])       # interface x0
113 |       gdnv_v   : gdnv(v[j?][i?])       # interface x0
114 |       gdnv_p   : gdnv(p[j?][i?])       # interface x0
115 |     outputs: |
116 |       flux_rho  : flux(rho[j?][i?])    # interface x0
117 |       flux_rhou : flux(rhou[j?][i?])   # interface x0
118 |       flux_rhov : flux(rhov[j?][i?])   # interface x0
119 |       flux_E    : flux(E[j?][i?])      # interface x0
120 | 
121 |   update:
122 |     declaration: REAL_T update(REAL_T in, REAL_T flux_left, REAL_T flux_right, REAL_T dtdx);
123 |     inputs: |
124 |       in : q?[j?][i?]                  # cell (x0, x1)
125 |       flux_left  : flux(q?[j?][i?])    # interface x0
126 |       flux_right : flux(q?[j?+1][i?])  # interface x1
127 |     outputs: |
128 |       <return> : new(q?[j?][i?])            # cell (x0, x1)
129 | 
130 | globals:
131 |     inputs: |
132 |       REAL_T rho[j?][i?]
133 |       REAL_T rhou[j?][i?]
134 |       REAL_T rhov[j?][i?]
135 |       REAL_T E[j?][i?]
136 | 
137 |     outputs: |
138 |       new(rho[j][i])  => REAL_T rho[j][i]
139 |       new(rhou[j][i]) => REAL_T rhou[j][i]
140 |       new(rhov[j][i]) => REAL_T rhov[j][i]
141 |       new(E[j][i])    => REAL_T E[j][i]
142 | 
143 | codegen options:
144 |     header: |
145 |       static void gen_ystrip(const int ystride, const int istart, const int iend, const int jstart, const int jend, REAL_T (*rho)[ystride], REAL_T (*rhou)[ystride], REAL_T (*rhov)[ystride], REAL_T (*E)[ystride], const REAL_T slope_type, const REAL_T inv_slope_type, const REAL_T dtdx)
146 |       {
147 |     footer: |
148 |       }
149 |     loops:
150 |     -
151 |       iter_ident: i
152 |       start: istart
153 |       end: iend
154 |       stride: 1
155 |     -
156 |       iter_ident: j
157 |       start: jstart
158 |       end: jend
159 |       stride: 1
160 |     loop order : [i, j]
161 | 
162 |     vector loop  : i
163 | 
164 |     language     : C99
165 |     types:
166 |       REAL_T : double
167 | 
168 |     prefix       : __hfav_
169 |     output file  : hydro2d-y-gen.hpp
170 | 


--------------------------------------------------------------------------------
/examples/hydro2d/pcl-hydro-params.cpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/pcl-hydro-params.cpp : parameter parsing for hydro
  2 | 
  3 |    (C) Romain Teyssier : CEA/IRFU           -- original F90 code
  4 |    (C) Pierre-Francois Lavallee : IDRIS      -- original F90 code
  5 |    (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version
  6 |    (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86
  7 |    (C) John Pennycook : Intel -- augmentations to above version
  8 | */
  9 | /*
 10 |   This software is governed by the CeCILL license under French law and
 11 |   abiding by the rules of distribution of free software.  You can  use,
 12 |   modify and/ or redistribute the software under the terms of the CeCILL
 13 |   license as circulated by CEA, CNRS and INRIA at the following URL
 14 |   "http://www.cecill.info".
 15 | 
 16 |   As a counterpart to the access to the source code and  rights to copy,
 17 |   modify and redistribute granted by the license, users are provided only
 18 |   with a limited warranty  and the software's author,  the holder of the
 19 |   economic rights,  and the successive licensors  have only  limited
 20 |   liability.
 21 | 
 22 |   In this respect, the user's attention is drawn to the risks associated
 23 |   with loading,  using,  modifying and/or developing or reproducing the
 24 |   software by the user in light of its specific status of free software,
 25 |   that may mean  that it is complicated to manipulate,  and  that  also
 26 |   therefore means  that it is reserved for developers  and  experienced
 27 |   professionals having in-depth computer knowledge. Users are therefore
 28 |   encouraged to load and test the software's suitability as regards their
 29 |   requirements in conditions enabling the security of their systems and/or
 30 |   data to be ensured and,  more generally, to use and operate it in the
 31 |   same conditions as regards security.
 32 | 
 33 |   The fact that you are presently reading this means that you have had
 34 |   knowledge of the CeCILL license and that you accept its terms.
 35 | */
 36 | #include "pcl-hydro.hpp"
 37 | 
 38 | #include <cstring>
 39 | #include <cstdio>
 40 | 
 41 | // Give every run parameter of *H its built-in default value.
 42 | static void default_values(hydro *H)
 43 | {
 44 |     hydro &cfg = *H;
 45 |     cfg.global_n[0]    = cfg.global_n[1] = 20;
 46 |     cfg.nxystep        = -1;                  // -1 is replaced in init_hydro()
 47 |     cfg.dx             = 1.0;
 48 |     cfg.courant_number = 0.5;
 49 |     cfg.iorder         = 2;
 50 |     cfg.slope_type     = 1.;
 51 |     cfg.scheme         = hydro::MUSCL;
 52 |     cfg.testcase       = 0;
 53 |     cfg.t              = 0.0;
 54 |     cfg.tend           = 0.0;
 55 |     cfg.step           = 0;
 56 |     cfg.nstepmax       = (unsigned int)-1;    // largest unsigned int value
 57 | }
 58 | 
 59 | // Split a "key = value" line in place. On return *pkey points at the key
 60 | // (leading blanks skipped, cut at its first blank) and *pval at the value with
 61 | // surrounding blanks removed, or is NULL when the line has no '='.
 62 | static void keyval(char *buffer, char **pkey, char **pval)
 63 | {
 64 |     // cut the line at the first end-of-line character; also handles "\r\n"
 65 |     buffer[strcspn(buffer, "\r\n")] = 0;
 66 | 
 67 |     // the key starts at the first character that is not a space or a tab
 68 |     *pkey = buffer + strspn(buffer, " \t");
 69 | 
 70 |     *pval = strchr(*pkey, '=');
 71 |     if (*pval) {
 72 |         // split at '=': terminate the key part, the value begins right after
 73 |         **pval = 0;
 74 |         (*pval)++;
 75 |         // trim blanks around the value so that string values such as
 76 |         // "scheme = muscl" compare equal to "muscl"
 77 |         *pval += strspn(*pval, " \t");
 78 |         char *end = *pval + strlen(*pval);
 79 |         while (end > *pval && (end[-1] == ' ' || end[-1] == '\t'))
 80 |             *--end = 0;
 81 |     }
 82 | 
 83 |     // the key ends at its first space or tab
 84 |     (*pkey)[strcspn(*pkey, " \t")] = 0;
 85 | }
 86 | 
 87 | // Parse one int from text; true only when a value was actually converted.
 88 | static bool kv_read_int(const char *text, int *out)
 89 | {
 90 |     return sscanf(text, "%d", out) == 1;
 91 | }
 92 | 
 93 | // Parse one real from text with REAL_FMT; true only when a value was converted.
 94 | // NOTE(review): REAL_FMT is defined outside this file; the destination stays a
 95 | // double because that is what the original code scanned into -- confirm.
 96 | static bool kv_read_real(const char *text, double *out)
 97 | {
 98 |     return sscanf(text, REAL_FMT, out) == 1;
 99 | }
100 | 
101 | // Apply a single "key=value" setting to *H.
102 | //
103 | // kvstr is split in place by keyval(), so the caller's buffer is modified.
104 | // Returns true when the key is recognised and its value parses and passes the
105 | // range check for that key. Returns false -- leaving *H unchanged -- for a
106 | // line without '=', an unknown key, an unparsable value or a rejected value.
107 | bool hydro_set_kv(hydro *H, char *kvstr)
108 | {
109 |     char *pkey, *pval;
110 |     keyval(kvstr, &pkey, &pval);
111 | 
112 |     if(!pkey || !pval)
113 |         return false;
114 | 
115 |     // Values are scanned into locals first: a failed sscanf() leaves its
116 |     // destination unset, so it must never be range-checked or stored.
117 |     int    ival;
118 |     double rval;
119 | 
120 |     // int parameters
121 |     if (strcmp(pkey, "nstepmax") == 0) {
122 |         unsigned int uval;
123 |         if(sscanf(pval, "%u", &uval) != 1)
124 |             return false;
125 |         H->nstepmax = uval;
126 |         return true;
127 |     }
128 |     if (strcmp(pkey, "nx") == 0) {
129 |         if(!kv_read_int(pval, &ival) || ival <= 0)
130 |             return false;
131 |         H->global_n[0] = ival;
132 |         return true;
133 |     }
134 |     if (strcmp(pkey, "ny") == 0) {
135 |         if(!kv_read_int(pval, &ival) || ival <= 0)
136 |             return false;
137 |         H->global_n[1] = ival;
138 |         return true;
139 |     }
140 |     if (strcmp(pkey, "nxystep") == 0) {
141 |         if(!kv_read_int(pval, &ival) || ival <= 0)
142 |             return false;
143 |         H->nxystep = ival;
144 |         return true;
145 |     }
146 |     if (strcmp(pkey, "iorder") == 0) {
147 |         if(!kv_read_int(pval, &ival) || (ival != 1 && ival != 2))
148 |             return false;
149 |         H->iorder = ival;
150 |         return true;
151 |     }
152 |     if (strcmp(pkey, "testcase") == 0) {
153 |         if(!kv_read_int(pval, &ival) || ival < 0 || ival > 2)
154 |             return false;
155 |         H->testcase = ival;
156 |         return true;
157 |     }
158 | 
159 |     // float parameters; !(x > 0.0) also rejects NaN, as the original test did
160 |     if (strcmp(pkey, "slope_type") == 0) {
161 |         if(!kv_read_real(pval, &rval) || !(rval > 0.0))
162 |             return false;
163 |         H->slope_type = rval;
164 |         return true;
165 |     }
166 |     if (strcmp(pkey, "tend") == 0) {
167 |         if(!kv_read_real(pval, &rval) || !(rval > 0.0))
168 |             return false;
169 |         H->tend = rval;
170 |         return true;
171 |     }
172 |     if (strcmp(pkey, "dx") == 0) {
173 |         if(!kv_read_real(pval, &rval) || !(rval > 0.0))
174 |             return false;
175 |         H->dx = rval;
176 |         return true;
177 |     }
178 |     if (strcmp(pkey, "courant_factor") == 0) {
179 |         if(!kv_read_real(pval, &rval) || !(rval > 0.0))
180 |             return false;
181 |         H->courant_number = rval;
182 |         return true;
183 |     }
184 | 
185 |     // string parameter
186 |     if (strcmp(pkey, "scheme") == 0) {
187 |         if (strcmp(pval, "muscl") == 0) {
188 |             H->scheme = hydro::MUSCL;
189 |         } else if (strcmp(pval, "plmde") == 0) {
190 |             H->scheme = hydro::PLMDE;
191 |         } else if (strcmp(pval, "collela") == 0) {
192 |             H->scheme = hydro::COLLELA;
193 |         } else {
194 |             return false;
195 |         }
196 |         return true;
197 |     }
198 |     return false;
199 | }
233 | 
234 | // Apply every line of datafile to *H via hydro_set_kv(); exit(1) if it cannot be opened.
235 | static void process_input(hydro *H, const char *datafile, int quiet)
236 | {
237 |     char  line[1024];
238 |     FILE *in = xfopen_read(datafile, "r");
239 |     if (!in) {
240 |         fprintf(stderr, "can't read input file\n");
241 |         exit(1);
242 |     }
243 |     // fgets() hands back its buffer argument until end-of-file or a read error
244 |     while (fgets(line, sizeof(line), in) == line) {
245 |         if (hydro_set_kv(H, line) || quiet >= 2)
246 |             continue;   // accepted, or rejection reports are silenced
247 |         printf("[PARAMS] Skipping unused key %s\n", line);
248 |     }
249 |     fclose(in);
250 | }
251 | 
252 | bool load_hydro_params(hydro *h, const char *file, int quiet)
253 | {
254 |     // built-in defaults first; a parameter file, when given, then overrides them
255 |     default_values(h);
256 |     if(file != NULL) process_input(h, file, quiet);
257 |     return true;   // this function never reports failure
258 | }
259 | 


--------------------------------------------------------------------------------
/examples/hydro2d/pcl-hydro-util.cpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/pcl-hydro-util.cpp : utilities for setting up hydro code
  2 | 
  3 |    (C) Romain Teyssier : CEA/IRFU           -- original F90 code
  4 |    (C) Pierre-Francois Lavallee : IDRIS      -- original F90 code
  5 |    (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version
  6 |    (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86
  7 |    (C) John Pennycook : Intel -- augmentations to above version
  8 | */
  9 | /*
 10 |   This software is governed by the CeCILL license under French law and
 11 |   abiding by the rules of distribution of free software.  You can  use,
 12 |   modify and/ or redistribute the software under the terms of the CeCILL
 13 |   license as circulated by CEA, CNRS and INRIA at the following URL
 14 |   "http://www.cecill.info".
 15 | 
 16 |   As a counterpart to the access to the source code and  rights to copy,
 17 |   modify and redistribute granted by the license, users are provided only
 18 |   with a limited warranty  and the software's author,  the holder of the
 19 |   economic rights,  and the successive licensors  have only  limited
 20 |   liability.
 21 | 
 22 |   In this respect, the user's attention is drawn to the risks associated
 23 |   with loading,  using,  modifying and/or developing or reproducing the
 24 |   software by the user in light of its specific status of free software,
 25 |   that may mean  that it is complicated to manipulate,  and  that  also
 26 |   therefore means  that it is reserved for developers  and  experienced
 27 |   professionals having in-depth computer knowledge. Users are therefore
 28 |   encouraged to load and test the software's suitability as regards their
 29 |   requirements in conditions enabling the security of their systems and/or
 30 |   data to be ensured and,  more generally, to use and operate it in the
 31 |   same conditions as regards security.
 32 | 
 33 |   The fact that you are presently reading this means that you have had
 34 |   knowledge of the CeCILL license and that you accept its terms.
 35 | */
 36 | 
 37 | #include "pcl-hydro.hpp"
 38 | #include <cstring>
 39 | 
 40 | // Allocate h->q and fill it with the initial state selected by h->testcase.
 41 | // Also derives nxystep (when left at -1), ystride, varstride and inv_slope_type.
 42 | void init_hydro(hydro *h)
 43 | {
 44 |     const int pad = 2;   // extra rows/columns on each side of the global_n grid
 45 | 
 46 |     if(h->nxystep == -1)
 47 |         h->nxystep = std::max(h->global_n[0], h->global_n[1]);
 48 | 
 49 |     h->inv_slope_type = 1.0/h->slope_type;
 50 | 
 51 |     // ystride: elements per padded row; varstride: elements per padded plane
 52 |     h->ystride        = h->global_n[0] + 2*pad;
 53 |     h->varstride      = h->ystride * (h->global_n[1] + 2*pad);
 54 | 
 55 |     // four planes of varstride elements each, stored back to back
 56 |     h->q              = (REAL_T *) xmalloc(sizeof(REAL_T) * h->varstride * 4, "q");
 57 | 
 58 |     // uniform background: one constant per variable plane
 59 |     const REAL_T background[4] = { (REAL_T) 1.0, (REAL_T) 0.0, (REAL_T) 0.0, (REAL_T) 1e-5 };
 60 |     for(int v = 0; v < 4; ++v)
 61 |     {
 62 |         REAL_T *const plane = h->q + v*h->varstride;
 63 |         for(int k = 0; k < h->varstride; ++k)
 64 |             plane[k] = background[v];
 65 |     }
 66 | 
 67 |     // the test cases differ only in which cells of plane 3 are raised to 1/dx^2
 68 |     REAL_T *const plane3 = h->q + 3*h->varstride;
 69 |     switch(h->testcase)
 70 |     {
 71 |     case 0:   // one cell in the middle of the grid
 72 |         {
 73 |             const int row = h->global_n[1] / 2 + pad;
 74 |             const int col = h->global_n[0] / 2 + pad;
 75 |             plane3[h->ystride*row + col] = ((REAL_T) 1.0) / h->dx / h->dx;
 76 |         }
 77 |         break;
 78 |     case 1:   // the first cell inside the padding
 79 |         plane3[h->ystride*pad + pad] = ((REAL_T) 1.0) / h->dx / h->dx;
 80 |         break;
 81 |     case 2:   // the first column inside the padding, all global_n[1] rows
 82 |         for(int row = pad; row < h->global_n[1] + pad; ++row)
 83 |             plane3[h->ystride*row + pad] = ((REAL_T) 1.0) / h->dx / h->dx;
 84 |         break;
 85 |     default:
 86 |         die("Test case %d not implemented!\n", h->testcase);
 87 |     }
 88 | }
 89 | 
 90 | void destroy_hydro(hydro *h)   // counterpart of init_hydro(), which allocates h->q with xmalloc()
 91 | {
 92 |     xmalloc_free(h->q);        // h->q itself is not reset afterwards
 93 | }
 94 | 
 95 | void write_hydro_ts(timeseries_writer *tw, const hydro *h)
 96 | {
 97 |     // one frame at time h->t holding the 4 variable planes, appended row by row
 98 |     const int cols = h->global_n[0] + 2*2, rows = h->global_n[1] + 2*2;
 99 |     tw->new_frame(h->t, cols * rows * 4 * sizeof(REAL_T));
100 |     REAL_T *plane = h->q;
101 |     for(int v = 0; v < 4; ++v, plane += h->varstride)
102 |         for(int j = 0; j < rows; ++j)
103 |             tw->append(plane + j*h->ystride, cols * sizeof(REAL_T));
104 | }
105 | 
106 | // Reflect elements width..2*width-1 of dest (stride apart) onto elements width-1..0, scaled by sign.
107 | static void set_boundary(REAL_T *restrict dest, const REAL_T sign,
108 |                          const int width, const int stride)
109 | {
110 |     int src = 2*width - 1;   // mirror partner of element 0
111 |     for(int dst = 0; dst < width; ++dst, --src)
112 |         dest[dst*stride] = sign*dest[src*stride];
113 | }
114 | 
115 | // Run set_boundary() once per variable: plane v starts at dest_base + v*vstride and uses signs[v].
116 | void set_boundaries(REAL_T *restrict dest_base, const REAL_T *restrict signs,
117 |                     const int width, const int stride, const int nv, const int vstride)
118 | {
119 |     int v = 0;
120 |     while(v < nv) {
121 |         set_boundary(dest_base + v*vstride, signs[v], width, stride);
122 |         ++v;
123 |     }
124 | }
125 | 
126 | // Time step = courant_number * dx / courantv, where courantv starts at SMALLC and is
127 | // updated by courant() for every cell with 2 <= i < global_n[0]+2, 2 <= j < global_n[1]+2.
128 | REAL_T compute_timestep(const hydro *h)
129 | {
130 |     // the four planes of h->q, in the order conservative_to_primitive() receives them
131 |     REAL_T *const cons_rho  = h->q + 0*h->varstride;
132 |     REAL_T *const cons_rhou = h->q + 1*h->varstride;
133 |     REAL_T *const cons_rhov = h->q + 2*h->varstride;
134 |     REAL_T *const cons_E    = h->q + 3*h->varstride;
135 |     const int     i_end     = h->global_n[0] + 2, j_end = h->global_n[1] + 2;
136 |     REAL_T        courantv  = SMALLC;
137 |     for(int j = 2; j < j_end; ++j)
138 |         for(int i = 2; i < i_end; ++i)
139 |         {
140 |             const int at = j * h->ystride + i;
141 |             REAL_T    rho, inv_rho, u, v, e_int;
142 |             conservative_to_primitive(&rho, &inv_rho, &u, &v, &e_int,
143 |                                       cons_rho[at], cons_rhou[at], cons_rhov[at], cons_E[at]);
144 |             const REAL_T p = equation_of_state(rho, e_int);
145 |             courant(&courantv, u, v, speed_of_sound(inv_rho, p));
146 |         }
147 |     return h->courant_number * h->dx / courantv;
148 | }
149 | 
150 | // Set the globals ZEROL, ZEROR and PROJECT for scheme s.
151 | // Returns false, leaving the globals untouched, when s is not a known scheme.
152 | bool set_scheme(hydro::hscheme s, const REAL_T dt_dx)
153 | {
154 |     if(s == hydro::MUSCL)
155 |     {
156 |         // the only scheme whose values depend on dt_dx
157 |         ZEROL   = -((REAL_T) 100.0)/dt_dx;
158 |         ZEROR   =  ((REAL_T) 100.0)/dt_dx;
159 |         PROJECT =  (REAL_T) 1.0;
160 |         return true;
161 |     }
162 | 
163 |     if(s == hydro::PLMDE || s == hydro::COLLELA)
164 |     {
165 |         // both zero ZEROL/ZEROR and differ only in PROJECT
166 |         ZEROL   = (REAL_T) 0.0;
167 |         ZEROR   = (REAL_T) 0.0;
168 |         PROJECT = (s == hydro::PLMDE) ? (REAL_T) 1.0 : (REAL_T) 0.0;
169 |         return true;
170 |     }
171 | 
172 |     return false;
173 | }
174 | 


--------------------------------------------------------------------------------
/examples/hydro2d/pcl-hydro.hpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/pcl-hydro.hpp : header for hydro code
  2 | 
  3 |    (C) Romain Teyssier : CEA/IRFU           -- original F90 code
  4 |    (C) Pierre-Francois Lavallee : IDRIS      -- original F90 code
  5 |    (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version
  6 |    (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86
  7 |    (C) John Pennycook : Intel -- augmentations to above version
  8 | */
  9 | /*
 10 |   This software is governed by the CeCILL license under French law and
 11 |   abiding by the rules of distribution of free software.  You can  use,
 12 |   modify and/ or redistribute the software under the terms of the CeCILL
 13 |   license as circulated by CEA, CNRS and INRIA at the following URL
 14 |   "http://www.cecill.info".
 15 | 
 16 |   As a counterpart to the access to the source code and  rights to copy,
 17 |   modify and redistribute granted by the license, users are provided only
 18 |   with a limited warranty  and the software's author,  the holder of the
 19 |   economic rights,  and the successive licensors  have only  limited
 20 |   liability.
 21 | 
 22 |   In this respect, the user's attention is drawn to the risks associated
 23 |   with loading,  using,  modifying and/or developing or reproducing the
 24 |   software by the user in light of its specific status of free software,
 25 |   that may mean  that it is complicated to manipulate,  and  that  also
 26 |   therefore means  that it is reserved for developers  and  experienced
 27 |   professionals having in-depth computer knowledge. Users are therefore
 28 |   encouraged to load and test the software's suitability as regards their
 29 |   requirements in conditions enabling the security of their systems and/or
 30 |   data to be ensured and,  more generally, to use and operate it in the
 31 |   same conditions as regards security.
 32 | 
 33 |   The fact that you are presently reading this means that you have had
 34 |   knowledge of the CeCILL license and that you accept its terms.
 35 | */
 36 | 
 37 | #ifndef __PCL_HYDRO_HPP__
 38 | #define __PCL_HYDRO_HPP__
 39 | 
 40 | #include "arch.hpp"
 41 | #include "timeseries.hpp"
 42 | 
 43 | static const REAL_T GAMMA     = 1.4;
 44 | static const REAL_T GAMMA6    = (GAMMA + 1) / (2.0 * GAMMA);
 45 | static const REAL_T SMALLC    = 1e-10;
 46 | static const REAL_T SMALLR    = 1e-10;
 47 | static const REAL_T SMALLP    = SMALLC*SMALLC / GAMMA;
 48 | static const REAL_T SMALLPP   = SMALLR * SMALLP;
 49 | static const REAL_T PRECISION = 1e-6;
 50 | 
 51 | static const int NITER_RIEMANN = 10;
 52 | extern REAL_T ZEROR;
 53 | extern REAL_T ZEROL;
 54 | extern REAL_T PROJECT;
 55 | 
 56 | struct hydro // parameters and state handed (by pointer) to the util and strip functions declared below
 57 | {
 58 |     typedef enum { MUSCL = 1, PLMDE = 2, COLLELA = 3} hscheme; // scheme selector accepted by set_scheme()
 59 |     int global_n[2]; // presumably the grid extent per dimension -- TODO confirm
 60 | 
 61 |     int nxystep; // test.nml sets a key of the same name in its &MESH group
 62 | 
 63 |     int ystride;   // presumably the element stride between rows of q -- TODO confirm
 64 |     int varstride; // presumably the element stride between variables of q -- TODO confirm
 65 | 
 66 |     int     testcase; // test.nml sets a key of the same name in its &MESH group
 67 |     hscheme scheme;   // one of the hscheme values above
 68 | 
 69 |     int          step;           // presumably the current step counter -- TODO confirm
 70 |     unsigned int nstepmax;       // test.nml sets a key of the same name in its &RUN group
 71 |     int          iorder;
 72 |     REAL_T       slope_type;     // NOTE(review): slope() takes slope_type and inv_slope_type arguments
 73 |     REAL_T       inv_slope_type; // presumably 1/slope_type -- TODO confirm
 74 | 
 75 |     REAL_T courant_number;
 76 |     REAL_T dx;   // test.nml sets a key of the same name in its &MESH group
 77 |     REAL_T t;    // presumably the current simulation time -- TODO confirm
 78 |     REAL_T tend; // test.nml sets a key of the same name in its &RUN group
 79 | 
 80 |     REAL_T *q; // presumably the state array addressed through ystride/varstride -- TODO confirm
 81 | };
 82 | 
 83 | // util functions
 84 | void init_hydro(hydro *h);
 85 | void destroy_hydro(hydro *h);
 86 | bool load_hydro_params(hydro *h, const char *file, int quiet);
 87 | bool hydro_set_kv(hydro *H, char *kvstr);
 88 | 
 89 | void write_hydro_ts(timeseries_writer *tw, const hydro *h);
 90 | 
 91 | REAL_T compute_timestep(const hydro *h);
 92 | bool   set_scheme(hydro::hscheme s, const REAL_T dt_dx);
 93 | void   vtkfile(int step, const REAL_T *q, const int n[2], const int padding, const int ystride, const int varstride, const double dx);
 94 | 
 95 | void set_boundaries(      REAL_T *restrict dest_base,
 96 |                     const REAL_T *restrict signs,
 97 |                     const int              width,
 98 |                     const int              stride,
 99 |                     const int              nv,
100 |                     const int              vstride);
101 | 
102 | // serial core functions
103 | void conservative_to_primitive(      REAL_T *restrict prim_rho, REAL_T *restrict inv_prim_rho,       REAL_T *restrict prim_u,          REAL_T *restrict prim_v,          REAL_T *restrict E_internal,
104 |                                      const REAL_T           cons_rho,                                const REAL_T           cons_rhou, const REAL_T           cons_rhov, const REAL_T           cons_E);
105 | REAL_T equation_of_state(const REAL_T rho,
106 |                          const REAL_T E_internal);
107 | REAL_T speed_of_sound(const REAL_T inv_rho,
108 |                       const REAL_T p);
109 | REAL_T slope(const REAL_T nbv_m, const REAL_T nbv_0, const REAL_T nbv_p,
110 |              const REAL_T slope_type, const REAL_T inv_slope_type);
111 | void flux(      REAL_T *restrict flux_rho,                              REAL_T *restrict flux_u,         REAL_T *restrict flux_v,         REAL_T *restrict flux_p,
112 |           const REAL_T           rho,      const REAL_T inv_rho,  const REAL_T           u,        const REAL_T           v,        const REAL_T           p,
113 |           const REAL_T           sp_m,     const REAL_T sp_0,     const REAL_T           sp_p,
114 |           const REAL_T           alpha_m,  const REAL_T alpha_0r, const REAL_T           alpha_0v, const REAL_T           alpha_p,
115 |           const REAL_T           c);
116 | void trace(      REAL_T *restrict flux_rho_m,                             REAL_T *restrict flux_u_m,       REAL_T *restrict flux_v_m,       REAL_T *restrict flux_p_m,
117 |                         REAL_T *restrict flux_rho_p,                             REAL_T *restrict flux_u_p,       REAL_T *restrict flux_v_p,       REAL_T *restrict flux_p_p,
118 |            const REAL_T           rho,        const REAL_T inv_rho, const REAL_T           u,        const REAL_T           v,        const REAL_T           p,
119 |            const REAL_T           drho,       const REAL_T du,      const REAL_T           dv,       const REAL_T           dp,
120 |            const REAL_T           c,          const REAL_T inv_c,
121 |            const REAL_T           dtdx);
122 | void riemann(      REAL_T *restrict     gdnv_rho,       REAL_T *restrict gdnv_u,           REAL_T *restrict gdnv_v,           REAL_T *restrict gdnv_p,
123 |              const REAL_T            in_left_rho, const REAL_T           in_left_u,  const REAL_T           in_left_v,  const REAL_T           in_left_p,
124 |              const REAL_T           in_right_rho, const REAL_T           in_right_u, const REAL_T           in_right_v, const REAL_T           in_right_p);
125 | void cmpflx(      REAL_T *restrict flux_rho,       REAL_T *restrict flux_rhou,       REAL_T *restrict flux_rhov,       REAL_T *restrict flux_E,
126 |             const REAL_T           gdnv_rho, const REAL_T           gdnv_u,    const REAL_T           gdnv_v,    const REAL_T           gdnv_p);
127 | REAL_T update(const REAL_T  in,
128 |               const REAL_T  flux_left, const REAL_T  flux_right,
129 |               const REAL_T  dtdx);
130 | 
131 | inline void rtrace(     REAL_T *restrict flux_rho_p,                             REAL_T *restrict flux_u_p,       REAL_T *restrict flux_v_p,       REAL_T *restrict flux_p_p,
132 |            const REAL_T           rho,        const REAL_T inv_rho, const REAL_T           u,        const REAL_T           v,        const REAL_T           p,
133 |            const REAL_T           drho,       const REAL_T du,      const REAL_T           dv,       const REAL_T           dp,
134 |            const REAL_T           c,          const REAL_T inv_c,
135 |            const REAL_T           dtdx)
136 | { // Computes the alpha_* terms from the slopes (drho, du, dv, dp) and the right_sp_* factors, then delegates to flux() with the flux_*_p pointers.
137 |     const REAL_T alpha_m  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) - du) * rho * inv_c;
138 |     const REAL_T alpha_p  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) + du) * rho * inv_c;
139 |     const REAL_T alpha_0r = drho - dp * (inv_c*inv_c);
140 |     const REAL_T alpha_0v = dv;
141 |     // Each right_sp_* is PROJECT when its speed (u-c, u+c or u) is >= ZEROR, otherwise speed*dtdx + 1.
142 |     const REAL_T right_sp_m = ((u - c) >= ZEROR) ? PROJECT : (u - c) * dtdx + ((REAL_T) 1.0);
143 |     const REAL_T right_sp_p = ((u + c) >= ZEROR) ? PROJECT : (u + c) * dtdx + ((REAL_T) 1.0);
144 |     const REAL_T right_sp_0 =  (u      >= ZEROR) ? PROJECT :  u      * dtdx + ((REAL_T) 1.0);
145 |     // flux() takes the output pointers first and the sp factors in (m, 0, p) order.
146 |     flux(flux_rho_p,          flux_u_p,   flux_v_p,   flux_p_p,
147 |          rho,        inv_rho, u,          v,          p,
148 |          right_sp_m,          right_sp_0, right_sp_p,
149 |          alpha_m,             alpha_0r,   alpha_0v,   alpha_p,
150 |          c);
151 |     // todo: handle passive terms
152 | }
153 | 
154 | inline void ltrace(      REAL_T *restrict flux_rho_m,                             REAL_T *restrict flux_u_m,       REAL_T *restrict flux_v_m,       REAL_T *restrict flux_p_m,
155 |            const REAL_T           rho,        const REAL_T inv_rho, const REAL_T           u,        const REAL_T           v,        const REAL_T           p,
156 |            const REAL_T           drho,       const REAL_T du,      const REAL_T           dv,       const REAL_T           dp,
157 |            const REAL_T           c,          const REAL_T inv_c,
158 |            const REAL_T           dtdx)
159 | { // Computes the alpha_* terms from the slopes (drho, du, dv, dp) and the left_sp_* factors, then delegates to flux() with the flux_*_m pointers.
160 |     const REAL_T alpha_m  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) - du) * rho * inv_c;
161 |     const REAL_T alpha_p  = ((REAL_T) 0.5) * (dp * ( inv_rho * inv_c ) + du) * rho * inv_c;
162 |     const REAL_T alpha_0r = drho - dp * (inv_c*inv_c);
163 |     const REAL_T alpha_0v = dv;
164 |     // Each left_sp_* is -PROJECT when its speed (u-c, u+c or u) is <= ZEROL, otherwise speed*dtdx - 1.
165 |     const REAL_T left_sp_m = ((u - c) <= ZEROL) ? -PROJECT : (u - c) * dtdx - ((REAL_T) 1.0);
166 |     const REAL_T left_sp_p = ((u + c) <= ZEROL) ? -PROJECT : (u + c) * dtdx - ((REAL_T) 1.0);
167 |     const REAL_T left_sp_0 =  (u      <= ZEROL) ? -PROJECT :  u      * dtdx - ((REAL_T) 1.0);
168 |     // flux() takes the output pointers first and the sp factors in (m, 0, p) order.
169 |     flux(flux_rho_m,          flux_u_m,   flux_v_m,   flux_p_m,
170 |          rho,        inv_rho, u,          v,          p,
171 |          left_sp_m,           left_sp_0,  left_sp_p,
172 |          alpha_m,             alpha_0r,   alpha_0v,   alpha_p,
173 |          c);
174 | 
175 |     // todo: handle passive terms
176 | }
177 | 
178 | inline void courant(REAL_T *restrict courantv, const REAL_T u, const REAL_T v, const REAL_T c)
179 | {
180 |     // Fold max(c + |u|, c + |v|) into the running maximum stored in *courantv.
181 |     const REAL_T cell_speed = std::max(c + std::abs(u), c + std::abs(v));
182 |     *courantv = std::max(*courantv, cell_speed);
183 | }
184 | 
185 | inline void hfav_courant(REAL_T *restrict courantv, const REAL_T u, const REAL_T v, const REAL_T c)
186 | {
187 |     // Store max(c + |u|, c + |v|) in *courantv, overwriting whatever was there.
188 |     const REAL_T speed_u = c + std::abs(u), speed_v = c + std::abs(v);
189 |     *courantv = std::max(speed_u, speed_v);
190 | }
191 | 
192 | inline void max_courant(REAL_T cfl, REAL_T* restrict courantv)
193 | {
194 |     // Raise *courantv to cfl when cfl is larger; otherwise keep its value.
195 |     if(*courantv < cfl) *courantv = cfl;
196 | }
197 | 
198 | struct strip_work
199 | {
200 |     REAL_T flux      [4][2]; // flux at i-1/2, i+1/2
201 |     REAL_T left_flux [4][2]; // left_flux at i, i+1
202 |     REAL_T prim      [5][3]; // prim for i, i+1, i+2
203 | };
204 | 
205 | void strip_prime(strip_work   *restrict sw,
206 |                  const REAL_T *restrict rho,
207 |                  const REAL_T *restrict rhou,
208 |                  const REAL_T *restrict rhov,
209 |                  const REAL_T *restrict E,
210 |                  const hydro  *restrict h,
211 |                  const int              stride,
212 |                  const REAL_T           dtdx);
213 | 
214 | REAL_T strip_stable(const hydro *restrict h,
215 |                     REAL_T      *restrict rho,
216 |                     REAL_T      *restrict rhou,
217 |                     REAL_T      *restrict rhov,
218 |                     REAL_T      *restrict E,
219 |                     strip_work  *restrict sw,
220 |                     const int             i,
221 |                     const int             stride,
222 |                     const REAL_T          dtdx,
223 |                     const bool            do_courant);
224 | 
225 | // vector core functions
226 | void vconservative_to_primitive(      VREAL_T *restrict prim_rho, VREAL_T *restrict inv_prim_rho,       VREAL_T *restrict prim_u,          VREAL_T *restrict prim_v,          VREAL_T *restrict E_internal,
227 |                                 const VREAL_T           cons_rho,                                 const VREAL_T           cons_rhou, const VREAL_T           cons_rhov, const VREAL_T           cons_E);
228 | VREAL_T vequation_of_state(const VREAL_T rho,
229 |                            const VREAL_T E_internal);
230 | VREAL_T vspeed_of_sound(const VREAL_T inv_rho,
231 |                         const VREAL_T p);
232 | VREAL_T vslope(const VREAL_T nbv_m,      const VREAL_T nbv_0, const VREAL_T nbv_p,
233 |                const VREAL_T slope_type, const VREAL_T inv_slope_type);
234 | void vflux(      VREAL_T *restrict flux_rho,                              VREAL_T *restrict flux_u,         VREAL_T *restrict flux_v,         VREAL_T *restrict flux_p,
235 |            const VREAL_T           rho,      const VREAL_T inv_rho,  const VREAL_T           u,        const VREAL_T           v,        const VREAL_T           p,
236 |            const VREAL_T           sp_m,     const VREAL_T sp_0,     const VREAL_T           sp_p,
237 |            const VREAL_T           alpha_m,  const VREAL_T alpha_0r, const VREAL_T           alpha_0v, const VREAL_T           alpha_p,
238 |            const VREAL_T           c);
239 | void vtrace(      VREAL_T *restrict flux_rho_m,                              VREAL_T *restrict flux_u_m,       VREAL_T *restrict flux_v_m,       VREAL_T *restrict flux_p_m,
240 |                   VREAL_T *restrict flux_rho_p,                              VREAL_T *restrict flux_u_p,       VREAL_T *restrict flux_v_p,       VREAL_T *restrict flux_p_p,
241 |             const VREAL_T           rho,        const VREAL_T inv_rho, const VREAL_T           u,        const VREAL_T           v,        const VREAL_T           p,
242 |             const VREAL_T           drho,       const VREAL_T du,      const VREAL_T           dv,       const VREAL_T           dp,
243 |             const VREAL_T           c,          const VREAL_T inv_c,
244 |             const VREAL_T           dtdx);
245 | void vriemann(      VREAL_T *restrict     gdnv_rho,       VREAL_T *restrict gdnv_u,           VREAL_T *restrict gdnv_v,           VREAL_T *restrict gdnv_p,
246 |               const VREAL_T            in_left_rho, const VREAL_T           in_left_u,  const VREAL_T           in_left_v,  const VREAL_T           in_left_p,
247 |               const VREAL_T           in_right_rho, const VREAL_T           in_right_u, const VREAL_T           in_right_v, const VREAL_T           in_right_p);
248 | void vcmpflx(      VREAL_T *restrict flux_rho,       VREAL_T *restrict flux_rhou,       VREAL_T *restrict flux_rhov,       VREAL_T *restrict flux_E,
249 |              const VREAL_T           gdnv_rho, const VREAL_T           gdnv_u,    const VREAL_T           gdnv_v,    const VREAL_T           gdnv_p);
250 | VREAL_T vupdate(const VREAL_T  in,
251 |                 const VREAL_T  flux_left, const VREAL_T  flux_right,
252 |                 const VREAL_T  dtdx);
253 | void vcourant(      VREAL_T *restrict courantv,
254 |               const VREAL_T           u, const VREAL_T  v,
255 |               const VREAL_T           c,
256 |               const VMASK_T           write_mask);
257 | 
258 | struct vstrip_work
259 | {
260 |     VREAL_T flux      [4][2]; // flux at i-1/2, i+1/2
261 |     VREAL_T left_flux [4][2]; // left_flux at i, i+1
262 |     VREAL_T prim      [5][3]; // prim for i, i+1, i+2
263 | };
264 | 
265 | void vstrip_prime(      vstrip_work  *restrict sw,
266 |                   const REAL_T       *restrict rho,
267 |                   const REAL_T       *restrict rhou,
268 |                   const REAL_T       *restrict rhov,
269 |                   const REAL_T       *restrict E,
270 |                   const hydro        *restrict h,
271 |                   const int                    stride,
272 |                   const VREAL_T                dtdx);
273 | 
274 | VREAL_T vstrip_stable(const hydro       *restrict h,
275 |                             REAL_T      *restrict rho,
276 |                             REAL_T      *restrict rhou,
277 |                             REAL_T      *restrict rhov,
278 |                             REAL_T      *restrict E,
279 |                             vstrip_work *restrict sw,
280 |                       const int                   i,
281 |                       const int                   stride,
282 |                       const VREAL_T               dtdx,
283 |                       const VMASK_T               write_mask,
284 |                       const bool                  do_courant);
285 | 
286 | VREAL_T hstrip_stable(const hydro       *restrict h,
287 |                             REAL_T      *restrict rho,
288 |                             REAL_T      *restrict rhou,
289 |                             REAL_T      *restrict rhov,
290 |                             REAL_T      *restrict E,
291 |                             vstrip_work *restrict sw,
292 |                       const int                   i,
293 |                       const int                   stride,
294 |                       const VREAL_T               dtdx,
295 |                       const VMASK_T               write_mask,
296 |                       const bool                  do_courant);
297 | 
298 | #endif /* __PCL_HYDRO_HPP__ */
299 | 


--------------------------------------------------------------------------------
/examples/hydro2d/test.nml:
--------------------------------------------------------------------------------
 1 | This namelist contains various input parameters for HYDRO runs
 2 | 
 3 | &RUN
 4 | tend=50
 5 | #noutput=10
 6 | nstepmax=100
 7 | dtoutput=2.
 8 | /
 9 | 
10 | &MESH
11 | nx=256
12 | ny=256
13 | nxystep=125
14 | prt=0
15 | dx=0.05
16 | boundary_left=1
17 | boundary_right=1
18 | boundary_down=1
19 | boundary_up=1
20 | testcase=1
21 | /
22 | 
23 | &HYDRO
24 | courant_factor=0.8
25 | niter_riemann=10
26 | /
27 | 


--------------------------------------------------------------------------------
/examples/hydro2d/timeseries.cpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/timeseries.cpp : timeseries read/write code
  2 | 
  3 |    (C) Jason Sewall : Intel -- initial version
  4 | */
  5 | /*
  6 |   This software is governed by the CeCILL license under French law and
  7 |   abiding by the rules of distribution of free software.  You can  use,
  8 |   modify and/ or redistribute the software under the terms of the CeCILL
  9 |   license as circulated by CEA, CNRS and INRIA at the following URL
 10 |   "http://www.cecill.info".
 11 | 
 12 |   As a counterpart to the access to the source code and  rights to copy,
 13 |   modify and redistribute granted by the license, users are provided only
 14 |   with a limited warranty  and the software's author,  the holder of the
 15 |   economic rights,  and the successive licensors  have only  limited
 16 |   liability.
 17 | 
 18 |   In this respect, the user's attention is drawn to the risks associated
 19 |   with loading,  using,  modifying and/or developing or reproducing the
 20 |   software by the user in light of its specific status of free software,
 21 |   that may mean  that it is complicated to manipulate,  and  that  also
 22 |   therefore means  that it is reserved for developers  and  experienced
 23 |   professionals having in-depth computer knowledge. Users are therefore
 24 |   encouraged to load and test the software's suitability as regards their
 25 |   requirements in conditions enabling the security of their systems and/or
 26 |   data to be ensured and,  more generally, to use and operate it in the
 27 |   same conditions as regards security.
 28 | 
 29 |   The fact that you are presently reading this means that you have had
 30 |   knowledge of the CeCILL license and that you accept its terms.
 31 | */
 32 | 
 33 | #include "timeseries.hpp"
 34 | #include <cstring>
 35 | #include <cstdarg>
 36 | #include <cstdlib>
 37 | #include <sys/mman.h>
 38 | #include <algorithm>
 39 | #include <sys/stat.h>
 40 | #include <cerrno>
 41 | #include <cassert>
 42 | 
 43 | #undef EXTEND_ARRAY
 44 | 
 45 | inline void xdie(const char *fmt, ...)
 46 | {   // Print a printf-style message to stderr, then terminate with EXIT_FAILURE.
 47 |     va_list args;
 48 |     va_start(args, fmt);
 49 |     vfprintf(stderr, fmt, args);
 50 |     va_end(args);
 51 |     exit(EXIT_FAILURE);
 52 | }
 53 | 
 54 | static void make_path(const char *str)
 55 | {
 56 |     // Create every directory named by a '/'-terminated prefix of `str`; a
 57 |     // directory that already exists is fine. Dies via xdie() when the path
 58 |     // does not fit the 1024-byte buffer or when mkdir fails otherwise.
 59 |     char  buff[1024] = {0};
 60 |     char *current    = buff;
 61 | 
 62 |     for(; *str; ++str)
 63 |     {
 64 |         *current++ = *str;
 65 |         if(current - buff >= 1023)
 66 |             xdie("Prefix path is too long (allow 1023, got %d)\n", (int)(current - buff)); // %d needs an int, not a ptrdiff_t
 67 |         if(*str != '/')
 68 |             continue;
 69 |         *current = 0; // buff now holds the prefix up to and including this '/'
 70 |         const int dirres = mkdir(buff,  S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 71 |         if(dirres == -1 && errno != EEXIST)
 72 |         {
 73 |             perror("mkdir");
 74 |             xdie("mkdir failed!");
 75 |         }
 76 |     }
 77 | }
 78 | 
 79 | bool timeseries_writer::initialize(const char *in_prefix, size_t mps)
 80 | {
 81 |     // Record the pack size limit, create the directories named in `in_prefix`
 82 |     // and open "<prefix>.idx". Returns false if the index cannot be opened.
 83 |     max_pack_size = mps;
 84 |     prefix        = xstrdup(in_prefix, "prefix");
 85 | 
 86 |     // Reset the pack state up front so that no member is left uninitialized
 87 |     // when the open below fails.
 88 |     current_pack         = 0;
 89 |     current_pack_name    = 0;
 90 |     pack_no              = -1;
 91 |     current_entry_str[0] = 0;
 92 | 
 93 |     make_path(prefix);
 94 |     char buff[1024] = {0};
 95 |     xsnprintf(buff, 1023, "%s.idx", prefix);
 96 |     index_file = xfopen_write(buff, "w");
 97 |     if(index_file)
 98 |         return true;
 99 |     free(prefix); prefix = 0; // failed: release the copy instead of leaking it
100 |     return false;
101 | }
102 | 
103 | void timeseries_writer::finish()
104 | {
105 |     // Emit the still-pending index entry (if any), close the pack and index
106 |     // files and release the owned strings.
107 |     if(current_pack && current_entry_str[0]) // an already-flushed entry (e.g. after comment()) must not yield a bogus line
108 |         fprintf(index_file, "%s %zu\n", current_entry_str, current_pack_size);
109 |     if(current_pack)
110 |         fclose(current_pack);
111 |     fclose(index_file);
112 |     free(prefix);
113 |     free(current_pack_name);
114 | }
115 | 
116 | static bool flush_frame(timeseries_writer *tw)
117 | {
118 |     // Complete the pending index entry, if any, by appending the current
119 |     // pack size and a newline. Returns whether an entry was written.
120 |     if(tw->current_entry_str[0] == 0)
121 |         return false;
122 |     assert(tw->current_pack);
123 |     fprintf(tw->index_file, "%s %zu\n", tw->current_entry_str, tw->current_pack_size);
124 |     fflush(tw->index_file);
125 |     tw->current_entry_str[0] = 0; // mark the entry as consumed
126 |     return true;
127 | }
128 | 
129 | bool timeseries_writer::comment(const char *str)
130 | {
131 |     // Write `str` to the index file as comment text: every line gets a "# "
132 |     // prefix and a final newline is added when `str` lacks one. A pending
133 |     // entry is flushed first. Always returns true.
134 |     flush_frame(this);
135 |     if(*str)
136 |         fputs("# ", index_file);
137 | 
138 |     char last = 0; // last character written, 0 if none
139 |     for(; *str; ++str)
140 |     {
141 |         fputc(*str, index_file);
142 |         last = *str;
143 |         if(last == '\n' && *(str + 1))
144 |             fputs("# ", index_file); // more text follows: open the next comment line
145 |     }
146 |     if(last != '\n')
147 |         fputc('\n', index_file);
148 |     fflush(index_file); // now reached on every path, including text that already ends in '\n'
149 |     return true;
150 | }
151 | 
152 | static bool check_file(timeseries_writer *tw, size_t size_hint)
153 | {
154 |     // Flush the pending entry and make sure a pack file is open, starting a
155 |     // new "<prefix>NNNNN.pak" when none is open or when `size_hint` fits a
156 |     // pack on its own but not in the space left. False if the open fails.
157 |     flush_frame(tw);
158 | 
159 |     const bool need_new = !tw->current_pack || (size_hint < tw->max_pack_size && tw->current_pack_size + size_hint > tw->max_pack_size);
160 |     if(!need_new)
161 |         return true;
162 |     if(tw->current_pack)
163 |         fclose(tw->current_pack);
164 |     free(tw->current_pack_name); // also releases a name left behind by an earlier failed open
165 | 
166 |     char buff[1024] = {0};
167 |     xsnprintf(buff, 1023, "%s%05d.pak", tw->prefix, ++tw->pack_no);
168 |     tw->current_pack_name = xstrdup(basename(buff), "pack name");
169 |     tw->current_pack      = xfopen_write(buff, "wb");
170 |     if(!tw->current_pack)
171 |         return false;
172 |     tw->current_pack_size = 0;
173 |     return true;
174 | }
175 | 
176 | bool timeseries_writer::new_frame(double t, size_t size_hint)
177 | {
178 |     // Start a frame entry stamped with time `t` at the current pack offset.
179 |     // Returns false when check_file() cannot provide a pack file.
180 |     const bool ready = check_file(this, size_hint);
181 |     assert(!ready || current_entry_str[0] == 0);
182 |     if(ready) xsnprintf(current_entry_str, 1023, "f %20.14lf %s %zu", t, current_pack_name, current_pack_size);
183 |     return ready;
184 | }
185 | 
186 | bool timeseries_writer::new_static(const char *name, size_t size_hint)
187 | {
188 |     // Start a static entry labelled `name` at the current pack offset.
189 |     // Returns false when check_file() cannot provide a pack file.
190 |     const bool ready = check_file(this, size_hint);
191 |     assert(!ready || current_entry_str[0] == 0);
192 |     if(ready) xsnprintf(current_entry_str, 1023, "s %s %s %zu", name, current_pack_name, current_pack_size);
193 |     return ready;
194 | }
195 | 
196 | size_t timeseries_writer::append(const void *data, size_t data_size)
197 | {
198 |     // Append to the open entry; returns the bytes added: data_size, or 0 when no entry is open or nothing was written.
199 |     if(!current_entry_str[0]) return 0;
200 |     const size_t appended = fwrite(data, data_size, 1, current_pack) * data_size; // one item: all or nothing
201 |     current_pack_size += appended;
202 |     return appended;
203 | }
204 | 
205 | bool timeseries_reader::load(const char *index_filename)
206 | {
207 |     // Open the index at `index_filename`, derive the directory prefix used to
208 |     // locate pack files, reset the frame/static/file tables and read the
209 |     // index via refresh(). Returns false if the index cannot be opened,
210 |     // cannot be stat'ed or fails the file-type check.
211 |     index_file = xfopen_read(index_filename, "r");
212 |     if(!index_file)
213 |         return false;
214 | 
215 |     struct stat st;
216 |     const bool stat_ok = fstat(fileno(index_file), &st) == 0;
217 |     if(!stat_ok || !(S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)))
218 |     {
219 |         fclose(index_file); // rejected: close the handle instead of leaking it
220 |         index_file = 0;
221 |         return false;
222 |     }
223 | 
224 |     // keep everything up to and including the last '/', or "" when there is none
225 |     const char *back = strrchr(index_filename, '/');
226 |     prefix           = back ? xstrndup(index_filename, back-index_filename+1, "prefix") : xstrdup("", "null");
227 | 
228 |     frames  = 0; frames_n  = 0; frames_n_allocd  = 0;
229 |     statics = 0; statics_n = 0; statics_n_allocd = 0;
230 |     files   = 0; files_n   = 0; files_n_allocd   = 0;
231 | 
232 |     refresh();
233 | 
234 |     return true;
235 | }
236 | 
237 | #define EXTEND_ARRAY(name, num, n_allocd) /* grow `name` when name##_n + num reaches n_allocd */ \
238 |     if(name##_n + num >= n_allocd)              \
239 |     {                                           \
240 |         n_allocd = (name##_n + num)*2;          \
241 |         void *m  = realloc(name, sizeof(name[0])*n_allocd); \
242 |         if(!m) xdie("realloc failed while extending %s\n", #name); /* don't overwrite the array with null */ \
243 |         name     = (typeof(name)) m; }
244 | 
245 | static bool read_frame(timeseries_reader *tsr, char *file, size_t *low_offset, size_t *high_offset)
246 | {
247 |     // Parse "<t> <file> <start> <end>" from the index into the frame slot at
248 |     // frames_n (frames_n itself is not advanced here). True on a full parse.
249 |     EXTEND_ARRAY(tsr->frames, 1, tsr->frames_n_allocd);
250 |     const int parsed = fscanf(tsr->index_file, "%lf %1023s %zu %zu", &tsr->frames[tsr->frames_n].t, file, &tsr->frames[tsr->frames_n].start_offset, &tsr->frames[tsr->frames_n].end_offset);
251 |     if(parsed != 4)
252 |         return false;
253 | 
254 |     *low_offset  = tsr->frames[tsr->frames_n].start_offset;
255 |     *high_offset = tsr->frames[tsr->frames_n].end_offset;
256 |     return true;
257 | }
258 | 
259 | static bool read_static(timeseries_reader *tsr, char *file, size_t *low_offset, size_t *high_offset)
260 | {
261 |     // Parse "<name> <file> <start> <end>" from the index into the static slot
262 |     // at statics_n (statics_n itself is not advanced here). The name is
263 |     // copied only after a full parse. Returns true on a full parse.
264 |     EXTEND_ARRAY(tsr->statics, 1, tsr->statics_n_allocd);
265 |     char label[1024] = {0};
266 |     const int parsed = fscanf(tsr->index_file, "%1023s %1023s %zu %zu", label, file, &tsr->statics[tsr->statics_n].start_offset, &tsr->statics[tsr->statics_n].end_offset);
267 |     if(parsed != 4)
268 |         return false;
269 | 
270 |     tsr->statics[tsr->statics_n].name = xstrdup(label, "static name");
271 |     *low_offset                       = tsr->statics[tsr->statics_n].start_offset;
272 |     *high_offset                      = tsr->statics[tsr->statics_n].end_offset;
273 |     return true;
274 | }
275 | 
276 | int timeseries_reader::refresh()
277 | {
278 |     // Read the index entries appended since the previous call, record the
279 |     // pack files they refer to, and map (or re-map) those files. Parsing
280 |     // stops at the first incomplete line, which the next call re-reads.
281 |     // Returns the number of frame and static entries read by this call.
282 |     char buff[1024] = {0};
283 |     int  nread      = 0;
284 |     int  back_file  = files_n;
285 |     while(!feof(index_file))
286 |     {
287 |         off64_t last_offs = ftello64(index_file);
288 | 
289 |         size_t        low_offset  = 0;
290 |         size_t        high_offset = 0;
291 |         volatile int *file_no     = 0; // set by every case that reaches the bookkeeping below
292 |         char          current     = fgetc(index_file);
293 |         bool          reset       = false;
294 |         switch(current)
295 |         {
296 |         case '#':
297 |             while(!feof(index_file) && current != '\n')
298 |                 current = fgetc(index_file);
299 |             if(current == '\n')
300 |                 continue;
301 |             reset = true; // the comment line is not complete yet
302 |             break;
303 |         case 'f':
304 |             reset = !read_frame(this, buff, &low_offset, &high_offset);
305 |             if(!reset)
306 |             {
307 |                 file_no = &(frames[frames_n].file_no);
308 |                 ++frames_n;
309 |                 ++nread;
310 |             }
311 |             break;
312 |         case 's':
313 |             reset = !read_static(this, buff, &low_offset, &high_offset);
314 |             if(!reset)
315 |             {
316 |                 file_no = &(statics[statics_n].file_no);
317 |                 ++statics_n;
318 |                 ++nread;
319 |             }
320 |             break;
321 |         default:
322 |             // Newlines left behind by fscanf, EOF and any other unrecognized
323 |             // character carry no entry. Skip them: the bookkeeping below would
324 |             // zero lowest_offset and write through an unset file_no.
325 |             continue;
326 |         }
327 | 
328 |         if(reset)
329 |         {
330 |             fseeko64(index_file, last_offs, SEEK_SET); // rewind so the next refresh() re-reads the entry
331 |             break;
332 |         }
333 |         if(!files_n || strcmp(files[files_n-1].name, buff) != 0)
334 |         {
335 |             EXTEND_ARRAY(files, 1, files_n_allocd);
336 |             files[files_n].name           = xstrdup(buff, "file name");
337 |             files[files_n].fp             = 0;
338 |             files[files_n].map_bytes      = 0;
339 |             files[files_n].map_base       = 0;
340 |             files[files_n].lowest_offset  = low_offset;
341 |             files[files_n].highest_offset = high_offset;
342 |             ++files_n;
343 |         }
344 |         else
345 |         {
346 |             files[files_n-1].lowest_offset  = std::min(files[files_n-1].lowest_offset,  low_offset);
347 |             files[files_n-1].highest_offset = std::max(files[files_n-1].highest_offset, high_offset);
348 |         }
349 |         // currently, we assume that files appear in strictly increasing order in the index file
350 |         *file_no = files_n-1;
351 |     }
352 | 
353 |     // The newest file known before this call may have grown: map it again with its new size.
354 |     if(back_file && files[back_file-1].map_bytes != files[back_file-1].highest_offset)
355 |     {
356 |         file_entry *grown = files + (back_file-1);
357 |         munmap(grown->map_base, grown->map_bytes);
358 |         void *new_map = mmap(grown->map_base, grown->highest_offset,  PROT_READ, MAP_PRIVATE, fileno(grown->fp), 0);
359 |         if(new_map == MAP_FAILED)
360 |         {
361 |             perror("mmap");
362 |             xdie("Couldn't mmap file: %s\n", grown->name);
363 |         }
364 |         grown->map_base  = new_map; // the old address was only a hint; the mapping may have moved
365 |         grown->map_bytes = grown->highest_offset;
366 |     }
367 | 
368 |     for(int fi = back_file; fi < files_n; ++fi)
369 |     {
370 |         char path[1024]; // "<prefix><file name>" of a pack file first seen during this call
371 |         xsnprintf(path, 1023, "%s%s", prefix, files[fi].name);
372 |         files[fi].fp        = xfopen_read(path, "r");
373 |         if(!files[fi].fp)
374 |         {
375 |             perror("fopen");
376 |             xdie("Couldn't open file: %s\n", path);
377 |         }
378 |         files[fi].map_bytes = files[fi].highest_offset;
379 |         files[fi].map_base  = mmap(0, files[fi].map_bytes, PROT_READ, MAP_PRIVATE, fileno(files[fi].fp), 0);
380 |         if(files[fi].map_base == MAP_FAILED)
381 |         {
382 |             perror("mmap");
383 |             xdie("Couldn't mmap file: %s\n", path);
384 |         }
385 |     }
386 | 
387 |     return nread;
388 | }
389 | 
390 | const void *timeseries_reader::get_frame(int frameno, double *t, size_t *size) const
391 | {
392 |     // Report the time and byte size of frame `frameno`; returns a pointer to its data inside the mapped pack file.
393 |     const frame_entry &frame = frames[frameno];
394 |     *t    = frame.t;
395 |     *size = frame.end_offset - frame.start_offset;
396 |     return (const char*)files[frame.file_no].map_base + frame.start_offset;
397 | }
398 | 
399 | const void *timeseries_reader::get_static(int staticno, const char **name, size_t *size) const
400 | {
401 |     // Report the name and byte size of static `staticno`; returns a pointer to its data inside the mapped pack file.
402 |     const static_entry &entry = statics[staticno];
403 |     *name = entry.name;
404 |     *size = entry.end_offset - entry.start_offset;
405 |     return (const char*)files[entry.file_no].map_base + entry.start_offset;
406 | }
407 | 
408 | const void *timeseries_reader::get_static(const char *name, size_t *size) const
409 | {
410 |     const char *outname;
411 |     for(int i = 0; i < statics_n; ++i)
412 |         if(strcmp(statics[i].name, name) == 0)
413 |             return get_static(i, &outname, size);
414 | 
415 |     *size = 0;
416 |     return 0;
417 | }
418 | 


--------------------------------------------------------------------------------
/examples/hydro2d/timeseries.hpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/timeseries.hpp : timeseries read/write code
  2 | 
  3 |    (C) Jason Sewall : Intel -- initial version
  4 | */
  5 | /*
  6 |   This software is governed by the CeCILL license under French law and
  7 |   abiding by the rules of distribution of free software.  You can  use,
  8 |   modify and/ or redistribute the software under the terms of the CeCILL
  9 |   license as circulated by CEA, CNRS and INRIA at the following URL
 10 |   "http://www.cecill.info".
 11 | 
 12 |   As a counterpart to the access to the source code and  rights to copy,
 13 |   modify and redistribute granted by the license, users are provided only
 14 |   with a limited warranty  and the software's author,  the holder of the
 15 |   economic rights,  and the successive licensors  have only  limited
 16 |   liability.
 17 | 
 18 |   In this respect, the user's attention is drawn to the risks associated
 19 |   with loading,  using,  modifying and/or developing or reproducing the
 20 |   software by the user in light of its specific status of free software,
 21 |   that may mean  that it is complicated to manipulate,  and  that  also
 22 |   therefore means  that it is reserved for developers  and  experienced
 23 |   professionals having in-depth computer knowledge. Users are therefore
 24 |   encouraged to load and test the software's suitability as regards their
 25 |   requirements in conditions enabling the security of their systems and/or
 26 |   data to be ensured and,  more generally, to use and operate it in the
 27 |   same conditions as regards security.
 28 | 
 29 |   The fact that you are presently reading this means that you have had
 30 |   knowledge of the CeCILL license and that you accept its terms.
 31 | */
 32 | 
 33 | #ifndef __TIMESERIES_HPP__
 34 | #define __TIMESERIES_HPP__
 35 | 
 36 | #include "array-macros.hpp"
 37 | 
 38 | struct timeseries_writer
 39 | {
 40 |     bool   initialize(const char *in_prefix, size_t max_pack_size);
 41 |     void   finish();
 42 |     size_t append(const void *data, size_t data_size);
 43 |     bool   comment(const char *str);
 44 |     bool   new_frame(double t, size_t size_hint);
 45 |     bool   new_static(const char *name, size_t size_hint);
 46 | 
 47 |     char   *prefix;
 48 |     size_t  max_pack_size;
 49 | 
 50 |     FILE *index_file;
 51 | 
 52 |     FILE   *current_pack;
 53 |     char   *current_pack_name;
 54 |     size_t  current_pack_size;
 55 |     int     pack_no;
 56 | 
 57 |     char current_entry_str[1024];
 58 | };
 59 | 
 60 | struct file_entry
 61 | {
 62 |     char   *name;
 63 |     FILE   *fp;
 64 |     size_t  map_bytes;
 65 |     void   *map_base;
 66 |     size_t  lowest_offset;
 67 |     size_t  highest_offset;
 68 | };
 69 | 
 70 | struct frame_entry
 71 | {
 72 |     double  t;
 73 |     size_t start_offset;
 74 |     size_t end_offset;
 75 |     int    file_no;
 76 | };
 77 | 
 78 | struct static_entry
 79 | {
 80 |     char   *name;
 81 |     size_t  start_offset;
 82 |     size_t  end_offset;
 83 |     int     file_no;
 84 | };
 85 | 
 86 | struct timeseries_reader
 87 | {
 88 |     bool load(const char *index_file);
 89 |     int refresh();
 90 | 
 91 |     const void *get_frame(int frameno, double *t, size_t *size) const;
 92 |     const void *get_static(int staticno, const char **name, size_t *size) const;
 93 |     const void *get_static(const char *name, size_t *size) const;
 94 | 
 95 |     char         *prefix;
 96 |     FILE         *index_file;
 97 |     frame_entry  *frames;
 98 |     int           frames_n;
 99 |     int           frames_n_allocd;
100 | 
101 |     static_entry *statics;
102 |     int           statics_n;
103 |     int           statics_n_allocd;
104 |     file_entry   *files;
105 |     int           files_n;
106 |     int           files_n_allocd;
107 | };
108 | #endif
109 | 


--------------------------------------------------------------------------------
/examples/hydro2d/vtkfile.cpp:
--------------------------------------------------------------------------------
  1 | /* examples/hydro2d/vtkfile.cpp : vtk output
  2 | 
  3 |    (C) Romain Teyssier : CEA/IRFU           -- original F90 code
  4 |    (C) Pierre-Francois Lavallee : IDRIS      -- original F90 code
  5 |    (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version
  6 |    (C) Jason Sewall : Intel -- 'pcl-hydro' optimized for modern x86
  7 |    (C) John Pennycook : Intel -- augmentations to above version
  8 | */
  9 | /*
 10 |   This software is governed by the CeCILL license under French law and
 11 |   abiding by the rules of distribution of free software.  You can  use,
 12 |   modify and/ or redistribute the software under the terms of the CeCILL
 13 |   license as circulated by CEA, CNRS and INRIA at the following URL
 14 |   "http://www.cecill.info".
 15 | 
 16 |   As a counterpart to the access to the source code and  rights to copy,
 17 |   modify and redistribute granted by the license, users are provided only
 18 |   with a limited warranty  and the software's author,  the holder of the
 19 |   economic rights,  and the successive licensors  have only  limited
 20 |   liability.
 21 | 
 22 |   In this respect, the user's attention is drawn to the risks associated
 23 |   with loading,  using,  modifying and/or developing or reproducing the
 24 |   software by the user in light of its specific status of free software,
 25 |   that may mean  that it is complicated to manipulate,  and  that  also
 26 |   therefore means  that it is reserved for developers  and  experienced
 27 |   professionals having in-depth computer knowledge. Users are therefore
 28 |   encouraged to load and test the software's suitability as regards their
 29 |   requirements in conditions enabling the security of their systems and/or
 30 |   data to be ensured and,  more generally, to use and operate it in the
 31 |   same conditions as regards security.
 32 | 
 33 |   The fact that you are presently reading this means that you have had
 34 |   knowledge of the CeCILL license and that you accept its terms.
 35 | */
 36 | #include <cstdlib>
 37 | #include <cstdio>
 38 | #include <cstring>
 39 | #include <cassert>
 40 | #include <sys/stat.h>
 41 | #include "arch.hpp"
 42 | #include "array-macros.hpp"
 43 | 
 44 | typedef unsigned char byte;
 45 | 
 46 | static const char s_CharPlusSign = '+';
 47 | static const char s_CharSlash    = '/';
 48 | 
 49 | static char SixBitToChar(byte b);
 50 | static char *ToBase64(unsigned char *data, int length);
 51 | 
 52 | static char SixBitToChar(byte b) {
 53 |     char c;
 54 |     if (b < 26) {
 55 |         c = (char) ((int) b + (int) 'A');
 56 |     } else if (b < 52) {
 57 |         c = (char) ((int) b - 26 + (int) 'a');
 58 |     } else if (b < 62) {
 59 |         c = (char) ((int) b - 52 + (int) '0');
 60 |     } else if (b == 62) {
 61 |         c = s_CharPlusSign;
 62 |     } else {
 63 |         c = s_CharSlash;
 64 |     }
 65 |     return c;
 66 | }
 67 | 
 68 | static char *ToBase64(unsigned char *data, int length) {
 69 |     int padding = length % 3;
 70 |     int blocks = (length - 1) / 3 + 1;
 71 |     size_t lalloc;
 72 |     char *s;
 73 |     int i;
 74 | 
 75 |     if (length == 0)
 76 |         return NULL;
 77 | 
 78 |     if (padding > 0)
 79 |         padding = 3 - padding;
 80 | 
 81 |     // lalloc = (blocks * 4 + 1 + 16);
 82 |     lalloc = blocks;
 83 |     lalloc *= 4;
 84 |     lalloc += 17;
 85 | 
 86 |     s = (char*)malloc(lalloc);
 87 |     if (s == NULL) {
 88 |         fprintf(stderr, "Length=%d, blocks=%d lalloc=%ld\n", length, blocks, lalloc);
 89 |         exit(1);
 90 |     }
 91 | 
 92 |     for (i = 0; i < blocks; i++) {
 93 |         bool finalBlock = i == blocks - 1;
 94 |         bool pad2 = false;
 95 |         bool pad1 = false;
 96 |         if (finalBlock) {
 97 |             pad2 = padding == 2;
 98 |             pad1 = padding > 0;
 99 |         }
100 | 
101 |         int index = i * 3;
102 |         byte b1 = data[index];
103 |         byte b2 = pad2 ? (byte) 0 : data[index + 1];
104 |         byte b3 = pad1 ? (byte) 0 : data[index + 2];
105 | 
106 |         byte temp1 = (byte) ((b1 & 0xFC) >> 2);
107 | 
108 |         byte temp = (byte) ((b1 & 0x03) << 4);
109 |         byte temp2 = (byte) ((b2 & 0xF0) >> 4);
110 |         temp2 += temp;
111 | 
112 |         temp = (byte) ((b2 & 0x0F) << 2);
113 |         byte temp3 = (byte) ((b3 & 0xC0) >> 6);
114 |         temp3 += temp;
115 | 
116 |         byte temp4 = (byte) (b3 & 0x3F);
117 | 
118 |         index = i * 4;
119 |         s[index] = SixBitToChar(temp1);
120 |         s[index + 1] = SixBitToChar(temp2);
121 |         s[index + 2] = pad2 ? '=' : SixBitToChar(temp3);
122 |         s[index + 3] = pad1 ? '=' : SixBitToChar(temp4);
123 |     }
124 |     s[blocks * 4] = (byte) 0;
125 |     return s;
126 | }
127 | 
128 | #define BINARY 1
129 | #undef MPI
130 | static void vtkwpvd(int nout, char *r) {
131 |     char n[1024];
132 |     char vfname[1024];
133 |     int i;
134 |     FILE *vf = NULL;
135 |     char tmp[10];
136 | 
137 |     vf = xfopen_write("Hydro.pvd", "w");
138 |     if(vf == NULL)
139 |     {
140 |         fprintf(stderr, "Can't write to Hydro.pvd\n");
141 |         exit(1);
142 |     }
143 | 
144 |     fprintf(vf, "<?xml version=\"1.0\"?>\n");
145 |     fprintf(vf, " <VTKFile type=\"Collection\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
146 |     fprintf(vf, "  <Collection>\n");
147 | 
148 |     for (i = 1; i <= nout; i++) {
149 |         xsnprintf(tmp, 9, "%06d", i);
150 |         xsnprintf(n, 1023, "Dep/%c%c%c%c", tmp[0], tmp[1], tmp[2], tmp[3]);
151 |         xsnprintf(n, 1023, "%s/%c%c", n, tmp[4], tmp[5]);
152 |         xsnprintf(vfname, 1023, "%s/Hydro_%04d.pvtr", n, i);
153 |         fprintf(vf, "  <DataSet timestep=\"%d\" part=\"0\" file=\"%s\"  name=\"Asmb:FRAME\"/>\n", i, vfname);
154 |     }
155 | 
156 |     fprintf(vf, " </Collection>\n");
157 |     fprintf(vf, "</VTKFile>\n");
158 |     fclose(vf);
159 | }
160 | 
161 | static void vtknm(char *n, size_t len, int me, int nout) {
162 |     char tmp[10];
163 | 
164 |     xsnprintf(tmp, 9, "%06d", nout);
165 |     xsnprintf(n, len, "Dep");
166 |     if (me == 0) {
167 |         mkdir(n, 0777);
168 |     }
169 |     xsnprintf(n, len, "%s/%c%c%c%c", n, tmp[0], tmp[1], tmp[2], tmp[3]);
170 |     if (me == 0) {
171 |         mkdir(n, 0777);
172 |     }
173 |     xsnprintf(n, len, "%s/%c%c", n, tmp[4], tmp[5]);
174 | 
175 |     if (me == 0) {
176 |         mkdir(n, 0777);
177 |     }
178 | }
179 | 
180 | void vtkfile(int step, const REAL_T *q, const int n[2], const int padding, const int ystride, const int varstride, const double dx) {
181 |     char name[1024];
182 |     char vfrname[1024];
183 |     FILE *fic, *vf;
184 |     int i, j, nv;
185 | 
186 |     enum {ID = 0, IU = 1, IV = 2, IP = 3};
187 | 
188 |     // First step : create the directory structure ONLY using PE0
189 | #ifdef MPI
190 |     if (H.nproc > 1) MPI_Barrier(MPI_COMM_WORLD);
191 | #endif
192 |     vtknm(vfrname, 1023, 0, step); // create the directory structure
193 |     // if (0 == 0) fprintf(stderr, "%s\n", vfrname);
194 | #ifdef MPI
195 |     if (H.nproc > 1) MPI_Barrier(MPI_COMM_WORLD);
196 | #endif
197 | 
198 |     // Write a domain per PE
199 |     xsnprintf(name, 1023, "%s/Hydro_%05d_%04d.vtr", vfrname, 0, step);
200 |     fic = xfopen_write(name, "w");
201 |     if (fic == NULL) {
202 |         fprintf(stderr, "Ouverture du fichier %s impossible\n", name);
203 |         exit(1);
204 |     }
205 |     fprintf(fic, "<?xml version=\"1.0\"?>\n");
206 |     fprintf(fic, "<VTKFile type=\"RectilinearGrid\" byte_order=\"LittleEndian\">\n");
207 |     fprintf(fic, " <RectilinearGrid WholeExtent=\" %d %d %d %d %d %d\">\n",
208 |             0, n[0], 0, n[1], 0, 1);
209 |     fprintf(fic, "  <Piece Extent=\" %d %d %d %d %d %d\" GhostLevel=\"0\">\n",
210 |             0, n[0], 0, n[1], 0, 1);
211 |     fprintf(fic, "   <Coordinates>\n");
212 | 
213 |     fprintf(fic, "    <DataArray type=\"Float32\" format=\"ascii\" NumberOfComponents=\"1\">\n");
214 |     for (i = 0; i <= n[0]; i++) {
215 |         fprintf(fic, "%f ", i * dx);
216 |     }
217 |     fprintf(fic, "\n");
218 |     fprintf(fic, "    </DataArray>\n");
219 |     fprintf(fic, "    <DataArray type=\"Float32\" format=\"ascii\" NumberOfComponents=\"1\">\n");
220 |     for (j = 0; j <= n[1]; j++) {
221 |         fprintf(fic, "%f ", j * dx);
222 |     }
223 |     fprintf(fic, "\n");
224 |     fprintf(fic, "    </DataArray>\n");
225 |     fprintf(fic, "    <DataArray type=\"Float32\" format=\"ascii\" NumberOfComponents=\"1\">\n");
226 |     fprintf(fic, "%f %f\n", 0., 1. * dx);
227 |     fprintf(fic, "    </DataArray>\n");
228 |     fprintf(fic, "   </Coordinates>\n");
229 |     name[0] = 0;
230 |     for (nv = 0; nv <= IP; nv++) {
231 |         if (nv == ID)
232 |             snprintf(name, 1023, "%s varID", name);
233 |         if (nv == IU)
234 |             snprintf(name, 1023, "%s varIU", name);
235 |         if (nv == IV)
236 |             snprintf(name, 1023, "%s varIV", name);
237 |         if (nv == IP)
238 |             snprintf(name, 1023, "%s varIP", name);
239 |     }
240 | 
241 |     // declaration of the variable list
242 |     fprintf(fic, "   <CellData Scalars=\"%s\">\n", name);
243 |     name[0] = 0;
244 |     for (nv = 0; nv <= IP; nv++) {
245 |         if (nv == ID)
246 |             snprintf(name, 1023, "varID");
247 |         if (nv == IU)
248 |             snprintf(name, 1023, "varIU");
249 |         if (nv == IV)
250 |             snprintf(name, 1023, "varIV");
251 |         if (nv == IP)
252 |             snprintf(name, 1023, "varIP");
253 | 
254 |         //Definition of the cell values
255 | #if BINARY == 1
256 |         fprintf(fic,
257 |                 "    <DataArray Name=\"%s\" type=\"Float32\" format=\"binary\" encoding=\"base64\" NumberOfComponents=\"1\">\n",
258 |                 name);
259 |         {
260 |             // float tuold[h->net_n[0] * h->net_n[1]];
261 |             float *tuold = NULL;
262 |             char *r64;
263 |             size_t p = 0, lst;
264 | 
265 |             assert((n[0] * n[1]) > 0);
266 |             tuold = (float *) calloc(n[0] * n[1] + 16, sizeof(float));
267 |             assert(tuold != NULL);
268 | 
269 |             for (j = 0; j < n[1]; j++) {
270 |                 for (i = 0; i < n[0]; i++) {
271 |                     tuold[p++] = (float) q[nv * varstride + (j + padding) * ystride + i + padding];
272 |                 }
273 |             }
274 |             // Header = size of the following items
275 |             assert(p <= n[0] * n[1]);
276 | 
277 |             p *= sizeof(float);
278 |             r64 = ToBase64((byte *) & p, sizeof(int));
279 |             lst = strlen(r64);
280 |             fwrite(r64, 1, lst, fic);
281 |             free(r64);
282 |             r64 = ToBase64((byte *) tuold, p);
283 |             lst = strlen(r64);
284 |             fwrite(r64, 1, lst, fic);
285 |             free(r64);
286 |             free(tuold);
287 |         }
288 | #else
289 |         fprintf(fic, "    <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\" NumberOfComponents=\"1\">\n", name);
290 | 
291 |         // the image is the interior of the computed domain
292 |             for (j = 0; j < n[1]; j++) {
293 |                 for (i = 0; i < n[0]; i++) {
294 |                     fprintf(fic, "%lf ", q[nv * (nt[0]*nt[1]) + (j + padding) * nt[0] + i + padding]);
295 |                 }
296 |                 fprintf(fic, "\n");
297 |             }
298 | #endif
299 |         fprintf(fic, "    </DataArray>\n");
300 |     }
301 |     fprintf(fic, "   </CellData>\n");
302 |     fprintf(fic, "  </Piece>\n");
303 |     fprintf(fic, " </RectilinearGrid>\n");
304 |     fprintf(fic, "</VTKFile>\n");
305 |     fclose(fic);
306 | 
307 |     // At this stage we can write VTK containers. Since only one file is
308 |     // necessary even for multiple domains, it has to be written by one
309 |     // PE only.
310 | 
311 | #ifdef MPI
312 |     if (H.nproc > 1) MPI_Barrier(MPI_COMM_WORLD);
313 | #endif
314 |     if (0 == 0) {
315 |         xsnprintf(name, 1023, "outputvtk_%05d.pvtr", step);
316 |         xsnprintf(name, 1023, "%s/Hydro_%04d.pvtr", vfrname, step);
317 |         vf = xfopen_write(name, "w");
318 |         if (vf == NULL) {
319 |             fprintf(stderr, "Ouverture du fichier %s impossible\n", name);
320 |             exit(1);
321 |         }
322 |         fprintf(vf, "<?xml version=\"1.0\"?>\n");
323 |         fprintf(vf, "<VTKFile type=\"PRectilinearGrid\" byte_order=\"LittleEndian\">\n");
324 |         fprintf(vf, "<PRectilinearGrid WholeExtent=\"0 %d 0 %d 0 %d\"  GhostLevel=\"0\" >\n", n[0], n[1], 1);
325 |         fprintf(vf, " <PCellData>\n");
326 |         for (nv = 0; nv <= IP; nv++) {
327 |             name[0] = '\0';
328 |             if (nv == ID)
329 |                 xsnprintf(name, 1023, "varID");
330 |             if (nv == IU)
331 |                 xsnprintf(name, 1023, "varIU");
332 |             if (nv == IV)
333 |                 xsnprintf(name, 1023, "varIV");
334 |             if (nv == IP)
335 |                 xsnprintf(name, 1023, "varIP");
336 | 
337 |             #if BINARY == 1
338 |             fprintf(vf,
339 |                     "  <PDataArray Name=\"%s\" type=\"Float32\" format=\"binary\" encoding=\"base64\" NumberOfComponents=\"1\"/>\n",
340 |                     name);
341 | #else
342 |             fprintf(vf, "  <PDataArray Name=\"%s\" type=\"Float32\" format=\"ascii\" NumberOfComponents=\"1\"/>\n", name);
343 | #endif
344 |         }
345 |         fprintf(vf, " </PCellData>\n");
346 |         fprintf(vf, " <PCoordinates>\n");
347 |         fprintf(vf, "  <PDataArray type=\"Float32\" format=\"ascii\" NumberOfComponents=\"1\"/>\n");
348 |         fprintf(vf, "  <PDataArray type=\"Float32\" format=\"ascii\" NumberOfComponents=\"1\"/>\n");
349 |         fprintf(vf, "  <PDataArray type=\"Float32\" format=\"ascii\" NumberOfComponents=\"1\"/>\n");
350 |         fprintf(vf, " </PCoordinates>\n");
351 |         for (i = 0; i < 1; i++) {
352 |             // int box[8];
353 |             // memset(box, 0, 8 * sizeof(int));
354 |             //            CalcSubSurface(0, H.n[0], 0, H.n[1], 0, H.nproc - 1, 0, box, i, 0);
355 |             xsnprintf(name, 1023, "Hydro_%05d_%04d.vtr", i, step);
356 |             // fprintf(vf, " <Piece Extent=\"%d %d %d %d %d %d\" Source=\"%s\"/>\n", box[XMIN_BOX],
357 |             //         box[XMAX_BOX], box[YMIN_BOX], box[YMAX_BOX], 0, 1, name);
358 |             fprintf(vf, " <Piece Extent=\"%d %d %d %d %d %d\" Source=\"%s\"/>\n", 0, n[0], 0, n[1], 0, 1, name);
359 | 
360 |         }
361 |         fprintf(vf, "</PRectilinearGrid>\n");
362 |         fprintf(vf, "</VTKFile>\n");
363 |         fclose(vf);
364 | 
365 |         // We make the time step available only now to ensure consistency
366 |         vtkwpvd(step, "Dep");
367 |     }
368 | }
369 | 


--------------------------------------------------------------------------------
/examples/laplace5/.gitignore:
--------------------------------------------------------------------------------
1 | generated
2 | reference
3 | laplace5-gen.hpp
4 | 


--------------------------------------------------------------------------------
/examples/laplace5/Makefile:
--------------------------------------------------------------------------------
 1 | HFAV_DIR=../../
 2 | HFAVROOT?=$(HFAV_DIR)/hfav
 3 | 
 4 | HFAV=$(HFAV_DIR)/hfav.py
 5 | 
 6 | all: reference generated
 7 | 
 8 | reference: laplace5-test.cpp
 9 | 	icpc -o reference laplace5-test.cpp -fopenmp -restrict -std=c++11 -xHost
10 | 
11 | laplace5-gen.hpp: $(HFAV) laplace5.yaml
12 | 	$(HFAV) laplace5.yaml
13 | 
14 | generated: laplace5-test.cpp laplace5-gen.hpp
15 | 	icpc -o generated laplace5-test.cpp -fopenmp -restrict -DUSE_GEN -std=c++11 -xHost -I$(HFAVROOT)/include
16 | 
17 | clean:
18 | 	rm -rf generated reference laplace5-gen.hpp
19 | 


--------------------------------------------------------------------------------
/examples/laplace5/laplace5-test.cpp:
--------------------------------------------------------------------------------
  1 | // examples/laplace5/laplace5-test.cpp; 5-point laplace stencil codegen example
  2 | 
  3 | // Copyright 2017 Intel Corporation
  4 | //
  5 | // GENERATED CODE EXEMPTION
  6 | //
  7 | // The output of this tool does not automatically import the Apache
  8 | // 2.0 license, except the output will continue to be subject to the
  9 | // limitation of liability clause in the Apache 2.0 license. Users may
 10 | // license their output under any license they choose but the liability
 11 | // of the authors of the tool for that output is governed by the
 12 | // limitation of liability clause in the Apache 2.0 license.
 13 | //
 14 | // Licensed under the Apache License, Version 2.0 (the "License");
 15 | // you may not use this file except in compliance with the License.
 16 | // You may obtain a copy of the License at
 17 | //
 18 | //     http://www.apache.org/licenses/LICENSE-2.0
 19 | //
 20 | // Unless required by applicable law or agreed to in writing, software
 21 | // distributed under the License is distributed on an "AS IS" BASIS,
 22 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 23 | // See the License for the specific language governing permissions and
 24 | // limitations under the License.
 25 | 
 26 | #include <cstdio>
 27 | #include <cstring>
 28 | #include <cstdlib>
 29 | #include <algorithm>
 30 | #include <cmath>
 31 | #include <cstdint>
 32 | #include <omp.h>
 33 | 
 34 | static int GD;
 35 | 
 36 | static double h;
 37 | static double h_inv;
 38 | 
 39 | static void laplace5_resid(double rhs, double n, double ne, double e, double se, double s, double sw, double w, double nw, double self, double* out)
 40 | {
 41 |     *out = rhs - h_inv*h_inv*(n + e + s + w - 4*self);
 42 | }
 43 | 
 44 | static double L2norm(const int GD, const double in[restrict][GD+2])
 45 | {
 46 |     double res = 0.0;
 47 |     for(int j = 1; j < GD+1; ++j)
 48 |     {
 49 |         for(int i = 1; i < GD+1; ++i)
 50 |         {
 51 |             double nv;
 52 |             laplace5_resid(0.0, in[j-1][i], in[j-1][i+1], in[j][i+1], in[j+1][i+1], in[j+1][i], in[j+1][i-1], in[j][i-1], in[j-1][i-1], in[j][i], &nv);
 53 | 
 54 |             res += nv*nv;
 55 |         }
 56 |     }
 57 |     return std::sqrt(res);
 58 | }
 59 | 
 60 | static double omega = 2.0/3.0;
 61 | 
 62 | static void laplace5(double n, double e, double s, double w, double self, double* out)
 63 | {
 64 |     *out =  (1.0 - omega) * self + omega*h*h/4.0*(0.0 - h_inv*h_inv*(n + e + s + w));
 65 | }
 66 | 
 67 | #ifdef USE_GEN
 68 | #define VLEN 4
 69 | #include "hfav/c99-rotate.h"
 70 | #include "laplace5-gen.hpp"
 71 | #endif
 72 | 
 73 | static void test_compute(const int GD, const double in[restrict][GD+2], double out[restrict][GD+2])
 74 | {
 75 |     for(int j = 1; j < GD+1; ++j)
 76 |     {
 77 |         for(int i = 1; i < GD+1; ++i)
 78 |         {
 79 |             laplace5(in[j-1][i], in[j][i+1], in[j+1][i], in[j][i-1], in[j][i], &out[j][i]);
 80 |         }
 81 |     }
 82 | }
 83 | 
 84 | int main(int argc, char **argv)
 85 | {
 86 |     if (argc != 3)
 87 |     {
 88 |         printf("Usage: %s [# iterations] [size]\n", argv[0]);
 89 |         exit(EXIT_FAILURE);
 90 |     }
 91 | 
 92 |     const int iterations = atoll(argv[1]);
 93 |     GD                   = atoll(argv[2]);
 94 | 
 95 |     h = 1.0/GD;
 96 |     h_inv = 1.0/h;
 97 | 
 98 |     // pad by 1 on each side so we don't have to branch in operatorr
 99 |     double *in  = (double*) calloc((GD+2) * (GD+2), sizeof(double));
100 |     double *out = (double*) calloc((GD+2) * (GD+2), sizeof(double));
101 | 
102 |     typedef double grid[GD+2][GD+2];
103 | 
104 |     srand(12345);
105 |     for(int j = 1; j < GD+1; ++j)
106 |     {
107 |         for(int i = 1; i < GD+1; ++i)
108 |         {
109 |             in[j*(GD+2) + i] = drand48()*h_inv*h_inv;
110 |         }
111 |     }
112 | 
113 |     for(int j = 0; j < GD+2; ++j)
114 |     {
115 |         in[j*(GD+2) + 0] = 1.0*h*h;
116 |         out[j*(GD+2) + 0] = 1.0*h*h;
117 |         in[j*(GD+2) + GD+1] = 1.0*h*h;
118 |         out[j*(GD+2) + GD+1] = 1.0*h*h;
119 |     }
120 |     for(int i = 0; i < GD+2; ++i)
121 |     {
122 |         in [0*(GD+2) + i] = -1.0*h*h;
123 |         out[0*(GD+2) + i] = -1.0*h*h;
124 |         in [(GD+1)*(GD+2) + i] = -1.0*h*h;
125 |         out[(GD+1)*(GD+2) + i] = -1.0*h*h;
126 |     }
127 |     in [0*(GD+2) + 0] = 0.0;
128 |     out[0*(GD+2) + 0] = 0.0;
129 |     in [0*(GD+2) + (GD+1)] = 0.0;
130 |     out[0*(GD+2) + (GD+1)] = 0.0;
131 | 
132 |     in [(GD+1)*(GD+2) + 0] = 0.0;
133 |     out[(GD+1)*(GD+2) + 0] = 0.0;
134 |     in [(GD+1)*(GD+2) + (GD+1)] = 0.0;
135 |     out[(GD+1)*(GD+2) + (GD+1)] = 0.0;
136 | 
137 |     printf("Initial: %30.20le\n", L2norm(GD,  (double (*)[GD+2])  in));
138 | 
139 |     double start = omp_get_wtime();
140 |     uint64_t start_c = _rdtsc();
141 |     for(int t = 0; t < iterations; ++t)
142 |     {
143 | #ifdef USE_GEN
144 |         inplace_laplace(GD, (double (*)[GD+2]) in, 1, GD+1, 1, GD+1);
145 | #else
146 |         test_compute(GD, (double (*)[GD+2]) in, (double (*)[GD+2]) out);
147 |         std::swap(in, out);
148 | #endif
149 |     }
150 | 
151 |     double end = omp_get_wtime();
152 |     uint64_t end_c = _rdtsc();
153 |     printf("Took %le seconds\n", end-start);
154 |     printf("Took %le cycles\n", (double)(end_c-start_c));
155 | 
156 |     const uint64_t total_cyc = end_c - start_c;
157 |     printf("Took %le cycles/iter\n", (double)total_cyc/iterations);
158 |     printf("Took %le cycles/iter/cell\n", (double)total_cyc/iterations/(GD*GD));
159 | 
160 |     printf("Final %30.20le\n", L2norm(GD,  (double (*)[GD+2]) in));
161 | }
162 | 


--------------------------------------------------------------------------------
/examples/laplace5/laplace5.yaml:
--------------------------------------------------------------------------------
 1 | # Example: laplace5.yaml
 2 | # Demonstrates integration of generated laplace5 kernel into application code.
 3 | 
 4 | kernels:
 5 | 
 6 |     laplace:
 7 |         declaration: laplace5(double n, double e, double s, double w, double self, double &out);
 8 |         inputs: |
 9 |             n    : q?[j?-1][i?]
10 |             e    : q?[j?][i?+1]
11 |             s    : q?[j?+1][i?]
12 |             w    : q?[j?][i?-1]
13 |             self : q?[j?][i?]
14 |         outputs: |
15 |             out : laplace(q?[j?][i?])
16 | 
17 | globals:
18 | 
19 |     inputs: |
20 |         double g_cell[j?][i?] => cell[j?][i?]
21 |     outputs: |
22 |         laplace(cell[j][i]) => double g_cell[j][i]
23 | 
24 | codegen options:
25 |     header: |
26 |       static void inplace_laplace(const int GD, double g_cell[restrict][GD+2], int istart, int iend, int jstart, int jend)
27 |       {
28 |     footer: |
29 |       }
30 |     loops:
31 |       -
32 |         iter_ident: i
33 |         start: istart
34 |         end: iend
35 |         stride: 1
36 |       -
37 |         iter_ident: j
38 |         start: jstart
39 |         end: jend
40 |         stride: 1
41 |     loop order: [j, i]
42 |     language   : C99
43 |     vector loop: i
44 |     prefix     : __hfav_
45 |     output file: laplace5-gen.hpp
46 | 


--------------------------------------------------------------------------------
/examples/literals.yaml:
--------------------------------------------------------------------------------
 1 | # Example: literals.yaml
 2 | # Demonstrates usage of literals to modify indices passed to functions.
 3 | # This functionality has only been tested with very simple expressions, e.g. +/- 1
 4 | 
 5 | kernels:
 6 | 
 7 |     update:
 8 |         declaration: update(int im1, int i, int ip1, double om1, double old, double op1, double &new);
 9 |         inputs: |
10 |             im1: i?-1
11 |             i  : i?
12 |             ip1: i?+1
13 |             om1: old[j?][i?-1]
14 |             old: old[j?][i?]
15 |             op1: old[j?][i?+1]
16 |         outputs: |
17 |             new: new[j?][i?]
18 | 
19 | globals:
20 | 
21 |     inputs: |
22 |         double cell[j?][i?] => old[j?][i?]
23 |     outputs: |
24 |         new[j][i] => double cell[j][i]
25 | 
26 | codegen options:
27 | 
28 |     loops:
29 |     -
30 |         iter_ident: i
31 |         start: first_i
32 |         end: last_i
33 |         stride: 1
34 |     -
35 |         iter_ident: j
36 |         start: first__j
37 |         end: last__j
38 |         stride: 1
39 | 
40 |     loop order: [j, i]
41 | 
42 |     language: C99
43 |     prefix: __hfav_
44 |     vector loop: None
45 | 


--------------------------------------------------------------------------------
/examples/reduction.yaml:
--------------------------------------------------------------------------------
 1 | # Example: reduction.yaml
 2 | # Demonstrates usage of reduction(+:) syntax.
 3 | 
 4 | kernels:
 5 | 
 6 |     sum:
 7 |         declaration: sum(double x, double &xSum);
 8 |         inputs: |
 9 |             x: x[i][j]
10 |         outputs: |
11 |             xSum: reduction(+:xSum)
12 | 
13 | globals:
14 | 
15 |     inputs: |
16 |         double x[i?][j?]
17 | 
18 |     outputs: |
19 |         double xSum
20 | 
21 | codegen options:
22 | 
23 |     loops:
24 |     -
25 |         iter_ident: i
26 |         start: first_i
27 |         end: last_i
28 |         stride: 1
29 |     -
30 |         iter_ident: j
31 |         start: first_j
32 |         end: last_j
33 |         stride: 1
34 | 
35 |     loop order: [i, j]
36 | 
37 |     language: C99
38 |     prefix: __hfav_
39 |     vector loop: None
40 | 


--------------------------------------------------------------------------------
/examples/split-loops.yaml:
--------------------------------------------------------------------------------
 1 | # Example: split-loops.yaml
 2 | # Demonstrates a loop split occurring due to a reduction.
 3 | # Such splits are automatically identified by hfav.
 4 | 
 5 | kernels:
 6 | 
 7 |     sum:
 8 |         declaration: sum(double x, double &xSum);
 9 |         inputs: |
10 |             x: x[i][j]
11 |         outputs: |
12 |             xSum: reduction(+:xSum)
13 | 
14 |     normalize:
15 |         declaration: normalize(double& x, double& xSum);
16 |         inputs: |
17 |             x: x[i?][j?]
18 |             xSum: xSum
19 |         outputs: |
20 |             x: normalized(x[i?][j?])
21 | 
22 | globals:
23 | 
24 |     inputs: |
25 |         double x[i?][j?]
26 | 
27 |     outputs: |
28 |         double xSum
29 |         normalized(x[i][j]) => double x[i][j]
30 | 
31 | codegen options:
32 | 
33 |     loops:
34 |     -
35 |         iter_ident: i
36 |         start: first_i
37 |         end: last_i
38 |         stride: 1
39 |     -
40 |         iter_ident: j
41 |         start: first_j
42 |         end: last_j
43 |         stride: 1
44 | 
45 |     loop order: [i, j]
46 | 
47 |     language: C99
48 |     prefix: __hfav_
49 |     vector loop: None
50 | 


--------------------------------------------------------------------------------
/examples/uninitialized.yaml:
--------------------------------------------------------------------------------
 1 | # Example: uninitialized.yaml
 2 | # Demonstrates usage of uninitialized variables.
 3 | 
 4 | kernels:
 5 | 
 6 |     set_to_zero:
 7 |         declaration: set_to_zero(double &x);
 8 |         outputs: |
 9 |             x: zero(q?)
10 | 
11 | globals:
12 | 
13 |     outputs: |
14 |         zero(x[i]) => double x[i]
15 | 
16 | codegen options:
17 | 
18 |     loops:
19 |     -
20 |         iter_ident: i
21 |         start: first_cell_x
22 |         end: last_cell_x
23 |         stride: 1
24 | 
25 |     loop order: [i]
26 | 
27 |     language: C99
28 |     prefix: __hfav_
29 |     vector loop: None
30 | 


--------------------------------------------------------------------------------
/examples/vectorization-inner.yaml:
--------------------------------------------------------------------------------
 1 | # Example: vectorization-inner.yaml
 2 | # Demonstrates usage of "vector loop" to vectorize an inner loop.
 3 | 
 4 | kernels:
 5 | 
 6 |     flux_x:
 7 |         declaration: flux(cell_t lc, cell_t rc, flux_t &fx);
 8 |         inputs: |
 9 |             lc : q?[j?-1][i?]
10 |             rc : q?[j?][i?]
11 |         outputs: |
12 |             fx : flux_x(q?[j?][i?])
13 | 
14 |     integrate:
15 |         declaration: integrate(flux_t lf, flux_t rf, cell_t &ic);
16 |         inputs: |
17 |             lf : flux_x(q?[j?][i?])
18 |             rf : flux_x(q?[j?+1][i?])
19 |         outputs: |
20 |             ic : integrated(q?[j?][i?])
21 | 
22 |     clamp:
23 |         declaration: clamp(cell_t in, int &out);
24 |         inputs: |
25 |             in : q?
26 |         outputs: |
27 |             out : clamped(q?)
28 | 
29 | globals:
30 | 
31 |     inputs: |
32 |         double d_cell[j?][i?] => cell[j?][i?]
33 |     outputs: |
34 |         clamped(integrated(cell[j][i])) => int i_cell[j][i]
35 | 
36 | codegen options:
37 | 
38 |     loops:
39 |     -
40 |       iter_ident: i
41 |       start: first_i
42 |       end: last_i
43 |       stride: 1
44 |     -
45 |       iter_ident: j
46 |       start: first_j
47 |       end: last_j
48 |       stride: 1
49 | 
50 |     loop order: [j, i]
51 | 
52 |     language   : C99
53 |     vector loop: i
54 |     prefix     : __hfav_
55 |     types:
56 |         cell_t: float64
57 |         flux_t: float
58 |         clamp_t: int32
59 | 


--------------------------------------------------------------------------------
/examples/vectorization-outer.yaml:
--------------------------------------------------------------------------------
 1 | # Example: vectorization-outer.yaml
 2 | # Demonstrates usage of "vector loop" to vectorize an outer loop.
 3 | 
 4 | kernels:
 5 | 
 6 |     flux_x:
 7 |         declaration: flux(cell_t lc, cell_t rc, flux_t &fx);
 8 |         inputs: |
 9 |             lc : q?[j?-1][i?]
10 |             rc : q?[j?][i?]
11 |         outputs: |
12 |             fx : flux_x(q?[j?][i?])
13 | 
14 |     integrate:
15 |         declaration: integrate(flux_t lf, flux_t rf, cell_t &ic);
16 |         inputs: |
17 |             lf : flux_x(q?[j?][i?])
18 |             rf : flux_x(q?[j?+1][i?])
19 |         outputs: |
20 |             ic : integrated(q?[j?][i?])
21 | 
22 |     clamp:
23 |         declaration: clamp(cell_t in, int &out);
24 |         inputs: |
25 |             in : q?
26 |         outputs: |
27 |             out : clamped(q?)
28 | 
29 | globals:
30 | 
31 |     inputs: |
32 |         double d_cell[j?][i?] => cell[j?][i?]
33 |     outputs: |
34 |         clamped(integrated(cell[j][i])) => int i_cell[j][i]
35 | 
36 | codegen options:
37 | 
38 |     loops:
39 |     -
40 |       iter_ident: i
41 |       start: first_i
42 |       end: last_i
43 |       stride: 1
44 |     -
45 |       iter_ident: j
46 |       start: first_j
47 |       end: last_j
48 |       stride: 1
49 | 
50 |     loop order: [i, j]
51 | 
52 |     language   : C99
53 |     vector loop: i
54 |     prefix     : __hfav_
55 |     types:
56 |         cell_t: float64
57 |         flux_t: float
58 |         clamp_t: int32
59 | 


--------------------------------------------------------------------------------
/hfav.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | # hfav.py; top-level invocation and parsing
  4 | 
  5 | # Copyright 2017 Intel Corporation
  6 | #
  7 | # GENERATED CODE EXEMPTION
  8 | #
  9 | # The output of this tool does not automatically import the Apache
 10 | # 2.0 license, except the output will continue to be subject to the
 11 | # limitation of liability clause in the Apache 2.0 license. Users may
 12 | # license their output under any license they choose but the liability
 13 | # of the authors of the tool for that output is governed by the
 14 | # limitation of liability clause in the Apache 2.0 license.
 15 | #
 16 | # Licensed under the Apache License, Version 2.0 (the "License");
 17 | # you may not use this file except in compliance with the License.
 18 | # You may obtain a copy of the License at
 19 | #
 20 | #     http://www.apache.org/licenses/LICENSE-2.0
 21 | #
 22 | # Unless required by applicable law or agreed to in writing, software
 23 | # distributed under the License is distributed on an "AS IS" BASIS,
 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25 | # See the License for the specific language governing permissions and
 26 | # limitations under the License.
 27 | 
 28 | import logging
 29 | import sys
 30 | import re
 31 | import os
 32 | import argparse
 33 | import yaml
 34 | from hfav.infer import dag_chain, rule_arg, rule, axiom, goal, rule_group, ivar_axiom, codeblock, reduction_op, reduction_initializer, reduction_finalizer
 35 | from hfav.ispace import iteration_space
 36 | from hfav.term import symbolic_constant
 37 | from hfav.analyze import simple_generator, rolling_generator, rap_dual
 38 | from hfav.c99 import codegen, c99_generator
 39 | from hfav.cpp import cpp_generator
 40 | from hfav.inest import inest_dag
 41 | from hfav import parse
 42 | 
 43 | def parse_declaration(declaration):
 44 |     m = re.match(r"(?:(\w+)[\s]+)?(\w+)\(([\w\d\s,&]*)\);", declaration)
 45 |     if m is None:
 46 |         raise SyntaxError("Malformed kernel declaration: %s" % (declaration))  # TODO: More information...
 47 |     else:
 48 |         rtype = m.group(1)
 49 |         kname = m.group(2)
 50 |         types = {}
 51 |         positions = {}
 52 |         try:
 53 |             varlist = m.group(3).replace("&", "").replace("*", "")
 54 |             pos = 0
 55 |             for v in re.split(r",\s*", varlist):
 56 |                 vsplit = re.split(r"\s*", v)
 57 |                 if len(vsplit) != 2:
 58 |                     raise SyntaxError("Can't tell if %s is a type or a variable name." % (v))
 59 |                 vtype = vsplit[0]
 60 |                 vname = vsplit[1]
 61 |                 types[vname] = vtype
 62 |                 positions[vname] = pos
 63 |                 pos = pos + 1
 64 |         except IndexError:
 65 |             pass
 66 |         if rtype is not None and rtype != 'void':
 67 |             types['<return>'] = rtype
 68 |             positions['<return>'] = -1
 69 |         return (kname, types, positions)
 70 | 
 71 | # top level list
 72 | # [ iter level
 73 | #   itspace
 74 | # ]
 75 | 
 76 | # loop [prologue, steady, epilogue] ident
 77 | 
 78 | # ident parent, children (loops, raps)
 79 | 
 80 | # loop
 81 | #
 82 | 
 83 | def hfav_run_yaml():
 84 |     logging.basicConfig(level=logging.INFO)
 85 | 
 86 |     parser = argparse.ArgumentParser(description="YAML front-end for High-performance Inference Fusion Into Vectorization (hfav)")
 87 |     parser.add_argument('-d', '--debug', dest='debug_output', action='store_true', default=False, help='enable debug output')
 88 |     parser.add_argument('-o', '--output', dest='output_location', action='store', default=False, help='override output location "-" gives stdout')
 89 |     parser.add_argument('-s', '--storage', dest='storage', action='store', default='stack', help='where to place temporary arrays (default: stack)')
 90 |     parser.add_argument('-v', '--verbosity', dest='verbosity', choices=['0', '1', '2'], action='store', default=0, help='verbosity level')
 91 |     parser.add_argument('FILE', help='Input YAML file')
 92 |     args = parser.parse_args()
 93 |     debug_output = args.debug_output
 94 |     extra_output = args.verbosity
 95 |     filename = args.FILE
 96 |     storage = args.storage
 97 | 
 98 |     logging.info("Loading input file %s", filename)
 99 |     config = yaml.load(file(filename, 'r'))
100 |     kernels = []
101 |     axioms = []
102 |     goals = []
103 | 
104 |     if os.environ.get('HFAVROOT') is None:
105 |         logging.warning("Please set HFAVROOT environment variable to your hfav directory...\n")
106 |         hfavroot = "hfav"
107 |     else:
108 |         hfavroot = os.environ.get('HFAVROOT')
109 | 
110 |     # Read kernels
111 |     for kname, kparams in config["kernels"].items():
112 | 
113 |         name, vtype, vpos = parse_declaration(kparams["declaration"])
114 |         vrule = {}
115 | 
116 |         iargs = []
117 |         if "inputs" in kparams.keys():
118 |             for line in kparams["inputs"].splitlines():
119 |                 input_li = line.partition(":")
120 |                 vname = input_li[0].strip()
121 |                 if vname == '<return>':
122 |                     raise SyntaxError("<return> cannot be used as an input! (%s)" % (name))
123 |                 vrule[vname] = input_li[2].strip()
124 |                 iargs.append(rule_arg(vpos[vname], vtype[vname], vrule[vname], "input"))
125 | 
126 |         oargs = []
127 |         got_return = False
128 |         if "outputs" in kparams.keys():
129 |             for line in kparams["outputs"].splitlines():
130 |                 output = line.partition(":")
131 |                 vname = output[0].strip()
132 |                 if vname == '<return>':
133 |                     if got_return:
134 |                         raise SyntaxError("Got multiple <return>s! (%s, %s)" % (vrule[vname], output[2].strip()))
135 |                     got_return = True
136 |                 if vname in vrule.keys():
137 |                     logging.warning("Parameter \"%s\" specified as an input and output to kernel \"%s\" -- here be dragons...", vname, kname)
138 | 
139 |                 m = re.match(r"reduction\((.+):(.+)\)", output[2].strip())
140 |                 if m is not None:
141 |                     opkey = m.group(1)
142 |                     if opkey not in reduction_op.supported().keys():
143 |                         logging.error("%s is not a recognized reduction, must be one of: %s", opkey, map(str, reduction_op.supported().keys()))
144 |                     red_op = reduction_op.supported()[opkey]
145 |                     vrule[vname] = "_reduction(%s)" % m.group(2)
146 |                     iargs.append(rule_arg(vpos[vname], vtype[vname], "_init(%s)" % m.group(2), "input"))
147 |                     kernels.append(reduction_initializer(parse.parser(m.group(2)).expr(), red_op, vtype[vname]))
148 |                     kernels.append(reduction_finalizer(parse.parser(m.group(2)).expr(), red_op, vtype[vname]))
149 |                 else:
150 |                     vrule[vname] = output[2].strip()
151 | 
152 |                 oargs.append(rule_arg(vpos[vname], vtype[vname], vrule[vname], "output"))
153 | 
154 |         for vname in vtype.keys():
155 |             if vname not in vrule:
156 |                 logging.warning("No replacement rule for parameter \"%s\" passed to kernel \"%s\" was specified -- assuming a global input of the same name exists.", vname, kname)
157 |                 vrule[vname] = "%s" % (vname)
158 |                 iargs.append(rule_arg(vpos[vname], vtype[vname], vrule[vname], "input"))
159 |                 axioms.append(axiom.read(vrule[vname], vrule[vname], vtype[vname]))
160 | 
161 |         kernel = rule.read(name, iargs, oargs)
162 |         kernels.append(kernel)
163 | 
164 |     # Read code blocks
165 |     code_blocks = []
166 |     try:
167 |         cbs = config["code blocks"].items()
168 |         for name, cb in cbs:
169 |             block = codeblock.read(name, cb)
170 |             code_blocks.append(block)
171 |     except KeyError:
172 |         logging.warning("No code blocks specified; assuming no boundary conditions -- \"to infinity and beyond!\"")
173 | 
174 |     # Read inputs
175 |     if "inputs" in config["globals"]:
176 |         for line in config["globals"]["inputs"].splitlines():
177 |             input_li = line.partition("=>")
178 |             if input_li[1] == "=>":
179 |                 decl = re.split(r"\s*", input_li[0].strip(), 1)
180 |                 axioms.append(axiom.read(decl[1], input_li[2].strip(), decl[0]))
181 |             else:
182 |                 decl = re.split(r"\s*", input_li[0].strip(), 1)
183 |                 # this case makes sense, where the input is implicitly the same as the output
184 |                 axioms.append(axiom.read(decl[1], decl[1], decl[0]))
185 |     else:
186 |         logging.warning("No global inputs specified -- things are unlikely to work except in pathological cases.")
187 | 
188 |     # Read outputs
189 |     if "outputs" in config["globals"]:
190 |         for line in config["globals"]["outputs"].splitlines():
191 |             output = line.partition("=>")
192 |             if output[1] == "=>":
193 |                 decl = re.split(r"\s*", output[2].strip(), 1)
194 |                 goals.append(goal.read(output[0].strip(), decl[1], decl[0]))
195 |             else:
196 |                 decl = re.split(r"\s*", output[0].strip(), 1)
197 |                 # this case makes sense, where the output is explicitly different to all inputs
198 |                 goals.append(goal.read(decl[1], decl[1], decl[0]))
199 |     else:
200 |         logging.error("No global outputs specified -- nothing to generate.")
201 | 
202 |     pg = rule_group()
203 |     pg.rules += kernels
204 |     pg.rules += code_blocks
205 | 
206 |     try:
207 |         prefix = config["codegen options"]["prefix"]
208 |     except KeyError:
209 |         prefix = "__"
210 | 
211 |     language = config["codegen options"]["language"]
212 |     vector_var = None
213 |     try:
214 |         vector_var = config["codegen options"]["vector loop"]
215 |         if vector_var == "None":
216 |             vector_var = None
217 |         else:
218 |             vector_var = symbolic_constant(vector_var)
219 |     except KeyError:
220 |         pass
221 | 
222 |     if language == "C" or language == "C99":
223 |         generator = c99_generator
224 |     elif language == "C++":
225 |         generator = cpp_generator
226 |     else:
227 |         logging.error("Unrecognized language: %s -- select one of C, C99 or C++")
228 | 
229 |     if debug_output:
230 |         cgen = generator(hfavroot, storage, None)
231 |         cgen.debug_vector_var = vector_var
232 |     else:
233 |         cgen = generator(hfavroot, storage, vector_var)
234 | 
235 |     default_typedict = cgen.typedict.copy()
236 |     try:
237 |         for k, v in config["codegen options"]["types"].items():
238 |             if k in cgen.typedict:
239 |                 logging.warning("%s already exists in dictionary -- overriding with %s.", k, v)
240 |             m = re.match(r"([a-zA-Z]+)(\d+)?", v)
241 |             if m is None:
242 |                 raise SyntaxError("Malformed type: %s -- expected <type><width>" % v)
243 |             elif m.group(2) == None:
244 |                 if m.group(1) in default_typedict.keys():
245 |                     cgen.typedict[k] = list(default_typedict[m.group(1)])
246 |                 else:
247 |                     raise SyntaxError("Malformed type: %s -- width must be specified for all types not in %s" % (v, default_typedict.keys()))
248 |             else:
249 |                 if m.group(1) not in ["int", "float"]:
250 |                     raise SyntaxError("Malformed type: %s -- base type must be 'int' or 'float'" % m.group(1))
251 |                 cgen.typedict[k] = [m.group(1), int(m.group(2))]
252 |     except KeyError:
253 |         pass
254 |     logging.debug("Using type dictionary: %s", cgen.typedict)
255 | 
256 |     loops = iteration_space.from_yaml(config)
257 | 
258 |     if args.output_location:
259 |         if args.output_location == '-':
260 |             of = sys.stdout
261 |             logging.info("Generating code to stdout (overriden)")
262 |         else:
263 |             of = open(args.output_location, "w")
264 |             output = args.output_location
265 |             logging.info("Generating code to %s (overriden)", output)
266 |     else:
267 |         try:
268 |             output = config["codegen options"]["output file"]
269 |             of = open(output, "w")
270 |             logging.info("Generating code into %s", output)
271 |         except KeyError:
272 |             of = sys.stdout
273 |             logging.info("Generating code to stdout")
274 | 
275 |     try:
276 |         header = config["codegen options"]["header"]
277 |     except KeyError:
278 |         header = None
279 | 
280 |     try:
281 |         footer = config["codegen options"]["footer"]
282 |     except KeyError:
283 |         footer = None
284 | 
285 |     logging.info("Loaded input file")
286 | 
287 |     for iv in loops.loop_order:
288 |         axioms.append(ivar_axiom(iv))
289 | 
290 |     logging.info("Chaining...")
291 |     gr = dag_chain(pg, cgen.typedict, axioms).resolve(goals)
292 |     logging.info("Chaining finished.")
293 |     logging.info("IDAG has %s", gr.stats())
294 |     logging.info("     Iteration space is over %s", [str(x) for x in gr.ivars()])
295 | 
296 |     rd = rap_dual.from_idag(gr)
297 |     logging.info("Rap DUAL! %s ", rd.stats())
298 |     order = rd.level_sort()
299 |     for i, o in enumerate(order):
300 |         logging.info("RD %d %s ", i, o.name())
301 |     levels = rd.level_sort_levels()
302 |     for i, l in enumerate(levels):
303 |         logging.info("RD level %d %s ", i, [o.name() for o in l])
304 |     rd.check_reductions()
305 | 
306 |     rap_loops = rd.topo_sort(lambda x: (len(x.rap_ivars()), x.rap_ivars()))
307 |     for i, r in enumerate(rap_loops):
308 |         logging.debug("%d %s %s", i, str(r), r.rap_ivars())
309 | 
310 |     if extra_output > 0:
311 |         (root, ext) = os.path.splitext(os.path.basename(filename))
312 |         dagfile = root + "rapdual.dot"
313 |         logging.info("Writing out rapdual dag to %s", dagfile,)
314 |         with file(dagfile, "w") as fi:
315 |             print >> fi, rd.dot(v_fmt=lambda x: "%s-%s" % (x.name(), [str(i) for i in x.rap_ivars()]), e_fmt=lambda x: "")
316 |         logging.info("Done writing out rapdual dag.")
317 |     else:
318 |         logging.info("Skipping writing rapdual dag.")
319 | 
320 |     if extra_output > 0:
321 |         (root, ext) = os.path.splitext(os.path.basename(filename))
322 |         dagfile = root + ".dot"
323 |         logging.info("Writing out inference dag to %s", dagfile)
324 |         with file(dagfile, "w") as fi:
325 |             print >> fi, gr.dot()
326 |         logging.info("Done writing out inference dag.")
327 |     else:
328 |         logging.info("Skipping writing inference dag.")
329 | 
330 |     fusion = not debug_output
331 |     logging.info("Rap dual super node fusion.")
332 |     indag = inest_dag(rd, loops)
333 |     if extra_output > 0:
334 |         (root, ext) = os.path.splitext(os.path.basename(filename))
335 |         dagfile = root + "_inest.dot"
336 |         logging.info("Writing out inest dag to %s", dagfile)
337 |         with file(dagfile, "w") as fi:
338 |             print >> fi, indag.dot(lambda v: str(v.inest), lambda v: "")
339 |         logging.info("Done writing out inest dag.")
340 |     else:
341 |         logging.info("Skipping writing inest dag.")
342 | 
343 |     if not debug_output:
344 |         logging.info("Fusing inest_dag")
345 |         indag.topo_fuse()
346 |         if extra_output > 0:
347 |             (root, ext) = os.path.splitext(os.path.basename(filename))
348 |             dagfile = root + "_inest_fused.dot"
349 |             logging.info("Writing out fused dag to %s", dagfile)
350 |             with file(dagfile, "w") as fi:
351 |                 print >> fi, indag.dot(lambda v: str(v.inest), lambda v: "")
352 |             logging.info("Done writing out fused dag.")
353 |         else:
354 |             logging.info("Skipping writing fused dag.")
355 |     else:
356 |         logging.info("Not fusing inest_dag")
357 | 
358 |     rolling = True
359 |     if rolling and fusion:
360 |         logging.info("Preparing rolling generator")
361 |         generator = rolling_generator
362 |     else:
363 |         logging.info("Preparing simple generator")
364 |         generator = simple_generator
365 | 
366 |     ig = generator(indag, loops, cgen, prefix)
367 |     logging.info("Generator initialized")
368 |     lst = codegen.listing()
369 |     cgen.header(lst, header)
370 |     logging.info("Generating")
371 |     ig.generate(lst)
372 |     logging.info("Done generating")
373 |     cgen.footer(lst, footer)
374 |     if of != sys.stdout:
375 |         logging.info("Writing code to %s", os.path.abspath(output))
376 |     else:
377 |         logging.info("Writing code to stdout")
378 | 
379 |     print >> of, lst.emit()
380 | 
381 |     if of != sys.stdout:
382 |         of.close()
383 | 
384 |     logging.info("Finished generating code.")
385 |     logging.info("Done; exiting.")
386 | 
387 |     sys.exit(0)
388 | 
# Run the YAML front-end only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    hfav_run_yaml()
391 | 


--------------------------------------------------------------------------------
/hfav/__init__.py:
--------------------------------------------------------------------------------
 1 | # hfav/__init__.py; module header file
 2 | 
 3 | # Copyright 2017 Intel Corporation
 4 | #
 5 | # GENERATED CODE EXEMPTION
 6 | #
 7 | # The output of this tool does not automatically import the Apache
 8 | # 2.0 license, except the output will continue to be subject to the
 9 | # limitation of liability clause in the Apache 2.0 license. Users may
10 | # license their output under any license they choose but the liability
11 | # of the authors of the tool for that output is governed by the
12 | # limitation of liability clause in the Apache 2.0 license.
13 | #
14 | # Licensed under the Apache License, Version 2.0 (the "License");
15 | # you may not use this file except in compliance with the License.
16 | # You may obtain a copy of the License at
17 | #
18 | #     http://www.apache.org/licenses/LICENSE-2.0
19 | #
20 | # Unless required by applicable law or agreed to in writing, software
21 | # distributed under the License is distributed on an "AS IS" BASIS,
22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | # See the License for the specific language governing permissions and
24 | # limitations under the License.
25 | 
# Package-level setting, initialised to 0.
# NOTE(review): presumably a verbosity level shared across the package --
# confirm against the modules that read it.
extra_output = 0
28 | 


--------------------------------------------------------------------------------
/hfav/c99.py:
--------------------------------------------------------------------------------
 1 | # hfav/c99.py; code generation for c99
 2 | 
 3 | # Copyright 2017 Intel Corporation
 4 | #
 5 | # GENERATED CODE EXEMPTION
 6 | #
 7 | # The output of this tool does not automatically import the Apache
 8 | # 2.0 license, except the output will continue to be subject to the
 9 | # limitation of liability clause in the Apache 2.0 license. Users may
10 | # license their output under any license they choose but the liability
11 | # of the authors of the tool for that output is governed by the
12 | # limitation of liability clause in the Apache 2.0 license.
13 | #
14 | # Licensed under the Apache License, Version 2.0 (the "License");
15 | # you may not use this file except in compliance with the License.
16 | # You may obtain a copy of the License at
17 | #
18 | #     http://www.apache.org/licenses/LICENSE-2.0
19 | #
20 | # Unless required by applicable law or agreed to in writing, software
21 | # distributed under the License is distributed on an "AS IS" BASIS,
22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | # See the License for the specific language governing permissions and
24 | # limitations under the License.
25 | 
26 | from . import codegen
27 | import os
28 | 
29 | 
class c99_generator(codegen.codegen):
    """C99 code generator with optional vectorization of one loop.

    Extends ``codegen.codegen``.  When ``vector_var`` names a loop variable,
    the steady-state part of that loop (``1 in phase``) is emitted with a
    step of ``stride*VLEN`` and bounded by ``<itervar>_vbound``.
    ``begin_remainder_loop`` emits the loop that covers the iterations from
    ``<itervar>_vbound`` up to the loop end.
    """

    def __init__(self, root, storage, vector_var):
        """Create a generator.

        Args:
            root: forwarded to the base class constructor.
            storage: forwarded to the base class constructor.
            vector_var: loop variable to vectorize, or None for none.
        """
        super(c99_generator, self).__init__(root, storage)
        self.vector_var = vector_var
        # True only between begin_remainder_loop and end_remainder_loop.
        self.remainder = False

    def begin_vector_loop(self, lst):
        """Open the per-lane loop (``__hfav_vlane`` in ``[0, VLEN)``) and its scope."""
        lst.append("#pragma simd assert\n")
        lst.append("for (int __hfav_vlane = 0; __hfav_vlane < VLEN; ++__hfav_vlane)\n")
        self.begin_scope(lst)

    def end_vector_loop(self, lst):
        """Close the scope opened by ``begin_vector_loop``."""
        self.end_scope(lst)

    def begin_loop(self, lst, itervar, interval, phase):
        """Open a loop over ``itervar``.

        Loops other than the steady-state phase of the vector loop are
        delegated to the base class.  For the vector loop, a
        ``<itervar>_vbound`` constant is declared and the loop steps by
        ``stride*VLEN`` up to that bound.

        Args:
            lst: listing that receives the generated lines.
            itervar: loop variable.
            interval: object providing ``start``, ``end`` and ``stride``.
            phase: container of phase numbers; ``0`` and ``2`` select whether
                the first and last stride-sized step are part of this loop
                (inferred from the start/end adjustments below -- confirm).
        """
        if not (itervar == self.vector_var and 1 in phase):
            super(c99_generator, self).begin_loop(lst, itervar, interval, phase)
            return

        stride = str(interval.stride)
        vstride = stride + "*VLEN"
        start = interval.start if 0 in phase else "%s+%s" % (interval.start, stride)
        end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
        # Round the trip count down to a multiple of VLEN; the mask form
        # assumes VLEN is a power of two.
        vbound = "%s + (((%s)-(%s)) & ~(VLEN-1))" % (start, end, start)
        lst.append("const int %s_vbound = %s;\n" % (itervar, vbound))
        lst.append("for (%s = %s; %s < %s_vbound; %s += %s)\n" % (itervar, start, itervar, itervar, itervar, vstride))
        self.vectorize = True
        self.begin_scope(lst)

    def end_loop(self, lst, itervar, interval, phase):
        """Close a loop; leaving the vectorized loop clears ``self.vectorize``."""
        if (itervar == self.vector_var and 1 in phase):
            self.vectorize = False
        super(c99_generator, self).end_loop(lst, itervar, interval, phase)

    def begin_remainder_loop(self, lst, itervar, interval, phase):
        """Open the loop from ``<itervar>_vbound`` to the loop end.

        The emitted code relies on the ``<itervar>_vbound`` constant declared
        by ``begin_loop`` for the same loop variable.
        """
        stride = interval.stride
        end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
        lst.append("for (%s = %s_vbound; %s < %s; %s += %s)\n" % (itervar, itervar, itervar, end, itervar, stride))
        self.begin_scope(lst)
        self.remainder = True

    def end_remainder_loop(self, lst, itervar, interval, phase):
        """Close the scope opened by ``begin_remainder_loop``."""
        self.end_scope(lst)
        self.remainder = False

    def rotate(self, type, ident, start, end, roll_var):
        """Return the expression that rotates ``ident`` between ``start`` and ``end``.

        * rolling along the vector variable while vectorizing: a
          ``rotate_<base><width>`` call with a step of ``VLEN``;
        * rolling along another variable while a vector variable is set: a
          ``vrotate_<base><width>`` call with a step of ``1``;
        * otherwise: the base class implementation.

        ``<base>`` and ``<width>`` are looked up in ``self.typedict[type]``.
        """
        if roll_var == self.vector_var:
            if self.vectorize:
                return self.invoke("rotate_%s%s" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(start), str(end), "VLEN"])
            return super(c99_generator, self).rotate(type, ident, start, end, roll_var)
        if self.vector_var is not None:
            return self.invoke("vrotate_%s%s" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(start), str(end), "1"])
        return super(c99_generator, self).rotate(type, ident, start, end, roll_var)

    def rotate_ptr(self, type, ident, len, roll_var):
        """Delegate pointer rotation to the base class, passing ``len`` as a string.

        Raises:
            NotImplementedError: when rolling along the vector variable while
                vectorizing.
        """
        if roll_var == self.vector_var and self.vectorize:
            raise NotImplementedError("rotate_ptr is not implemented for vector types")
        return super(c99_generator, self).rotate_ptr(type, ident, str(len), roll_var)
97 | 


--------------------------------------------------------------------------------
/hfav/codegen.py:
--------------------------------------------------------------------------------
  1 | # hfav/codegen.py; code generation base class
  2 | 
  3 | # Copyright 2017 Intel Corporation
  4 | #
  5 | # GENERATED CODE EXEMPTION
  6 | #
  7 | # The output of this tool does not automatically import the Apache
  8 | # 2.0 license, except the output will continue to be subject to the
  9 | # limitation of liability clause in the Apache 2.0 license. Users may
 10 | # license their output under any license they choose but the liability
 11 | # of the authors of the tool for that output is governed by the
 12 | # limitation of liability clause in the Apache 2.0 license.
 13 | #
 14 | # Licensed under the Apache License, Version 2.0 (the "License");
 15 | # you may not use this file except in compliance with the License.
 16 | # You may obtain a copy of the License at
 17 | #
 18 | #     http://www.apache.org/licenses/LICENSE-2.0
 19 | #
 20 | # Unless required by applicable law or agreed to in writing, software
 21 | # distributed under the License is distributed on an "AS IS" BASIS,
 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 23 | # See the License for the specific language governing permissions and
 24 | # limitations under the License.
 25 | 
 26 | from operator import attrgetter
 27 | import os
 28 | import logging
 29 | logger = logging.getLogger(__name__)
 30 | 
 31 | 
 32 | class codegen(object):
 33 | 
 34 |     def __init__(self, root, storage):
 35 |         self.root = root
 36 |         self.typedict = {'char': ['int', 8], 'byte': ['int', 8], 'int': ['int', 32], 'long': ['int', 64], 'float': ['float', 32], 'double': ['float', 64]}
 37 |         self.vector_var = None
 38 |         self.vectorize = False
 39 |         self.hindent = 0
 40 |         self.debug_vector_var = None  # TODO: Sorry Jason
 41 |         self.storage = storage
 42 |         pass
 43 | 
 44 |     def byref(self, ident):
 45 |         return "&" + ident
 46 | 
 47 |     def header(self, lst, h):
 48 |         self.rotate_function(lst)
 49 |         if h is not None:
 50 |             lst.append(h)
 51 |             opened = h.count('{')
 52 |             closed = h.count('}')
 53 |             self.hindent = opened - closed
 54 |             for i in range(0, opened - closed):
 55 |                 lst.indent()
 56 | 
 57 |     def footer(self, lst, f):
 58 |         if f is not None:
 59 |             if self.hindent > 0:
 60 |                 lst.deindent()
 61 |             lst.append(f)
 62 | 
 63 |     def ident_offset(self, ident, offset):
 64 |         if offset > 0:
 65 |             return ident + "+" + str(offset)
 66 |         elif offset == 0:
 67 |             return ident
 68 |         else:
 69 |             return ident + "-" + str(abs(offset))
 70 | 
    def prologue_gen(self):
        """Return the generator to use for prologue code: this object itself.

        NOTE(review): presumably overridden by generators that need a
        different generator for the prologue -- confirm against subclasses.
        """
        return self
 73 | 
    def epilogue_gen(self):
        """Return the generator to use for epilogue code; the base class has none.

        NOTE(review): how callers treat the ``None`` result is not visible
        here -- confirm against the call sites.
        """
        return None
 76 | 
 77 |     def read_aref(self, ident, offset):
 78 |         return "%s%s" % (ident, "".join([("[%s]" % o) for o in offset]))
 79 | 
 80 |     def write_aref(self, ident, offset):
 81 |         return "%s%s" % (ident, "".join([("[%s]" % o) for o in offset]))
 82 | 
 83 |     def read_ref(self, ident):
 84 |         return "%s" % (ident,)
 85 | 
 86 |     def write_ref(self, ident):
 87 |         return "%s" % (ident,)
 88 | 
 89 |     def assign(self, dst, src):
 90 |         return "%s = %s" % (dst, src)
 91 | 
 92 |     def invoke(self, ident, args):
 93 |         return "%s(%s)" % (ident, ", ".join(args))
 94 | 
 95 |     def array_declaration(self, type, ident, size):
 96 |         if self.storage == "stack":
 97 |             return "%s %s%s" % (type, ident, "".join([("[%s]" % s) for s in size]))
 98 |         else:
 99 | 
100 |             if size == []:
101 |                 return "%s %s" % (type, ident)
102 | 
103 |             if len(size) > 1:
104 |                 unroll_str = "".join([("[%s]" % s) for s in size[1:]])
105 |             else:
106 |                 unroll_str = ""
107 |             decl = "%s (*%s)%s" % (type, ident, unroll_str)
108 |             cast = "%s(*)%s" % (type, unroll_str)
109 |             flatsize = "*".join([("(%s)" % s) for s in size])
110 |             return "%s = (%s) _mm_malloc((%s)*sizeof(%s), 64)" % (decl, cast, flatsize, type)
111 | 
112 |     def array_free(self, type, ident, size):
113 |         if self.storage == "stack":
114 |             return None
115 |         else:
116 |             if size == []:
117 |                 return None
118 |             else:
119 |                 return "_mm_free(%s)" % (ident)
120 | 
121 |     def array_ptr_declaration(self, type, ptr_ident, src_ident, size):
122 |         roll_str = "[%s]" % str(size[0])
123 |         if len(size) > 2:
124 |             unroll_str = "".join([("[%s]" % s) for s in size[2:]])
125 |         else:
126 |             unroll_str = ""
127 |         dst = "%s (*%s%s)%s " % (type, ptr_ident, roll_str, unroll_str)
128 |         srcs = []
129 |         for r in range(size[0]):
130 |             srcs.append(src_ident + ("[%s]" % str(r)))
131 |         return self.assign(dst, "{" + ", ".join(srcs) + "}")
132 | 
133 |     def statement(self, lst, state):
134 |         if state is not None:
135 |             lst.append(state + ";\n")
136 | 
137 |     def init_iters(self, lst, loops):
138 |         if len(loops.loop_dict.keys()) > 0:
139 |             lst.append("int %s;\n" % (", ".join(map(attrgetter("ident"), loops.loop_dict.keys()))))
140 | 
141 |     def begin_scope(self, lst):
142 |         lst.append("{\n")
143 |         lst.indent()
144 | 
145 |     def end_scope(self, lst):
146 |         lst.deindent()
147 |         lst.append("}\n")
148 | 
149 |     def begin_loop(self, lst, itervar, interval, phase):
150 |         stride = interval.stride
151 |         if phase == [0]:
152 |             lst.append("%s = %s;\n" % (itervar, interval.start))
153 |             lst.append("if (%s < %s)\n" % (itervar, interval.end))
154 |         elif 1 in phase:
155 |             if self.debug_vector_var is not None and self.debug_vector_var == itervar:
156 |                 lst.append("#pragma simd assert\n")
157 |             start = interval.start if 0 in phase else "%s+%s" % (interval.start, stride)
158 |             end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
159 |             lst.append("for (%s = %s; %s < %s; %s += %s)\n" % (itervar, start, itervar, end, itervar, stride))
160 |         elif phase == [2]:
161 |             lst.append("%s = %s-1;\n" % (itervar, interval.end))
162 |             lst.append("if (%s > %s)\n" % (itervar, interval.start))
163 |         self.begin_scope(lst)
164 | 
165 |     def end_loop(self, lst, itervar, interval, phase):
166 |         self.end_scope(lst)
167 | 
168 |     def rotate_header(self):
169 |         return "\n"  # TODO: Decide if we should remove this completely.
170 |         # return "#include \"hfav/c99-rotate.h\"\n"
171 | 
172 |     def rotate_function(self, lst):
173 |         lst.append(self.rotate_header())
174 | 
175 |     def rotate(self, type, ident, start, end, roll_var):
176 |         return self.invoke("rotate_%s%s" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(start), str(end), "1"])
177 | 
178 |     def rotate_ptr(self, type, ident, len, roll_var):
179 |         return self.invoke("rotate_%s%s_ptr" % (self.typedict[type][0], self.typedict[type][1]), [ident, str(len)])
180 | 
181 |     def comment(self, lst, lines):
182 |         if len(lines) > 2:
183 |             lst.append("/* " + lines[0] + "\n")
184 |             for li in ["   " + z + "\n" for z in lines[1:-1]]:
185 |                 lst.append(li)
186 |             lst.append("   " + lines[-1] + "*/\n")
187 |         else:
188 |             for li in ["// " + z + "\n" for z in lines]:
189 |                 lst.append(li)
190 | 
191 | 
class listing(object):
    """Accumulates lines of generated text, indenting each one as it is added."""

    def __init__(self):
        self.indent_level = 0   # current nesting depth
        self.lines = []         # finished lines, each ending in a newline
        self.indent_width = 4   # spaces per nesting level

    def indent(self):
        """Increase the nesting depth applied to subsequently appended lines."""
        self.indent_level += 1

    def deindent(self):
        """Decrease the nesting depth applied to subsequently appended lines."""
        self.indent_level -= 1

    def append(self, string):
        """Add ``string`` prefixed with the current indentation.

        ``string`` must end with a newline.  ``endswith`` is used rather than
        indexing so that an empty string fails the assertion instead of
        raising IndexError.
        """
        assert string.endswith('\n')
        self.lines.append(" " * (self.indent_level * self.indent_width) + string)

    def emit(self):
        """Return everything appended so far as one string."""
        return "".join(self.lines)
211 | 


--------------------------------------------------------------------------------
/hfav/cpp.py:
--------------------------------------------------------------------------------
  1 | # hfav/cpp.py; C++ code generation
  2 | 
  3 | # Copyright 2017 Intel Corporation
  4 | #
  5 | # GENERATED CODE EXEMPTION
  6 | #
  7 | # The output of this tool does not automatically import the Apache
  8 | # 2.0 license, except the output will continue to be subject to the
  9 | # limitation of liability clause in the Apache 2.0 license. Users may
 10 | # license their output under any license they choose but the liability
 11 | # of the authors of the tool for that output is governed by the
 12 | # limitation of liability clause in the Apache 2.0 license.
 13 | #
 14 | # Licensed under the Apache License, Version 2.0 (the "License");
 15 | # you may not use this file except in compliance with the License.
 16 | # You may obtain a copy of the License at
 17 | #
 18 | #     http://www.apache.org/licenses/LICENSE-2.0
 19 | #
 20 | # Unless required by applicable law or agreed to in writing, software
 21 | # distributed under the License is distributed on an "AS IS" BASIS,
 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 23 | # See the License for the specific language governing permissions and
 24 | # limitations under the License.
 25 | 
 26 | from . import codegen
 27 | import os
 28 | 
 29 | 
 30 | class cpp_generator(codegen.codegen):
 31 | 
 32 |     def __init__(self, root, storage, vector_var):
 33 |         super(cpp_generator, self).__init__(root, storage)
 34 |         self.vector_var = vector_var
 35 |         self.remainder = False
 36 |         pass
 37 | 
 38 |     def byref(self, ident):
 39 |         return ident
 40 | 
 41 |     def begin_vector_loop(self, lst):
 42 |         lst.append("#pragma simd assert\n")
 43 |         lst.append("for (int __hfav_vlane = 0; __hfav_vlane < VLEN; ++__hfav_vlane)\n")
 44 |         self.begin_scope(lst)
 45 | 
 46 |     def end_vector_loop(self, lst):
 47 |         self.end_scope(lst)
 48 | 
 49 |     def begin_loop(self, lst, itervar, interval, phase):
 50 |         if not (itervar == self.vector_var and 1 in phase):
 51 |             super(cpp_generator, self).begin_loop(lst, itervar, interval, phase)
 52 |         else:
 53 |             stride = str(interval.stride)
 54 |             vstride = stride + "*VLEN"
 55 |             start = interval.start if 0 in phase else "%s+%s" % (interval.start, stride)
 56 |             end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
 57 |             vbound = "%s + (((%s)-(%s)) & ~(VLEN-1))" % (start, end, start)
 58 |             lst.append("const int %s_vbound = %s;\n" % (itervar, vbound))
 59 |             lst.append("for (%s = %s; %s < %s_vbound; %s += %s)\n" % (itervar, start, itervar, itervar, itervar, vstride))
 60 |             self.vectorize = True
 61 |             self.begin_scope(lst)
 62 | 
 63 |     def end_loop(self, lst, itervar, interval, phase):
 64 |         if itervar == self.vector_var and 1 in phase:
 65 |             self.vectorize = False
 66 |         super(cpp_generator, self).end_loop(lst, itervar, interval, phase)
 67 | 
 68 |     def begin_remainder_loop(self, lst, itervar, interval, phase):
 69 |         stride = interval.stride
 70 |         start = interval.start if 0 in phase else "%s+%s" % (interval.start, stride)
 71 |         end = interval.end if 2 in phase else "%s-%s" % (interval.end, stride)
 72 |         vbound = vbound = "%s + (((%s)-(%s)) & ~(VLEN-1))" % (start, end, start)
 73 |         lst.append("for (%s = %s_vbound; %s < %s; %s += %s)\n" % (itervar, itervar, itervar, end, itervar, interval.stride))
 74 |         self.begin_scope(lst)
 75 |         self.remainder = True
 76 | 
 77 |     def end_remainder_loop(self, lst, itervar, interval, phase):
 78 |         self.end_scope(lst)
 79 |         self.remainder = False
 80 | 
 81 |     def rotate(self, type, ident, start, end, roll_var):
 82 |         if roll_var == self.vector_var:
 83 |             if self.vectorize:
 84 |                 return self.invoke("hfav::rotate", [ident, str(start), str(end), "VLEN"])
 85 |             else:
 86 |                 return self.invoke("hfav::rotate", [ident, str(start), str(end), "1"])
 87 |         elif self.vector_var is not None:
 88 |             return self.invoke("hfav::vrotate", [ident, str(start), str(end), "1"])
 89 |         else:
 90 |             return self.invoke("hfav::rotate", [ident, str(start), str(end), "1"])
 91 | 
 92 |     def rotate_ptr(self, type, ident, len, roll_var):
 93 |         if roll_var == self.vector_var:
 94 |             if self.vectorize:
 95 |                 raise NotImplementedError("hfav::rotate_ptr is not implemented for vector types")
 96 |             else:
 97 |                 return self.invoke("hfav::rotate_ptr", [ident, str(len)])
 98 |         else:
 99 |             return self.invoke("hfav::rotate_ptr", [ident, str(len)])
100 | 


--------------------------------------------------------------------------------
/hfav/dot.py:
--------------------------------------------------------------------------------
 1 | # hfav/dot.py; graphviz 'dot' output
 2 | 
 3 | # Copyright 2017 Intel Corporation
 4 | #
 5 | # GENERATED CODE EXEMPTION
 6 | #
 7 | # The output of this tool does not automatically import the Apache
 8 | # 2.0 license, except the output will continue to be subject to the
 9 | # limitation of liability clause in the Apache 2.0 license. Users may
10 | # license their output under any license they choose but the liability
11 | # of the authors of the tool for that output is governed by the
12 | # limitation of liability clause in the Apache 2.0 license.
13 | #
14 | # Licensed under the Apache License, Version 2.0 (the "License");
15 | # you may not use this file except in compliance with the License.
16 | # You may obtain a copy of the License at
17 | #
18 | #     http://www.apache.org/licenses/LICENSE-2.0
19 | #
20 | # Unless required by applicable law or agreed to in writing, software
21 | # distributed under the License is distributed on an "AS IS" BASIS,
22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | # See the License for the specific language governing permissions and
24 | # limitations under the License.
25 | 
26 | from . import codegen
27 | 
28 | 
class dot_generator(codegen.codegen):
    """Generator that renders data flow as a graphviz ``digraph``.

    Array elements become nodes named ``<ident>_<offset>`` and assignments
    become edges from source to destination.  ``codegen.__init__`` is not
    invoked, so base-class attributes such as ``typedict`` and ``storage``
    are not set on instances.
    """

    def __init__(self):
        self.lines = []
        self.indent = 0  # leading spaces used by array_declaration

    def header(self, lst):
        """Open the digraph and emit the graph-wide attributes."""
        lst.append("digraph\n")
        lst.append("{\n")
        lst.indent()
        lst.append("size=\"20,20\";\n")
        lst.append("ratio=fill;\n")
        lst.append("node [shape=box];\n")

    def footer(self, lst):
        """Close the digraph.

        The listing is deindented before the brace is emitted so that the
        closing brace lines up with the opening one written by ``header``.
        """
        lst.deindent()
        lst.append("}\n")

    def offset_string(self, offset):
        """Return ``offset`` as text usable in a node name ('+' -> 'p', '-' -> 'm')."""
        offstring = str(offset)
        offstring = offstring.replace("+", "p")
        return offstring.replace("-", "m")

    def read_aref(self, ident, offset):
        """Return the node name for reading ``ident`` at ``offset``."""
        return "%s_%s" % (ident, self.offset_string(offset))

    def write_aref(self, ident, offset):
        """Return the node name for writing ``ident`` at ``offset``."""
        return "%s_%s" % (ident, self.offset_string(offset))

    def assign(self, dst, src):
        """Return the edge ``src -> dst``."""
        return "%s -> %s" % (src, dst)

    def invoke(self, ident, outputs, inputs):
        """Return one edge per (input, output) pair, newline separated.

        ``ident`` is not used.  ``str.join`` adds no trailing separator, so
        nothing is stripped from the result; the previous ``[:-1]`` slice cut
        the last character off the final node name.
        """
        return "\n".join(self.assign(o, i) for i in inputs for o in outputs)

    def array_declaration(self, ident, size):
        """Return one labelled node declaration per element of ``ident``.

        The final ``;`` is dropped from the joined text.
        NOTE(review): presumably because the caller passes the result to
        ``statement``, which appends ``;`` itself - confirm.
        """
        declarations = []
        for offset in range(0, size):
            declarations.append(self.indent * " " + "%s_%s [label=\"%s[%s]\"];" % (ident, self.offset_string(offset), ident, offset))
        return "\n".join(declarations)[:-1]

    def begin_loop(self, lst, loopi):
        """Loops have no representation in the graph; nothing is emitted."""
        pass

    def end_loop(self, lst):
        """Loops have no representation in the graph; nothing is emitted."""
        pass
84 | 


--------------------------------------------------------------------------------
/hfav/include/cpp-rotate.hpp:
--------------------------------------------------------------------------------
 1 | namespace hfav
 2 | {
 3 |     template <typename T>
 4 |     static inline void rotate(T v[], int len)
 5 |     {
 6 |         for(int i = 0; i < len-1; ++i)
 7 |             v[i] = v[i+1];
 8 |     }
 9 | 
10 |     template <typename T>
11 |     static inline void rotate_ptr(T v[], int len)
12 |     {
13 |         const T temp = v[0];
14 |         for(int i = 0; i < len-1; ++i)
15 |             v[i] = v[i+1];
16 |         v[len-1] = temp;
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/hfav/include/hfav/c99-rotate.h:
--------------------------------------------------------------------------------
 1 | #ifndef _HFAV_C99_ROTATE_H_
 2 | #define _HFAV_C99_ROTATE_H_
 3 | 
 4 | #include <limits.h>
 5 | 
 6 | #define __hfav_max(x, y) (((x) > (y)) ? (x) : (y))
 7 | #define __hfav_min(x, y) (((x) < (y)) ? (x) : (y))
 8 | 
 9 | typedef int int32;
10 | typedef long int64;
11 | typedef float float32;
12 | typedef double float64;
13 | 
14 | #define ROTATE(T) \
15 | static inline void rotate_##T(T v[], int start, int end, int s) \
16 | { \
17 |     for (int i = start; i < end; ++i) \
18 |     { \
19 |         v[i] = v[i+s]; \
20 |     } \
21 | }
22 | 
23 | #define VROTATE(T) \
24 | static inline void vrotate_##T(T v[][VLEN], int start, int end, int s) \
25 | { \
26 |     for (int i = start; i < end; ++i) \
27 |     { \
28 |         _Pragma("simd assert") \
29 |         for (int j = 0; j < VLEN; ++j) \
30 |         { \
31 |             v[i][j] = v[i+s][j]; \
32 |         } \
33 |     } \
34 | }
35 | 
36 | #define ROTATE_PTR(T) \
37 | static inline void rotate_##T##_ptr(T* v[], int len) \
38 | { \
39 |     T* temp = v[0]; \
40 |     for (int i = 0; i < len-1; ++i) \
41 |     { \
42 |         v[i] = v[i+1]; \
43 |     } \
44 |     v[len-1] = temp; \
45 | }
46 | 
47 | ROTATE(int32)
48 | ROTATE(int64)
49 | ROTATE(float32)
50 | ROTATE(float64)
51 | 
52 | VROTATE(int32)
53 | VROTATE(int64)
54 | VROTATE(float32)
55 | VROTATE(float64)
56 | 
57 | ROTATE_PTR(int32)
58 | ROTATE_PTR(int64)
59 | ROTATE_PTR(float32)
60 | ROTATE_PTR(float64)
61 | 
62 | #endif /* _HFAV_C99_ROTATE_H_ */
63 | 


--------------------------------------------------------------------------------
/hfav/include/hfav/cpp-rotate.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _HFAV_CPP_ROTATE_H_
 2 | #define _HFAV_CPP_ROTATE_H_
 3 | 
 4 | namespace hfav
 5 | {
 6 | 
 7 | template <typename T>
 8 | static inline void rotate(T v[], int start, int end, int s)
 9 | {
10 |     for (int i = start; i < end; ++i)
11 |     {
12 |         v[i] = v[i+s];
13 |     }
14 | }
15 | 
16 | template <typename T>
17 | static inline void vrotate(T v[][VLEN], int start, int end, int s)
18 | {
19 |     for (int i = start; i < end; ++i)
20 |     {
21 |         #pragma simd assert
22 |         for (int j = 0; j < VLEN; ++j)
23 |         {
24 |             v[i][j] = v[i+s][j];
25 |         }
26 |     }
27 | }
28 | 
29 | template <typename T>
30 | static inline void rotate_ptr(T v[], int len)
31 | {
32 |     const T temp = v[0];
33 |     for(int i = 0; i < len-1; ++i)
34 |     {
35 |         v[i] = v[i+1];
36 |     }
37 |     v[len-1] = temp;
38 | }
39 | 
40 | }
41 | 
42 | #endif /* _HFAV_CPP_ROTATE_H_ */
43 | 


--------------------------------------------------------------------------------
/hfav/ispace.py:
--------------------------------------------------------------------------------
  1 | # hfav/ispace.py; iteration space manipulation tools
  2 | 
  3 | # Copyright 2017 Intel Corporation
  4 | #
  5 | # GENERATED CODE EXEMPTION
  6 | #
  7 | # The output of this tool does not automatically import the Apache
  8 | # 2.0 license, except the output will continue to be subject to the
  9 | # limitation of liability clause in the Apache 2.0 license. Users may
 10 | # license their output under any license they choose but the liability
 11 | # of the authors of the tool for that output is governed by the
 12 | # limitation of liability clause in the Apache 2.0 license.
 13 | #
 14 | # Licensed under the Apache License, Version 2.0 (the "License");
 15 | # you may not use this file except in compliance with the License.
 16 | # You may obtain a copy of the License at
 17 | #
 18 | #     http://www.apache.org/licenses/LICENSE-2.0
 19 | #
 20 | # Unless required by applicable law or agreed to in writing, software
 21 | # distributed under the License is distributed on an "AS IS" BASIS,
 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 23 | # See the License for the specific language governing permissions and
 24 | # limitations under the License.
 25 | 
 26 | import logging
 27 | from . import term
 28 | 
 29 | 
 30 | class strided_interval(object):
 31 | 
 32 |     def __init__(self, start, end, stride=1):
 33 |         self.start = str(start)
 34 |         self.end = str(end)
 35 |         self.stride = stride
 36 | 
 37 |     def __str__(self):
 38 |         return "[%s:%s:%s]" % (self.start, self.end, self.stride)
 39 | 
 40 |     @classmethod
 41 |     def num(cls, n):
 42 |         return cls.__init__(n, n + 1)
 43 | 
 44 |     def sweep(self, n):
 45 |         return strided_interval(self.start + n, self.end + n, self.stride)
 46 | 
 47 |     def explicit(self):
 48 |         return range(self.start, self.end, self.stride)
 49 | 
 50 | 
 51 | class iteration_space(object):
 52 | 
 53 |     def __init__(self, loop_dict, loop_order):
 54 |         self.loop_dict = loop_dict
 55 |         self.loop_order = loop_order
 56 | 
 57 |     def dim(self):
 58 |         return len(self.loop_order)
 59 | 
 60 |     def strides(self):
 61 |         return [(k, self.loop_dict[k].stride) for k in self.loop_order]
 62 | 
 63 |     def copy(self):
 64 |         return iteration_space(self.loop_dict.copy(), list(self.loop_order))
 65 | 
 66 |     def map_offset(self, iter_var, offs, roll_var=None):
 67 |         if roll_var is not None and iter_var == roll_var:
 68 |             return offs
 69 |         else:
 70 |             loop = self.loop_dict[iter_var]
 71 |             if loop.stride != 1:
 72 |                 res_str = "(%s-%s)/%s" % (iter_var, loop.start, loop.stride)
 73 |             else:
 74 |                 res_str = "%s-%s" % (iter_var, loop.start)
 75 |             if offs != 0:
 76 |                 return res_str + "+%d" % offs
 77 |             else:
 78 |                 return res_str
 79 | 
 80 |     def interval(self, ivar):
 81 |         return self.loop_dict[ivar]
 82 | 
 83 |     def subspace(self, ivars):
 84 |         loop_dict = {}
 85 |         for iv in ivars:
 86 |             loop_dict[iv] = self.loop_dict[iv]
 87 |         loop_order = []
 88 |         for iv in self.loop_order:
 89 |             if iv in ivars:
 90 |                 loop_order.append(iv)
 91 |         return iteration_space(loop_dict, loop_order)
 92 | 
 93 |     def is_iter(self, var):
 94 |         return term.symbolic_constant(var) in self.loop_dict
 95 | 
 96 |     @classmethod
 97 |     def from_yaml(cls, config):
 98 |         loop_stuff = config["codegen options"]["loops"]
 99 |         if not isinstance(loop_stuff, list):
100 |             loop_stuff = [loop_stuff]
101 | 
102 |         loops = dict((term.symbolic_constant(x["iter_ident"]), strided_interval(x["start"], x["end"], x["stride"])) for x in loop_stuff)
103 | 
104 |         try:
105 |             loop_order = list(reversed([term.symbolic_constant(x) for x in config["codegen options"]["loop order"]]))
106 |         except KeyError:
107 |             loop_order = list(reversed([x for x in loops]))
108 | 
109 |         logging.debug("loop_order: %s" % map(str, loop_order),)
110 |         logging.debug("loops: %s" % (str(loops),))
111 |         return cls(loops, loop_order)
112 | 


--------------------------------------------------------------------------------
/hfav/iter_plot.py:
--------------------------------------------------------------------------------
 1 | # hfav/iter_plot.py; iteration space plotting tools
 2 | 
 3 | # Copyright 2017 Intel Corporation
 4 | #
 5 | # GENERATED CODE EXEMPTION
 6 | #
 7 | # The output of this tool does not automatically import the Apache
 8 | # 2.0 license, except the output will continue to be subject to the
 9 | # limitation of liability clause in the Apache 2.0 license. Users may
10 | # license their output under any license they choose but the liability
11 | # of the authors of the tool for that output is governed by the
12 | # limitation of liability clause in the Apache 2.0 license.
13 | #
14 | # Licensed under the Apache License, Version 2.0 (the "License");
15 | # you may not use this file except in compliance with the License.
16 | # You may obtain a copy of the License at
17 | #
18 | #     http://www.apache.org/licenses/LICENSE-2.0
19 | #
20 | # Unless required by applicable law or agreed to in writing, software
21 | # distributed under the License is distributed on an "AS IS" BASIS,
22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | # See the License for the specific language governing permissions and
24 | # limitations under the License.
25 | 
26 | import matplotlib
27 | matplotlib.use('agg')
28 | import pylab
29 | import math
30 | import itertools as it
31 | import logging
32 | 
33 | import matplotlib.patches as mpatches
34 | 
# Gap, in data units, that arrow() leaves between an arrow and each of its
# end points; it has the same value as the vertex circle radius drawn by
# iter_plot().
radius = 0.1
# Arrow head length, in data units, passed to FancyArrow by arrow().
hl = 0.1
37 | 
38 | 
def arrow(s, e, estyle):
    """Build a FancyArrow patch pointing from point ``s`` to point ``e``.

    Only the first two coordinates of each point are used.  The arrow starts
    ``radius`` away from ``s`` along the s->e direction and its shaft is
    shortened by ``hl + 2 * radius`` in total.  ``estyle`` is forwarded to
    ``FancyArrow`` as keyword arguments.  Coincident points divide by zero.
    """
    delta = tuple(end_c - start_c for start_c, end_c in zip(s, e))
    dx = delta[0]
    dy = delta[1]
    length = math.sqrt(dx * dx + dy * dy)
    ux = dx / length
    uy = dy / length
    origin_x = s[0] + ux * radius
    origin_y = s[1] + uy * radius
    shaft_x = dx - ux * (hl + 2 * radius)
    shaft_y = dy - uy * (hl + 2 * radius)
    return mpatches.FancyArrow(origin_x, origin_y, shaft_x, shaft_y, head_length=hl, head_width=0.05, **estyle)
46 | 
47 | 
def iter_plot_start():
    """Create an 8x8 figure and return its single subplot (``add_subplot(111)``)."""
    figure = pylab.figure(figsize=(8, 8))
    return figure.add_subplot(111)
52 | 
53 | 
def iter_plot(ax, dag, vstyle=None, estyle=None):
    """Draw ``dag`` on the axes ``ax``.

    Every key of ``dag.edges`` is a ``(source, target)`` pair drawn as an
    arrow between the two ``plotting_point()`` results; every key of
    ``dag.vertices`` is drawn as a circle of radius ``radius`` at its
    ``plotting_point()``.

    vstyle: keyword arguments for the vertex circles (default: none).
    estyle: keyword arguments for the edge arrows (default: none).

    Integer ticks are set from one below the smallest to one above the
    largest vertex coordinate.  A DAG without vertices gets no ticks instead
    of failing on the missing bounds.
    """
    # None replaces the former shared mutable {} defaults.
    vstyle = {} if vstyle is None else vstyle
    estyle = {} if estyle is None else estyle

    for s, e in dag.edges.keys():
        ax.add_patch(arrow(s.plotting_point(), e.plotting_point(), estyle))

    points = [v.plotting_point() for v in dag.vertices.keys()]
    for p in points:
        ax.add_patch(mpatches.Circle(p, radius, **vstyle))

    if points:
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        xticks = range(min(xs) - 1, max(xs) + 2)
        yticks = range(min(ys) - 1, max(ys) + 2)

        pylab.xticks(xticks, xticks)
        pylab.yticks(yticks, yticks)

    ax.xaxis.grid(True)
    ax.yaxis.grid(True)

    ax.set_aspect('equal')
93 | 
94 | 
def iter_plot_finish(fp):
    """Save the current pylab figure to ``fp`` via ``pylab.savefig``."""
    pylab.savefig(fp)
97 | 


--------------------------------------------------------------------------------
/hfav/old_cpp.py:
--------------------------------------------------------------------------------
 1 | # hfav/old_cpp.py; Unmaintained C++ code generation
 2 | 
 3 | # Copyright 2017 Intel Corporation
 4 | #
 5 | # GENERATED CODE EXEMPTION
 6 | #
 7 | # The output of this tool does not automatically import the Apache
 8 | # 2.0 license, except the output will continue to be subject to the
 9 | # limitation of liability clause in the Apache 2.0 license. Users may
10 | # license their output under any license they choose but the liability
11 | # of the authors of the tool for that output is governed by the
12 | # limitation of liability clause in the Apache 2.0 license.
13 | #
14 | # Licensed under the Apache License, Version 2.0 (the "License");
15 | # you may not use this file except in compliance with the License.
16 | # You may obtain a copy of the License at
17 | #
18 | #     http://www.apache.org/licenses/LICENSE-2.0
19 | #
20 | # Unless required by applicable law or agreed to in writing, software
21 | # distributed under the License is distributed on an "AS IS" BASIS,
22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | # See the License for the specific language governing permissions and
24 | # limitations under the License.
25 | 
26 | from . import c99
27 | import os
28 | 
29 | 
class cpp_generator(c99.c99_generator):
    """C++ flavour of the C99 generator.

    Identifiers are passed by reference without an address-of operator and
    the rotate helpers are called by their unsuffixed names.
    """

    def __init__(self, root):
        super(cpp_generator, self).__init__(root)

    def byref(self, ident):
        """Return ``ident`` unchanged."""
        return ident

    def rotate_header(self):
        """Return the text emitted ahead of the user header (a blank line)."""
        # return "#include \"hfav/cpp-rotate.hpp\"\n"
        return "\n"

    def rotate(self, type, ident, start, end, roll_var):
        """Return ``rotate(ident, start, end, 1)``; ``type`` and ``roll_var`` are unused."""
        call_args = [ident, str(start), str(end), "1"]
        return self.invoke("rotate", call_args)

    def rotate_ptr(self, type, ident, len, roll_var):
        """Return ``rotate_ptr(ident, len)``; ``type`` and ``roll_var`` are unused."""
        call_args = [ident, str(len)]
        return self.invoke("rotate_ptr", call_args)
48 | 
49 | 
class cpp_autovec_generator(cpp_generator):
    """C++ generator whose rotate calls depend on the vectorised loop variable."""

    def __init__(self, root, vector_var):
        """Initialise the parent with ``root`` and remember ``vector_var``.

        The parent ``__init__`` accepts only ``root``; forwarding
        ``vector_var`` to it (as the previous code did) raised TypeError and
        left ``self.vector_var`` unset.
        NOTE(review): ``rotate``/``rotate_ptr`` also read ``self.vectorize``,
        which is presumably initialised further up the class hierarchy -
        confirm.
        """
        super(cpp_autovec_generator, self).__init__(root)
        self.vector_var = vector_var

    def rotate(self, type, ident, start, end, roll_var):
        """Return the rotate call appropriate for the current vectorisation state.

        * rolling over the vector variable inside the vectorised loop:
          ``rotate`` with shift ``VLEN``;
        * rolling over another variable while a vector variable is set:
          ``vrotate`` with shift 1;
        * otherwise: the parent's scalar ``rotate``.
        """
        if roll_var == self.vector_var:
            if self.vectorize:
                return self.invoke("rotate", [ident, str(start), str(end), "VLEN"])
            else:
                return super(cpp_autovec_generator, self).rotate(type, ident, start, end, roll_var)
        elif self.vector_var is not None:
            return self.invoke("vrotate", [ident, str(start), str(end), "1"])
        else:
            return super(cpp_autovec_generator, self).rotate(type, ident, start, end, roll_var)

    def rotate_ptr(self, type, ident, len, roll_var):
        """Return the parent's ``rotate_ptr`` call.

        Raises NotImplementedError when rolling over the vector variable
        inside the vectorised loop.
        """
        if roll_var == self.vector_var and self.vectorize:
            raise NotImplementedError("rotate_ptr is not implemented for vector types")
        return super(cpp_autovec_generator, self).rotate_ptr(type, ident, len, roll_var)
75 | 


--------------------------------------------------------------------------------
/hfav/parse.py:
--------------------------------------------------------------------------------
  1 | # hfav/parse.py; Parse iteration/variable descriptions
  2 | 
  3 | # Copyright 2017 Intel Corporation
  4 | #
  5 | # GENERATED CODE EXEMPTION
  6 | #
  7 | # The output of this tool does not automatically import the Apache
  8 | # 2.0 license, except the output will continue to be subject to the
  9 | # limitation of liability clause in the Apache 2.0 license. Users may
 10 | # license their output under any license they choose but the liability
 11 | # of the authors of the tool for that output is governed by the
 12 | # limitation of liability clause in the Apache 2.0 license.
 13 | #
 14 | # Licensed under the Apache License, Version 2.0 (the "License");
 15 | # you may not use this file except in compliance with the License.
 16 | # You may obtain a copy of the License at
 17 | #
 18 | #     http://www.apache.org/licenses/LICENSE-2.0
 19 | #
 20 | # Unless required by applicable law or agreed to in writing, software
 21 | # distributed under the License is distributed on an "AS IS" BASIS,
 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 23 | # See the License for the specific language governing permissions and
 24 | # limitations under the License.
 25 | 
 26 | from . import term
 27 | 
 28 | 
 29 | class parse_error(ValueError):
 30 |     pass
 31 | 
 32 | class fatal_parse_error(ValueError):
 33 |     pass
 34 | 
 35 | """grammar:
 36 | <numeric-literal> := <digit>+
 37 | <symbolic-const>  := <alpha>[<alpha>|<digit>]*['!']
 38 | <const>           := [<symbolic-const>|<numeric-literal>]
 39 | <variable>        := <alpha>[<alpha>|<digit>]*'?'
 40 | <function-identifier> := <alpha>[<alpha>|<digit>]*'
 41 | <function>        := <function-identifier>'('<expr-list>')'
 42 | <term>            := [<function>|<const>|<variable>]
 43 | <prefix-op>       := ['-'|'+']
 44 | <infix-op>        := ['-'|'+']
 45 | <suffix-op>       := '['<expr>']'
 46 | (had to modify to remove a left-recursive term, see below)
 47 | <expr>            := [<term>|<prefix-op><expr>|<expr><infix-op><expr>|<expr><suffix-op>]
 48 | <expr-list>       := [<expr>?|','<expr-list>]
 49 | 
 50 | this
 51 | <expr>            := [<term>|<prefix-op><expr>|<expr><infix-op><expr>|<expr><suffix-op>]
 52 | 
 53 | is left-recusive, so we modify to have:
 54 | <expr>            := [<term><tail-expr>|<prefix-op><expr><tail-expr>]
 55 | <tail-expr>       := [<>|<infix-op><tail-expr>|<suffix-op><tail-expr>]
 56 | """
 57 | 
 58 | 
 59 | class parser(object):
 60 | 
    """Each parser either consumes some of string (advances pos) and returns something, or resets pos and raises a parse_error"""
 62 | 
 63 |     def __init__(self, string):
 64 |         self.string = string
 65 |         self.pos = 0
 66 | 
 67 |     def digit(self):
 68 |         c = self.string[self.pos:self.pos + 1]
 69 |         if c.isdigit():
 70 |             self.pos += 1
 71 |             return c
 72 |         raise parse_error("Not a digit")
 73 | 
 74 |     def alpha(self):
 75 |         c = self.string[self.pos:self.pos + 1]
 76 |         if c.isalpha() or c == '_':
 77 |             self.pos += 1
 78 |             return c
 79 |         raise parse_error("Not an alpha character")
 80 | 
 81 |     def alpha_digit(self):
 82 |         c = self.string[self.pos:self.pos + 1]
 83 |         if c.isalpha() or c == '_' or c.isdigit():
 84 |             self.pos += 1
 85 |             return c
 86 |         raise parse_error("Not an alpha or digit character")
 87 | 
 88 |     def match(self, char):
 89 |         c = self.string[self.pos:self.pos + 1]
 90 |         if c == char:
 91 |             self.pos += 1
 92 |             return c
 93 |         raise parse_error("Not a %s" % (char,))
 94 | 
 95 |     def whitespace(self):
 96 |         while self.pos < len(self.string) and self.string[self.pos].isspace():
 97 |             self.pos += 1
 98 | 
 99 |     def numeric_literal(self):
100 |         """<numeric-literal> := <digit>+"""
101 |         pos = self.pos
102 |         res = []
103 |         try:
104 |             while True:
105 |                 res.append(self.digit())
106 |         except parse_error:
107 |             if len(res) < 1:
108 |                 self.pos = pos
109 |                 raise parse_error("Not a number")
110 |             return term.numeric_constant(int(str(''.join(res)))).canonize()
111 | 
112 |     def symbolic_const(self):
113 |         """<symbolic-const> := <alpha>[<alpha>|<digit>]*['!']"""
114 |         pos = self.pos
115 |         res = []
116 |         res.append(self.alpha())
117 |         try:
118 |             while True:
119 |                 res.append(self.alpha_digit())
120 |         except parse_error:
121 |             try:
122 |                 res.append(self.match('!'))
123 |             except parse_error:
124 |                 pass
125 |             return term.symbolic_constant(''.join(res)).canonize()
126 | 
127 |     def const(self):
128 |         """<const> := [<symbolic-const>|<numeric-literal>]"""
129 |         pos = self.pos
130 |         try:
131 |             try:
132 |                 return self.symbolic_const()
133 |             except parse_error:
134 |                 self.pos = pos
135 |                 return self.numeric_literal()
136 |         except parse_error:
137 |             self.pos = pos
138 |             raise parse_error("Not a const")
139 | 
140 |     def variable(self):
141 |         """<variable> := <alpha>[<alpha>|<digit>]*'?'"""
142 |         pos = self.pos
143 |         try:
144 |             res = []
145 |             res.append(self.alpha())
146 |             try:
147 |                 while True:
148 |                     res.append(self.alpha_digit())
149 |             except parse_error:
150 |                 res.append(self.match('?'))
151 |                 return term.variable(''.join(res)).canonize()
152 |         except parse_error:
153 |             self.pos = pos
154 |             raise parse_error("Not a variable")
155 | 
156 |     def function_identifier(self):
157 |         """<function-identifier> := <alpha>[<alpha>|<digit>]*"""
158 |         pos = self.pos
159 |         try:
160 |             res = []
161 |             res.append(self.alpha())
162 |             try:
163 |                 while True:
164 |                     res.append(self.alpha_digit())
165 |             except parse_error:
166 |                 return ''.join(res)
167 |         except parse_error:
168 |             self.pos = pos
169 |             raise parse_error("Not a function identifier")
170 | 
171 |     def function(self):
172 |         """<function> := <function-identifier>'('<expr-list>')'"""
173 |         pos = self.pos
174 |         try:
175 |             fi = self.function_identifier()
176 |             self.whitespace()
177 |             self.match('(')
178 |             try:
179 |                 li = self.expr_list()
180 |                 self.whitespace()
181 |                 self.match(')')
182 |             except parse_error:
183 |                 raise fatal_parse_error("Not a valid expression list.")
184 |             return term.function(fi, li).canonize()
185 |         except parse_error:
186 |             self.pos = pos
187 |             raise parse_error("Not a function")
188 | 
189 |     def expr_list(self):
190 |         """<expr-list> := [<expr>?|','<expr-list>]"""
191 |         pos = self.pos
192 |         res = []
193 |         try:
194 |             res.append(self.expr())
195 |             while True:
196 |                 self.whitespace()
197 |                 self.match(',')
198 |                 res.append(self.expr())
199 |         except parse_error:
200 |             return res
201 | 
202 |     def term(self):
203 |         """<term> := [<function>|<const>|<variable>]"""
204 |         pos = self.pos
205 |         self.whitespace()
206 |         try:
207 |             return self.function()
208 |         except parse_error:
209 |             pass
210 |         try:
211 |             return self.variable()
212 |         except parse_error:
213 |             pass
214 |         try:
215 |             return self.const()
216 |         except parse_error:
217 |             pass
218 |         self.whitespace()
219 | 
220 |         self.pos = pos
221 |         raise parse_error("Not a term")
222 | 
223 |     def prefix_op(self):
224 |         """<prefix-op> := ['-'|'+']"""
225 |         try:
226 |             self.match('-')
227 |             return term.neg
228 |         except parse_error:
229 |             try:
230 |                 self.match('+')
231 |                 return term.add
232 |             except parse_error:
233 |                 raise parse_error("not a prefix op")
234 | 
235 |     def infix_op(self):
236 |         """<infix-op> := ['-'|'+']"""
237 |         try:
238 |             self.match('+')
239 |             return term.add
240 |         except parse_error:
241 |             pass
242 |         try:
243 |             self.match('-')
244 |             return term.neg
245 |         except parse_error:
246 |             raise parse_error("not an infix op")
247 | 
248 |     def suffix_op(self, left):
249 |         """<suffix-op> := '['<expr>']'"""
250 |         pos = self.pos
251 |         try:
252 |             self.whitespace()
253 |             self.match('[')
254 |             expr1 = self.expr()
255 |             self.match(']')
256 |             return term.at(left, expr1).canonize()
257 |         except parse_error:
258 |             self.pos = pos
259 |             raise parse_error("Not a suffix op")
260 | 
261 |     def tail_expr(self, left):
262 |         """<tail-expr> := [<EOF>|<infix-op><expr><tail-expr>|<suffix-op><tail-expr>]"""
263 |         pos = self.pos
264 |         try:
265 |             self.whitespace()
266 |             op = self.infix_op()
267 |             expr0 = self.expr()
268 |             newleft = op([left, expr0])
269 |             return self.tail_expr(newleft)
270 |         except parse_error:
271 |             pass
272 |         try:
273 |             self.whitespace()
274 |             op = self.suffix_op(left)
275 |             return self.tail_expr(op)
276 |         except parse_error:
277 |             self.pos = pos
278 |             return left
279 | 
280 |     def expr(self):
281 |         """<expr> := <term><tail-expr>|<prefix-op><expr><tail-expr>"""
282 |         pos = self.pos
283 |         try:
284 |             self.whitespace()
285 |             a = self.term()
286 |             self.whitespace()
287 |             return self.tail_expr(a)
288 |         except parse_error:
289 |             pos = self.pos
290 |         try:
291 |             self.whitespace()
292 |             a = self.prefix_op()
293 |             b = self.expr()
294 |             return a([self.tail_expr(b)]).canonize()
295 |         except parse_error:
296 |             pos = self.pos
297 |         raise parse_error("no expression found")
298 | 
299 | if __name__ == '__main__':
300 |     print(parser(" a (x? , a(q![i!-1] ) [ 1 + 1 ], -1 )").expr())
301 | 


--------------------------------------------------------------------------------
/regress-results/test-examples.txt:
--------------------------------------------------------------------------------
 1 | examples/boundary-conditions.yaml
 2 | examples/broadcast.yaml
 3 | examples/constants.yaml
 4 | examples/hydro2d/hydro2d-x.yaml
 5 | examples/hydro2d/hydro2d-y.yaml
 6 | examples/laplace5/laplace5.yaml
 7 | examples/literals.yaml
 8 | examples/reduction.yaml
 9 | examples/split-loops.yaml
10 | examples/uninitialized.yaml
11 | examples/vectorization-inner.yaml
12 | examples/vectorization-outer.yaml
13 | 


--------------------------------------------------------------------------------
/regress.sh:
--------------------------------------------------------------------------------
 1 | # regress.sh; run regression test on each example
 2 | 
 3 | # Copyright 2017 Intel Corporation
 4 | #
 5 | # GENERATED CODE EXEMPTION
 6 | #
 7 | # The output of this tool does not automatically import the Apache
 8 | # 2.0 license, except the output will continue to be subject to the
 9 | # limitation of liability clause in the Apache 2.0 license. Users may
10 | # license their output under any license they choose but the liability
11 | # of the authors of the tool for that output is governed by the
12 | # limitation of liability clause in the Apache 2.0 license.
13 | #
14 | # Licensed under the Apache License, Version 2.0 (the "License");
15 | # you may not use this file except in compliance with the License.
16 | # You may obtain a copy of the License at
17 | #
18 | #     http://www.apache.org/licenses/LICENSE-2.0
19 | #
20 | # Unless required by applicable law or agreed to in writing, software
21 | # distributed under the License is distributed on an "AS IS" BASIS,
22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | # See the License for the specific language governing permissions and
24 | # limitations under the License.
25 | 
26 | jobfile=regress-results/jobs.txt  # job log written by the parallel run below
27 | parallel -a regress-results/test-examples.txt --joblog "$jobfile" --results regress-results ./hfav.py > /dev/null 2> /dev/null
28 | head -n 1 "$jobfile"; tail -n +2 "$jobfile" | sort -k7 -n  # first (header) line, then the remaining lines sorted numerically on column 7
29 | 


--------------------------------------------------------------------------------