├── .gitignore ├── .gitmodules ├── LICENSE.txt ├── Makefile.am ├── README.md ├── autogen.sh ├── configure.ac ├── examples ├── gemm.c ├── gemm.tiled.c ├── stencil.c ├── stencil.tiled.c ├── transpose.c └── transpose.tiled.c ├── get_submodules.sh ├── m4 ├── absolute-header.m4 ├── ax_boost_base.m4 ├── ax_boost_program_options.m4 ├── ax_cflags_warn_all.m4 ├── ax_create_pkgconfig_info.m4 ├── ax_cxx_compile_stdcxx.m4 ├── ax_cxx_compile_stdcxx_11.m4 ├── ax_detect_clang.m4 ├── ax_submodule.m4 └── pkg.m4 ├── src ├── Access.cpp ├── Access.h ├── Definitions.h ├── HayStack.cpp ├── HayStack.h ├── Makefile.am ├── Program.cpp ├── Program.h ├── Timer.cpp ├── Timer.h ├── isl-helpers.cpp ├── isl-helpers.h ├── main.cpp └── op.h └── tests ├── BlinkTest.cpp ├── CacheEmulator.cpp ├── CacheEmulator.h ├── CholeskyTest.cpp ├── CopyTest.cpp ├── Makefile.am ├── MultiplicationTest.cpp ├── StencilTest.cpp ├── ToyTest.cpp ├── inputs ├── blink.c ├── cholesky.c ├── copy.c ├── multiplication.c ├── stencil.c └── toy.c └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # http://www.gnu.org/software/automake 2 | 3 | Makefile.in 4 | /ar-lib 5 | /mdate-sh 6 | /py-compile 7 | /test-driver 8 | /ylwrap 9 | /build-aux 10 | /build 11 | /.deps 12 | /.libs 13 | /src/.deps 14 | /src/.libs 15 | .vscode 16 | 17 | # http://www.gnu.org/software/autoconf 18 | 19 | /autom4te.cache 20 | /autoscan.log 21 | /autoscan-*.log 22 | /aclocal.m4 23 | /compile 24 | /config.guess 25 | /config.h.in 26 | /config.sub 27 | /configure 28 | /configure.scan 29 | /depcomp 30 | /install-sh 31 | /missing 32 | /stamp-h1 33 | 34 | # https://www.gnu.org/software/libtool/ 35 | 36 | /ltmain.sh 37 | 38 | # http://www.gnu.org/software/texinfo 39 | 40 | /texinfo.tex 41 | 42 | # http://www.gnu.org/software/m4/ 43 | 44 | m4/libtool.m4 45 | m4/ltoptions.m4 46 | m4/ltsugar.m4 47 | m4/ltversion.m4 48 | m4/lt~obsolete.m4 49 | autom4te.cache 50 | 51 | config.h* 52 | haystack-uninstalled.* 53 | 
haystack.pc* -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "barvinok"] 2 | path = barvinok 3 | url = https://repo.or.cz/barvinok.git 4 | [submodule "pet"] 5 | path = pet 6 | url = https://repo.or.cz/pet.git 7 | [submodule "googletest"] 8 | path = googletest 9 | url = https://github.com/google/googletest.git 10 | [submodule "isl"] 11 | path = isl 12 | url = https://github.com/spcl/haystack-isl.git 13 | branch = master 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ============================================================================== 2 | The Haystack Project is under the Apache License v2.0 with LLVM Exceptions: 3 | ============================================================================== 4 | 5 | Apache License 6 | Version 2.0, January 2004 7 | http://www.apache.org/licenses/ 8 | 9 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 10 | 11 | 1. Definitions. 12 | 13 | "License" shall mean the terms and conditions for use, reproduction, 14 | and distribution as defined by Sections 1 through 9 of this document. 15 | 16 | "Licensor" shall mean the copyright owner or entity authorized by 17 | the copyright owner that is granting the License. 18 | 19 | "Legal Entity" shall mean the union of the acting entity and all 20 | other entities that control, are controlled by, or are under common 21 | control with that entity. For the purposes of this definition, 22 | "control" means (i) the power, direct or indirect, to cause the 23 | direction or management of such entity, whether by contract or 24 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 25 | outstanding shares, or (iii) beneficial ownership of such entity. 
26 | 27 | "You" (or "Your") shall mean an individual or Legal Entity 28 | exercising permissions granted by this License. 29 | 30 | "Source" form shall mean the preferred form for making modifications, 31 | including but not limited to software source code, documentation 32 | source, and configuration files. 33 | 34 | "Object" form shall mean any form resulting from mechanical 35 | transformation or translation of a Source form, including but 36 | not limited to compiled object code, generated documentation, 37 | and conversions to other media types. 38 | 39 | "Work" shall mean the work of authorship, whether in Source or 40 | Object form, made available under the License, as indicated by a 41 | copyright notice that is included in or attached to the work 42 | (an example is provided in the Appendix below). 43 | 44 | "Derivative Works" shall mean any work, whether in Source or Object 45 | form, that is based on (or derived from) the Work and for which the 46 | editorial revisions, annotations, elaborations, or other modifications 47 | represent, as a whole, an original work of authorship. For the purposes 48 | of this License, Derivative Works shall not include works that remain 49 | separable from, or merely link (or bind by name) to the interfaces of, 50 | the Work and Derivative Works thereof. 51 | 52 | "Contribution" shall mean any work of authorship, including 53 | the original version of the Work and any modifications or additions 54 | to that Work or Derivative Works thereof, that is intentionally 55 | submitted to Licensor for inclusion in the Work by the copyright owner 56 | or by an individual or Legal Entity authorized to submit on behalf of 57 | the copyright owner. 
For the purposes of this definition, "submitted" 58 | means any form of electronic, verbal, or written communication sent 59 | to the Licensor or its representatives, including but not limited to 60 | communication on electronic mailing lists, source code control systems, 61 | and issue tracking systems that are managed by, or on behalf of, the 62 | Licensor for the purpose of discussing and improving the Work, but 63 | excluding communication that is conspicuously marked or otherwise 64 | designated in writing by the copyright owner as "Not a Contribution." 65 | 66 | "Contributor" shall mean Licensor and any individual or Legal Entity 67 | on behalf of whom a Contribution has been received by Licensor and 68 | subsequently incorporated within the Work. 69 | 70 | 2. Grant of Copyright License. Subject to the terms and conditions of 71 | this License, each Contributor hereby grants to You a perpetual, 72 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 73 | copyright license to reproduce, prepare Derivative Works of, 74 | publicly display, publicly perform, sublicense, and distribute the 75 | Work and such Derivative Works in Source or Object form. 76 | 77 | 3. Grant of Patent License. Subject to the terms and conditions of 78 | this License, each Contributor hereby grants to You a perpetual, 79 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 80 | (except as stated in this section) patent license to make, have made, 81 | use, offer to sell, sell, import, and otherwise transfer the Work, 82 | where such license applies only to those patent claims licensable 83 | by such Contributor that are necessarily infringed by their 84 | Contribution(s) alone or by combination of their Contribution(s) 85 | with the Work to which such Contribution(s) was submitted. 
If You 86 | institute patent litigation against any entity (including a 87 | cross-claim or counterclaim in a lawsuit) alleging that the Work 88 | or a Contribution incorporated within the Work constitutes direct 89 | or contributory patent infringement, then any patent licenses 90 | granted to You under this License for that Work shall terminate 91 | as of the date such litigation is filed. 92 | 93 | 4. Redistribution. You may reproduce and distribute copies of the 94 | Work or Derivative Works thereof in any medium, with or without 95 | modifications, and in Source or Object form, provided that You 96 | meet the following conditions: 97 | 98 | (a) You must give any other recipients of the Work or 99 | Derivative Works a copy of this License; and 100 | 101 | (b) You must cause any modified files to carry prominent notices 102 | stating that You changed the files; and 103 | 104 | (c) You must retain, in the Source form of any Derivative Works 105 | that You distribute, all copyright, patent, trademark, and 106 | attribution notices from the Source form of the Work, 107 | excluding those notices that do not pertain to any part of 108 | the Derivative Works; and 109 | 110 | (d) If the Work includes a "NOTICE" text file as part of its 111 | distribution, then any Derivative Works that You distribute must 112 | include a readable copy of the attribution notices contained 113 | within such NOTICE file, excluding those notices that do not 114 | pertain to any part of the Derivative Works, in at least one 115 | of the following places: within a NOTICE text file distributed 116 | as part of the Derivative Works; within the Source form or 117 | documentation, if provided along with the Derivative Works; or, 118 | within a display generated by the Derivative Works, if and 119 | wherever such third-party notices normally appear. The contents 120 | of the NOTICE file are for informational purposes only and 121 | do not modify the License. 
You may add Your own attribution 122 | notices within Derivative Works that You distribute, alongside 123 | or as an addendum to the NOTICE text from the Work, provided 124 | that such additional attribution notices cannot be construed 125 | as modifying the License. 126 | 127 | You may add Your own copyright statement to Your modifications and 128 | may provide additional or different license terms and conditions 129 | for use, reproduction, or distribution of Your modifications, or 130 | for any such Derivative Works as a whole, provided Your use, 131 | reproduction, and distribution of the Work otherwise complies with 132 | the conditions stated in this License. 133 | 134 | 5. Submission of Contributions. Unless You explicitly state otherwise, 135 | any Contribution intentionally submitted for inclusion in the Work 136 | by You to the Licensor shall be under the terms and conditions of 137 | this License, without any additional terms or conditions. 138 | Notwithstanding the above, nothing herein shall supersede or modify 139 | the terms of any separate license agreement you may have executed 140 | with Licensor regarding such Contributions. 141 | 142 | 6. Trademarks. This License does not grant permission to use the trade 143 | names, trademarks, service marks, or product names of the Licensor, 144 | except as required for reasonable and customary use in describing the 145 | origin of the Work and reproducing the content of the NOTICE file. 146 | 147 | 7. Disclaimer of Warranty. Unless required by applicable law or 148 | agreed to in writing, Licensor provides the Work (and each 149 | Contributor provides its Contributions) on an "AS IS" BASIS, 150 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 151 | implied, including, without limitation, any warranties or conditions 152 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 153 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 154 | appropriateness of using or redistributing the Work and assume any 155 | risks associated with Your exercise of permissions under this License. 156 | 157 | 8. Limitation of Liability. In no event and under no legal theory, 158 | whether in tort (including negligence), contract, or otherwise, 159 | unless required by applicable law (such as deliberate and grossly 160 | negligent acts) or agreed to in writing, shall any Contributor be 161 | liable to You for damages, including any direct, indirect, special, 162 | incidental, or consequential damages of any character arising as a 163 | result of this License or out of the use or inability to use the 164 | Work (including but not limited to damages for loss of goodwill, 165 | work stoppage, computer failure or malfunction, or any and all 166 | other commercial damages or losses), even if such Contributor 167 | has been advised of the possibility of such damages. 168 | 169 | 9. Accepting Warranty or Additional Liability. While redistributing 170 | the Work or Derivative Works thereof, You may choose to offer, 171 | and charge a fee for, acceptance of support, warranty, indemnity, 172 | or other liability obligations and/or rights consistent with this 173 | License. However, in accepting such obligations, You may act only 174 | on Your own behalf and on Your sole responsibility, not on behalf 175 | of any other Contributor, and only if You agree to indemnify, 176 | defend, and hold each Contributor harmless for any liability 177 | incurred by, or claims asserted against, such Contributor by reason 178 | of your accepting any such warranty or additional liability. 179 | 180 | END OF TERMS AND CONDITIONS 181 | 182 | APPENDIX: How to apply the Apache License to your work. 183 | 184 | To apply the Apache License to your work, attach the following 185 | boilerplate notice, with the fields enclosed by brackets "[]" 186 | replaced with your own identifying information. 
(Don't include 187 | the brackets!) The text should be enclosed in the appropriate 188 | comment syntax for the file format. We also recommend that a 189 | file or class name and description of purpose be included on the 190 | same "printed page" as the copyright notice for easier 191 | identification within third-party archives. 192 | 193 | Copyright 2019 ETH Zurich 194 | 195 | Licensed under the Apache License, Version 2.0 (the "License"); 196 | you may not use this file except in compliance with the License. 197 | You may obtain a copy of the License at 198 | 199 | http://www.apache.org/licenses/LICENSE-2.0 200 | 201 | Unless required by applicable law or agreed to in writing, software 202 | distributed under the License is distributed on an "AS IS" BASIS, 203 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 204 | See the License for the specific language governing permissions and 205 | limitations under the License. 206 | 207 | 208 | ---- LLVM Exceptions to the Apache 2.0 License ---- 209 | 210 | As an exception, if, as a result of your compiling your source code, portions 211 | of this Software are embedded into an Object form of such source code, you 212 | may redistribute such embedded portions in such Object form without complying 213 | with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 214 | 215 | In addition, if you combine or link compiled forms of this Software with 216 | software that is licensed under the GPLv2 ("Combined Software") and if a 217 | court of competent jurisdiction determines that the patent provision (Section 218 | 3), the indemnity provision (Section 9) or other Section of the License 219 | conflicts with the conditions of the GPLv2, you may retroactively and 220 | prospectively choose to deem waived or otherwise exclude such Section(s) of 221 | the License, but only in their entirety and only with respect to the Combined 222 | Software. 
223 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | if BUNDLED_ISL 2 | MAYBE_ISL = isl 3 | endif 4 | if BUNDLED_BARVINOK 5 | MAYBE_BARVINOK = barvinok 6 | endif 7 | if BUNDLED_PET 8 | MAYBE_PET = pet 9 | endif 10 | 11 | SUBDIRS = $(MAYBE_ISL) $(MAYBE_BARVINOK) $(MAYBE_PET) src tests 12 | 13 | ACLOCAL_AMFLAGS = -I m4 14 | 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Haystack 2 | 3 | Haystack is an analytical cache model that, given a program, computes the number of cache misses. The tool aims at providing the programmer with a better intuition of the memory access costs that on today's machines increasingly dominate the overall program execution time. The tool counts the cache misses symbolically and thus neither executes the program nor enumerates all memory accesses explicitly, which makes the model's runtime independent of the problem size. The tool models fully associative caches with LRU replacement policy. 4 | 5 | The paper "A Fast Analytical Model of Fully Associative Caches" (Tobias Gysi, Tobias Grosser, Laurin Brandner, and Torsten Hoefler) provides further implementation details. The software was developed by SPCL (ETH Zurich). 
6 | 7 | ## Installation 8 | 9 | Before installing the package make sure you install the following dependencies: 10 | - llvm/clang 11 | - gmp 12 | - ntl 13 | - boost (program options) 14 | - libyaml 15 | 16 | On Ubuntu systems (18.04) you can install these dependencies using the following command: 17 | ``` 18 | sudo apt-get install llvm-dev libclang-dev libgmp3-dev libntl-dev libboost-program-options-dev libyaml-dev 19 | ``` 20 | Once all dependencies are installed change to the haystack folder and run the commands: 21 | ``` 22 | ./get_submodules.sh 23 | ./autogen.sh 24 | ``` 25 | The two commands get all submodules and initialize autotools. After these steps we are ready to configure and build the project: 26 | ``` 27 | ./configure --prefix=$HOME 28 | make 29 | make install 30 | ``` 31 | We set the prefix to the home folder to install the tool in the user directory (this is optional). Additional configure options also allow us to point autotools to dependencies such as the boost program options library (in case they are not found automatically). 32 | 33 | ## Usage 34 | 35 | Haystack is a command line tool that analyzes C source files with an annotated scop. The examples folder contains two valid test input files: 36 | - gemm.c 37 | - gemm.tiled.c 38 | 39 | All other files are used by the unit tests and due to undefined loop bounds don't work out of the box. To analyze the cache misses of gemm, we type the following command: 40 | ``` 41 | haystack -f ../examples/gemm.c 42 | ``` 43 | The tool then reports the number of cache misses per statement/memory reference: 44 | ``` 45 | ... 
46 | 12 for (int k = 0; k < D; k++) 47 | 13 for (int j = 0; j < D; j++) 48 | 14 C[i][j] += alpha * A[i][k] * B[k][j]; 49 | ------------------------------------------------------------------- 50 | ref type comp[%] L1[%] L2[%] tot[%] reuse[ln] 51 | C[i][j] rd 0.0000 0.0000 0.0000 24.9878 11,14 52 | A[i][k] rd 0.0015 0.0000 0.0000 24.9878 14 53 | B[k][j] rd 0.0015 1.5602 1.5602 24.9878 14 54 | C[i][j] wr 0.0000 0.0000 0.0000 24.9878 14 55 | ... 56 | ``` 57 | The columns provide the following information (assuming fully associative caches with LRU replacement policy): 58 | - ref: memory reference 59 | - type: read or write access 60 | - comp: compulsory misses in percent of the total number of memory accesses 61 | - L1: capacity misses (L1) in percent of the total number of memory accesses 62 | - L2: capacity misses (L2) in percent of the total number of memory accesses 63 | - tot: number of memory accesses in percent of the total number of memory accesses 64 | - reuse: line numbers that contain the memory references that last accessed the same cache line (reuse) 65 | 66 | The tool additionally reports the absolute numbers of cache misses (compulsory and capacity) for the entire scop and the total number of memory accesses (total): 67 | ``` 68 | compulsory: 196'608 69 | capacity (L1) 67'043'328 70 | capacity (L2) 67'043'328 71 | total: 4'297'064'448 72 | ``` 73 | The tool provides a number of additional program options: 74 | ``` 75 | haystack -h 76 | Program options: 77 | -h [ --help ] print the program options 78 | -c [ --cache-sizes ] arg (=32768 524288) 79 | cache sizes in byte 80 | -l [ --line-size ] arg (=64) cache-line size in byte 81 | -f [ --input-file ] arg set the source file [file name] 82 | -I [ --include-path ] arg set the include path [include path] 83 | -s [ --scop-function ] arg set the scop function scop 84 | ``` 85 | We can use the -c and -l options to define different cache configurations and the -I option to specify one or more include paths. 
The -s option allows us to select the function from which we want to extract the scop. This is useful for input files with multiple scops. 86 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | autoreconf -i # regenerate the top-level build system, installing missing auxiliary files 3 | if test -f isl/autogen.sh; then # bootstrap each submodule only if it is checked out and ships an autogen.sh 4 | (cd isl && ./autogen.sh) # '&&': if cd fails, do not run ./autogen.sh here, which would re-invoke this script 5 | fi 6 | if test -f barvinok/autogen.sh; then 7 | (cd barvinok && ./autogen.sh) # subshell keeps the directory change local 8 | fi 9 | if test -f pet/autogen.sh; then 10 | (cd pet && ./autogen.sh) 11 | fi -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT([haystack], 1.0) 2 | AC_CONFIG_AUX_DIR([build-aux]) 3 | AC_CONFIG_MACRO_DIR([m4]) 4 | AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects]) 5 | m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) 6 | 7 | # make static linking the default 8 | # AM_ENABLE_STATIC 9 | # AM_DISABLE_SHARED 10 | 11 | AC_PROG_CXX 12 | AM_PROG_AR 13 | #LT_INIT([static]) # works only if static libraries are available 14 | LT_INIT([disable-shared]) 15 | 16 | AX_BOOST_BASE([1.60.0]) 17 | AX_BOOST_PROGRAM_OPTIONS 18 | 19 | AC_ARG_ENABLE(debug, 20 | AS_HELP_STRING([--enable-debug], 21 | [enable debugging, default: no]), 22 | [case "${enableval}" in 23 | yes) debug=true ;; 24 | no) debug=false ;; 25 | *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;; 26 | esac], 27 | [debug=false]) 28 | AM_CONDITIONAL(DEBUG, test x"$debug" = x"true") 29 | 30 | AX_SUBMODULE(isl,build|bundled|system,bundled) 31 | AM_CONDITIONAL(BUNDLED_ISL, test $with_isl = bundled) 32 | AM_CONDITIONAL(BUILD_ISL, test $with_isl = build) 33 | 34 | AC_SUBST(ISL_CFLAGS) 35 | AC_SUBST(ISL_LIBS) 36 | AC_SUBST(ISL_SRCDIR) 37 | AC_SUBST(ISL_BUILDDIR) 38 | case "$with_isl" in 39 | bundled) 40 | ISL_CFLAGS="-I\$(top_srcdir)/isl/include -I\$(top_builddir)/isl/include" 41 | 
ISL_CFLAGS="$ISL_CFLAGS" 42 | ISL_SRCDIR="$srcdir/isl" 43 | ISL_BUILDDIR=isl 44 | bv_configure_args="$bv_configure_args --with-isl-builddir=../isl" 45 | PACKAGE_CFLAGS_ISL='-I${prefix}/include' 46 | ;; 47 | build) 48 | ISL_SRCDIR="$isl_srcdir" 49 | ISL_BUILDDIR=`echo @abs_builddir@ | $with_isl_builddir/config.status --file=-` 50 | ISL_CFLAGS="-I$isl_srcdir/include -I$ISL_BUILDDIR/include" 51 | ISL_LIBS="$with_isl_builddir/libisl.la" 52 | PACKAGE_CFLAGS_ISL='-I${prefix}/include' 53 | ;; 54 | system) 55 | PKG_CHECK_MODULES([ISL], [isl]) 56 | PACKAGE_CFLAGS_ISL="$ISL_CFLAGS" 57 | esac 58 | AM_CONDITIONAL(HAVE_ISL_BUILDDIR, test "x$ISL_BUILDDIR" != "x") 59 | 60 | AX_SUBMODULE(barvinok,build|bundled|system,bundled) 61 | AM_CONDITIONAL(BUNDLED_BARVINOK, test $with_barvinok = bundled) 62 | AM_CONDITIONAL(BUILD_BARVINOK, test $with_barvinok = build) 63 | 64 | AC_SUBST(BARVINOK_CFLAGS) 65 | AC_SUBST(BARVINOK_LIBS) 66 | AC_SUBST(BARVINOK_SRCDIR) 67 | AC_SUBST(BARVINOK_BUILDDIR) 68 | case "$with_barvinok" in 69 | bundled) 70 | BARVINOK_CFLAGS="-I\$(top_srcdir)/barvinok -I\$(top_builddir)/barvinok" 71 | BARVINOK_CFLAGS="$BARVINOK_CFLAGS" 72 | BARVINOK_SRCDIR="$srcdir/barvinok" 73 | BARVINOK_BUILDDIR=barvinok 74 | ;; 75 | build) 76 | BARVINOK_SRCDIR="$barvinok_srcdir" 77 | BARVINOK_CFLAGS="$BARVINOK_CFLAGS" 78 | BARVINOK_BUILDDIR=`echo @abs_builddir@ | $with_barvinok_builddir/config.status --file=-` # lowercase name, as in the isl and pet arms; shell variables are case-sensitive 79 | BARVINOK_CFLAGS="-I$barvinok_srcdir/ -I$BARVINOK_BUILDDIR/" 80 | BARVINOK_LIBS="$with_barvinok_builddir/libbarvinok.la" # the barvinok build tree provides libbarvinok.la, not libisl.la 81 | ;; 82 | system) 83 | PKG_CHECK_MODULES([BARVINOK], [barvinok]) 84 | PACKAGE_CFLAGS_BARVINOK="$BARVINOK_CFLAGS" 85 | esac 86 | AM_CONDITIONAL(HAVE_BARVINOK_BUILDDIR, test "x$BARVINOK_BUILDDIR" != "x") 87 | 88 | AX_SUBMODULE(pet,bundled|build|system,bundled) 89 | AC_SUBST(PET_CFLAGS) 90 | AC_SUBST(PET_LIBS) 91 | AC_SUBST(PET_BUILDDIR) 92 | case "$with_pet" in 93 | bundled) 94 | PET_CFLAGS="-I\$(top_srcdir)/pet/include" 95 | ;; 96 | build) 97 | PET_BUILDDIR=`echo
@abs_builddir@ | $with_pet_builddir/config.status --file=-` 98 | PET_CFLAGS="-I$pet_srcdir/include" 99 | ;; 100 | system) 101 | PKG_CHECK_MODULES([PET], [pet]) 102 | PACKAGE_CFLAGS_PET="$PET_CFLAGS" 103 | ;; 104 | esac 105 | AM_CONDITIONAL(BUNDLED_PET, test $with_pet = bundled) 106 | AM_CONDITIONAL(BUILD_PET, test $with_pet = build) 107 | 108 | # PACKAGE_CFLAGS="$PACKAGE_CFLAGS_ISL $PACKAGE_CFLAGS_BARVINOK $PACKAGE_CFLAGS_PET" 109 | # PACKAGE_LIBS="-lisl -lbarvinok -lpet" 110 | # AX_CREATE_PKGCONFIG_INFO 111 | 112 | # copy the example files 113 | AC_CONFIG_LINKS([tests/blink.c:tests/inputs/blink.c]) 114 | AC_CONFIG_LINKS([tests/cholesky.c:tests/inputs/cholesky.c]) 115 | AC_CONFIG_LINKS([tests/copy.c:tests/inputs/copy.c]) 116 | AC_CONFIG_LINKS([tests/multiplication.c:tests/inputs/multiplication.c]) 117 | AC_CONFIG_LINKS([tests/stencil.c:tests/inputs/stencil.c]) 118 | AC_CONFIG_LINKS([tests/toy.c:tests/inputs/toy.c]) 119 | 120 | AC_CONFIG_HEADERS(config.h) 121 | AC_CONFIG_FILES(Makefile src/Makefile tests/Makefile) 122 | if test $with_isl = bundled; then 123 | AC_CONFIG_SUBDIRS(isl) 124 | fi 125 | if test $with_barvinok = bundled; then 126 | AC_CONFIG_SUBDIRS(barvinok) 127 | fi 128 | if test $with_pet = bundled; then 129 | AC_CONFIG_SUBDIRS(pet) 130 | fi 131 | AC_CONFIG_COMMANDS_POST([ 132 | ac_configure_args="$ac_configure_args $bv_configure_args" 133 | ]) 134 | AC_OUTPUT -------------------------------------------------------------------------------- /examples/gemm.c: -------------------------------------------------------------------------------- 1 | int main() { /* GEMM example: the loop nest between the scop pragmas scales each row of C by beta and then accumulates alpha * A * B into it; comments are kept outside that region so its source lines stay unchanged */ 2 | #define D 1024 /* extent of both dimensions of A, B and C */ 3 | 4 | float A[D][D]; 5 | float B[D][D]; 6 | float C[D][D]; 7 | float alpha, beta, tmp; /* arrays and scalars are left uninitialized; tmp is not referenced anywhere in this file */ 8 | #pragma scop 9 | for (int i = 0; i < D; i++) { 10 | for (int j = 0; j < D; j++) 11 | C[i][j] *= beta; 12 | for (int k = 0; k < D; k++) 13 | for (int j = 0; j < D; j++) 14 | C[i][j] += alpha * A[i][k] * B[k][j]; 15 | } 16 | #pragma endscop 17 | } 18 | 19 | 
-------------------------------------------------------------------------------- /examples/gemm.tiled.c: -------------------------------------------------------------------------------- 1 | int main() { /* tiled GEMM example: the scop region scales C by beta and accumulates alpha * A * B, visiting the index space in T x T blocks; comments are kept outside that region */ 2 | #define T 32 /* tile edge length; the outer loops run over D/T tiles per dimension */ 3 | #define D 1024 /* extent of both dimensions of A, B and C */ 4 | 5 | float A[D][D]; 6 | float B[D][D]; 7 | float C[D][D]; 8 | float beta, alpha; /* arrays and scalars are left uninitialized */ 9 | 10 | #pragma scop 11 | for (int i = 0; i < D/T; i++) { 12 | for (int j = 0; j < D/T; j++) { 13 | for (int ii = i*T; ii < i*T + T; ii++) 14 | for (int jj = j*T; jj < j*T + T; jj++) 15 | C[ii][jj] *= beta; 16 | for (int k = 0; k < D/T; k++) 17 | for (int ii = i*T; ii < i*T + T; ii++) 18 | for (int kk = k*T; kk < k*T + T; kk++) 19 | for (int jj = j*T; jj < j*T + T; jj++) 20 | C[ii][jj] += alpha * A[ii][kk] * B[kk][jj]; 21 | } 22 | } 23 | #pragma endscop 24 | } 25 | -------------------------------------------------------------------------------- /examples/stencil.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | #define I 64 3 | #define J 16 4 | #define K 64 5 | 6 | float tmp1, tmp2; 7 | 8 | float A[K][J][I]; 9 | float B[K][J][I]; 10 | float C[K][J][I]; 11 | float D[K][J][I]; 12 | float E[K][J][I]; 13 | float F[K][J][I]; 14 | float G[K][J][I]; 15 | float H[K][J][I]; 16 | float L[K][J][I]; 17 | float M[K][J][I]; 18 | float N[K][J][I]; 19 | float O[K][J][I]; 20 | float P[K][J][I]; 21 | float Q[K][J][I]; 22 | float R[K][J][I]; 23 | float S[K][J][I]; 24 | float T[K][J][I]; 25 | float U[K][J][I]; 26 | float V[K][J][I]; 27 | 28 | #pragma scop 29 | for(int k=1; k', so that the /// cannot be confused with a C99 comment. 
19 | AC_DEFUN([gl_ABSOLUTE_HEADER], 20 | [AC_LANG_PREPROC_REQUIRE()dnl 21 | AC_FOREACH([gl_HEADER_NAME], [$1], 22 | [AS_VAR_PUSHDEF([gl_absolute_header], 23 | [gl_cv_absolute_]m4_quote(m4_defn([gl_HEADER_NAME])))dnl 24 | AC_CACHE_CHECK([absolute name of <]m4_quote(m4_defn([gl_HEADER_NAME]))[>], 25 | m4_quote(m4_defn([gl_absolute_header])), 26 | [AS_VAR_PUSHDEF([ac_header_exists], 27 | [ac_cv_header_]m4_quote(m4_defn([gl_HEADER_NAME])))dnl 28 | AC_CHECK_HEADERS_ONCE(m4_quote(m4_defn([gl_HEADER_NAME])))dnl 29 | if test AS_VAR_GET(ac_header_exists) = yes; then 30 | AC_LANG_CONFTEST([AC_LANG_SOURCE([[#include <]]m4_dquote(m4_defn([gl_HEADER_NAME]))[[>]])]) 31 | dnl eval is necessary to expand ac_cpp. 32 | dnl Ultrix and Pyramid sh refuse to redirect output of eval, so use subshell. 33 | AS_VAR_SET(gl_absolute_header, 34 | [`(eval "$ac_cpp conftest.$ac_ext") 2>&AS_MESSAGE_LOG_FD | 35 | sed -n '\#/]m4_quote(m4_defn([gl_HEADER_NAME]))[#{ 36 | s#.*"\(.*/]m4_quote(m4_defn([gl_HEADER_NAME]))[\)".*#\1# 37 | s#^/[^/]#//&# 38 | p 39 | q 40 | }'`]) 41 | fi 42 | AS_VAR_POPDEF([ac_header_exists])dnl 43 | ])dnl 44 | AC_DEFINE_UNQUOTED(AS_TR_CPP([ABSOLUTE_]m4_quote(m4_defn([gl_HEADER_NAME]))), 45 | ["AS_VAR_GET(gl_absolute_header)"], 46 | [Define this to an absolute name of <]m4_quote(m4_defn([gl_HEADER_NAME]))[>.]) 47 | AS_VAR_POPDEF([gl_absolute_header])dnl 48 | ])dnl 49 | ])# gl_ABSOLUTE_HEADER 50 | -------------------------------------------------------------------------------- /m4/ax_boost_program_options.m4: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # https://www.gnu.org/software/autoconf-archive/ax_boost_program_options.html 3 | # ============================================================================= 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_BOOST_PROGRAM_OPTIONS 8 | # 9 | # DESCRIPTION 10 | # 11 | # Test for program options library from the Boost 
C++ libraries. The macro 12 | # requires a preceding call to AX_BOOST_BASE. Further documentation is 13 | # available at <http://randspringer.de/boost/index.html>. 14 | # 15 | # This macro calls: 16 | # 17 | # AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) 18 | # 19 | # And sets: 20 | # 21 | # HAVE_BOOST_PROGRAM_OPTIONS 22 | # 23 | # LICENSE 24 | # 25 | # Copyright (c) 2009 Thomas Porschberg 26 | # 27 | # Copying and distribution of this file, with or without modification, are 28 | # permitted in any medium without royalty provided the copyright notice 29 | # and this notice are preserved. This file is offered as-is, without any 30 | # warranty. 31 | 32 | #serial 25 33 | 34 | AC_DEFUN([AX_BOOST_PROGRAM_OPTIONS], 35 | [ 36 | AC_ARG_WITH([boost-program-options], 37 | AS_HELP_STRING([--with-boost-program-options@<:@=special-lib@:>@], 38 | [use the program options library from boost - it is possible to specify a certain library for the linker 39 | e.g. --with-boost-program-options=boost_program_options-gcc-mt-1_33_1 ]), 40 | [ 41 | if test "$withval" = "no"; then 42 | want_boost="no" 43 | elif test "$withval" = "yes"; then 44 | want_boost="yes" 45 | ax_boost_user_program_options_lib="" 46 | else 47 | want_boost="yes" 48 | ax_boost_user_program_options_lib="$withval" 49 | fi 50 | ], 51 | [want_boost="yes"] 52 | ) 53 | 54 | if test "x$want_boost" = "xyes"; then 55 | AC_REQUIRE([AC_PROG_CC]) 56 | export want_boost 57 | CPPFLAGS_SAVED="$CPPFLAGS" 58 | CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" 59 | export CPPFLAGS 60 | LDFLAGS_SAVED="$LDFLAGS" 61 | LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" 62 | export LDFLAGS 63 | AC_CACHE_CHECK([whether the Boost::Program_Options library is available], 64 | ax_cv_boost_program_options, 65 | [AC_LANG_PUSH(C++) 66 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/program_options/errors.hpp> 67 | ]], 68 | [[boost::program_options::error err("Error message"); 69 | return 0;]])], 70 | ax_cv_boost_program_options=yes, ax_cv_boost_program_options=no) 71 | AC_LANG_POP([C++]) 72 | ]) 73 | if test "$ax_cv_boost_program_options" = yes; then 
74 | AC_DEFINE(HAVE_BOOST_PROGRAM_OPTIONS,,[define if the Boost::PROGRAM_OPTIONS library is available]) 75 | BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` 76 | if test "x$ax_boost_user_program_options_lib" = "x"; then 77 | for libextension in `ls $BOOSTLIBDIR/libboost_program_options*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.dylib* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.dylib.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.a.*$;\1;'` ; do 78 | ax_lib=${libextension} 79 | AC_CHECK_LIB($ax_lib, exit, 80 | [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break], 81 | [link_program_options="no"]) 82 | done 83 | if test "x$link_program_options" != "xyes"; then 84 | for libextension in `ls $BOOSTLIBDIR/boost_program_options*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.a.*$;\1;'` ; do 85 | ax_lib=${libextension} 86 | AC_CHECK_LIB($ax_lib, exit, 87 | [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break], 88 | [link_program_options="no"]) 89 | done 90 | fi 91 | else 92 | for ax_lib in $ax_boost_user_program_options_lib boost_program_options-$ax_boost_user_program_options_lib; do 93 | AC_CHECK_LIB($ax_lib, main, 94 | [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break], 95 | [link_program_options="no"]) 96 | done 97 | fi 98 | if test "x$ax_lib" = "x"; then 99 | AC_MSG_ERROR(Could not find a version of the library!) 
100 | fi 101 | if test "x$link_program_options" != "xyes"; then 102 | AC_MSG_ERROR([Could not link against [$ax_lib] !]) 103 | fi 104 | fi 105 | CPPFLAGS="$CPPFLAGS_SAVED" 106 | LDFLAGS="$LDFLAGS_SAVED" 107 | fi 108 | ]) 109 | -------------------------------------------------------------------------------- /m4/ax_cflags_warn_all.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.nongnu.org/autoconf-archive/ax_cflags_warn_all.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CFLAGS_WARN_ALL [(shellvar [,default, [A/NA]])] 8 | # 9 | # DESCRIPTION 10 | # 11 | # Try to find a compiler option that enables most reasonable warnings. 12 | # 13 | # For the GNU CC compiler it will be -Wall (and -ansi -pedantic) The 14 | # result is added to the shellvar being CFLAGS by default. 15 | # 16 | # Currently this macro knows about GCC, Solaris C compiler, Digital Unix C 17 | # compiler, C for AIX Compiler, HP-UX C compiler, IRIX C compiler, NEC 18 | # SX-5 (Super-UX 10) C compiler, and Cray J90 (Unicos 10.0.0.8) C 19 | # compiler. 20 | # 21 | # - $1 shell-variable-to-add-to : CFLAGS 22 | # - $2 add-value-if-not-found : nothing 23 | # - $3 action-if-found : add value to shellvariable 24 | # - $4 action-if-not-found : nothing 25 | # 26 | # LICENSE 27 | # 28 | # Copyright (c) 2008 Guido U. Draheim 29 | # 30 | # This program is free software; you can redistribute it and/or modify it 31 | # under the terms of the GNU General Public License as published by the 32 | # Free Software Foundation; either version 2 of the License, or (at your 33 | # option) any later version. 34 | # 35 | # This program is distributed in the hope that it will be useful, but 36 | # WITHOUT ANY WARRANTY; without even the implied warranty of 37 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General 38 | # Public License for more details. 39 | # 40 | # You should have received a copy of the GNU General Public License along 41 | # with this program. If not, see . 42 | # 43 | # As a special exception, the respective Autoconf Macro's copyright owner 44 | # gives unlimited permission to copy, distribute and modify the configure 45 | # scripts that are the output of Autoconf when processing the Macro. You 46 | # need not follow the terms of the GNU General Public License when using 47 | # or distributing such scripts, even though portions of the text of the 48 | # Macro appear in them. The GNU General Public License (GPL) does govern 49 | # all other use of the material that constitutes the Autoconf Macro. 50 | # 51 | # This special exception to the GPL applies to versions of the Autoconf 52 | # Macro released by the Autoconf Archive. When you make and distribute a 53 | # modified version of the Autoconf Macro, you may extend this special 54 | # exception to the GPL to apply to your modified version as well. 
55 | 56 | AC_DEFUN([AX_CFLAGS_WARN_ALL],[dnl 57 | AS_VAR_PUSHDEF([FLAGS],[CFLAGS])dnl 58 | AS_VAR_PUSHDEF([VAR],[ac_cv_cflags_warn_all])dnl 59 | AC_CACHE_CHECK([m4_ifval($1,$1,FLAGS) for maximum warnings], 60 | VAR,[VAR="no, unknown" 61 | AC_LANG_SAVE 62 | AC_LANG_C 63 | ac_save_[]FLAGS="$[]FLAGS" 64 | for ac_arg dnl 65 | in "-pedantic % -Wall" dnl GCC 66 | "-xstrconst % -v" dnl Solaris C 67 | "-std1 % -verbose -w0 -warnprotos" dnl Digital Unix 68 | "-qlanglvl=ansi % -qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd" dnl AIX 69 | "-ansi -ansiE % -fullwarn" dnl IRIX 70 | "+ESlit % +w1" dnl HP-UX C 71 | "-Xc % -pvctl[,]fullmsg" dnl NEC SX-5 (Super-UX 10) 72 | "-h conform % -h msglevel 2" dnl Cray C (Unicos) 73 | # 74 | do FLAGS="$ac_save_[]FLAGS "`echo $ac_arg | sed -e 's,%%.*,,' -e 's,%,,'` 75 | AC_TRY_COMPILE([],[return 0;], 76 | [VAR=`echo $ac_arg | sed -e 's,.*% *,,'` ; break]) 77 | done 78 | FLAGS="$ac_save_[]FLAGS" 79 | AC_LANG_RESTORE 80 | ]) 81 | case ".$VAR" in 82 | .ok|.ok,*) m4_ifvaln($3,$3) ;; 83 | .|.no|.no,*) m4_ifvaln($4,$4,[m4_ifval($2,[ 84 | AC_RUN_LOG([: m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $2"]) 85 | m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $2"])]) ;; 86 | *) m4_ifvaln($3,$3,[ 87 | if echo " $[]m4_ifval($1,$1,FLAGS) " | grep " $VAR " >/dev/null 2>&1 88 | then AC_RUN_LOG([: m4_ifval($1,$1,FLAGS) does contain $VAR]) 89 | else AC_RUN_LOG([: m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $VAR"]) 90 | m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $VAR" 91 | fi ]) ;; 92 | esac 93 | AS_VAR_POPDEF([VAR])dnl 94 | AS_VAR_POPDEF([FLAGS])dnl 95 | ]) 96 | 97 | dnl the only difference - the LANG selection... 
and the default FLAGS 98 | 99 | AC_DEFUN([AX_CXXFLAGS_WARN_ALL],[dnl 100 | AS_VAR_PUSHDEF([FLAGS],[CXXFLAGS])dnl 101 | AS_VAR_PUSHDEF([VAR],[ax_cv_cxxflags_warn_all])dnl 102 | AC_CACHE_CHECK([m4_ifval($1,$1,FLAGS) for maximum warnings], 103 | VAR,[VAR="no, unknown" 104 | AC_LANG_SAVE 105 | AC_LANG_CPLUSPLUS 106 | ac_save_[]FLAGS="$[]FLAGS" 107 | for ac_arg dnl 108 | in "-pedantic % -Wall" dnl GCC 109 | "-xstrconst % -v" dnl Solaris C 110 | "-std1 % -verbose -w0 -warnprotos" dnl Digital Unix 111 | "-qlanglvl=ansi % -qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd" dnl AIX 112 | "-ansi -ansiE % -fullwarn" dnl IRIX 113 | "+ESlit % +w1" dnl HP-UX C 114 | "-Xc % -pvctl[,]fullmsg" dnl NEC SX-5 (Super-UX 10) 115 | "-h conform % -h msglevel 2" dnl Cray C (Unicos) 116 | # 117 | do FLAGS="$ac_save_[]FLAGS "`echo $ac_arg | sed -e 's,%%.*,,' -e 's,%,,'` 118 | AC_TRY_COMPILE([],[return 0;], 119 | [VAR=`echo $ac_arg | sed -e 's,.*% *,,'` ; break]) 120 | done 121 | FLAGS="$ac_save_[]FLAGS" 122 | AC_LANG_RESTORE 123 | ]) 124 | case ".$VAR" in 125 | .ok|.ok,*) m4_ifvaln($3,$3) ;; 126 | .|.no|.no,*) m4_ifvaln($4,$4,[m4_ifval($2,[ 127 | AC_RUN_LOG([: m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $2"]) 128 | m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $2"])]) ;; 129 | *) m4_ifvaln($3,$3,[ 130 | if echo " $[]m4_ifval($1,$1,FLAGS) " | grep " $VAR " >/dev/null 2>&1 131 | then AC_RUN_LOG([: m4_ifval($1,$1,FLAGS) does contain $VAR]) 132 | else AC_RUN_LOG([: m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $VAR"]) 133 | m4_ifval($1,$1,FLAGS)="$m4_ifval($1,$1,FLAGS) $VAR" 134 | fi ]) ;; 135 | esac 136 | AS_VAR_POPDEF([VAR])dnl 137 | AS_VAR_POPDEF([FLAGS])dnl 138 | ]) 139 | 140 | dnl implementation tactics: 141 | dnl the for-argument contains a list of options. The first part of 142 | dnl these does only exist to detect the compiler - usually it is 143 | dnl a global option to enable -ansi or -extrawarnings. All other 144 | dnl compilers will fail about it. 
That was needed since a lot of 145 | dnl compilers will give false positives for some option-syntax 146 | dnl like -Woption or -Xoption as they think it is a pass-through 147 | dnl to later compile stages or something. The "%" is used as a 148 | dnl delimiter. A non-option comment can be given after "%%" marks 149 | dnl which will be shown but not added to the respective C/CXXFLAGS. 150 | -------------------------------------------------------------------------------- /m4/ax_create_pkgconfig_info.m4: -------------------------------------------------------------------------------- 1 | dnl @* AX_CREATE_PKGCONFIG_INFO [(outputfile, [requires [,libs [,summary [,cflags [,ldflags]]]]])] 2 | dnl defaults: 3 | dnl $1 = $PACKAGE_NAME.pc 4 | dnl $2 = (empty) 5 | dnl $3 = $PACKAGE_LIBS $LIBS (as set at that point in configure.ac) 6 | dnl $4 = $PACKAGE_SUMMARY (or $1 Library) 7 | dnl $5 = $CPPFLAGS $PACKAGE_CFLAGS (as set at the point in configure.ac) 8 | dnl $6 = $LDFLAGS $PACKAGE_LDFLAGS (as set at the point in configure.ac) 9 | dnl 10 | dnl PACKAGE_NAME defaults to $PACKAGE if not set. 11 | dnl PACKAGE_LIBS defaults to -l$PACKAGE_NAME if not set. 12 | dnl 13 | dnl the resulting file is called $PACKAGE.pc.in / $PACKAGE.pc 14 | dnl 15 | dnl You will find this macro most useful in conjunction with ax_spec_defaults 16 | dnl that can read good initializers from the .spec file. In consequence, most 17 | dnl of the generatable installable stuff can be made from information being 18 | dnl updated in a single place for the whole project. 
19 | dnl 20 | dnl @$Id: ax_create_pkgconfig_info.m4,v 1.2 2005/01/06 19:56:30 guidod Exp $ 21 | 22 | AC_DEFUN([AX_CREATE_PKGCONFIG_INFO],[dnl 23 | AS_VAR_PUSHDEF([PKGCONFIG_suffix],[ax_create_pkgconfig_suffix])dnl 24 | AS_VAR_PUSHDEF([PKGCONFIG_libdir],[ax_create_pkgconfig_libdir])dnl 25 | AS_VAR_PUSHDEF([PKGCONFIG_libfile],[ax_create_pkgconfig_libfile])dnl 26 | AS_VAR_PUSHDEF([PKGCONFIG_libname],[ax_create_pkgconfig_libname])dnl 27 | AS_VAR_PUSHDEF([PKGCONFIG_version],[ax_create_pkgconfig_version])dnl 28 | AS_VAR_PUSHDEF([PKGCONFIG_description],[ax_create_pkgconfig_description])dnl 29 | AS_VAR_PUSHDEF([PKGCONFIG_requires],[ax_create_pkgconfig_requires])dnl 30 | AS_VAR_PUSHDEF([PKGCONFIG_pkglibs],[ax_create_pkgconfig_pkglibs])dnl 31 | AS_VAR_PUSHDEF([PKGCONFIG_libs],[ax_create_pkgconfig_libs])dnl 32 | AS_VAR_PUSHDEF([PKGCONFIG_ldflags],[ax_create_pkgconfig_ldflags])dnl 33 | AS_VAR_PUSHDEF([PKGCONFIG_cppflags],[ax_create_pkgconfig_cppflags])dnl 34 | AS_VAR_PUSHDEF([PKGCONFIG_generate],[ax_create_pkgconfig_generate])dnl 35 | AS_VAR_PUSHDEF([PKGCONFIG_src_libdir],[ax_create_pkgconfig_src_libdir])dnl 36 | AS_VAR_PUSHDEF([PKGCONFIG_src_headers],[ax_create_pkgconfig_src_headers])dnl 37 | 38 | # we need the expanded forms... 39 | test "x$prefix" = xNONE && prefix=$ac_default_prefix 40 | test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' 41 | 42 | AC_MSG_CHECKING(our pkgconfig libname) 43 | test ".$PKGCONFIG_libname" != "." || \ 44 | PKGCONFIG_libname="ifelse($1,,${PACKAGE_NAME},`basename $1 .pc`)" 45 | test ".$PKGCONFIG_libname" != "." || \ 46 | PKGCONFIG_libname="$PACKAGE" 47 | PKGCONFIG_libname=`eval echo "$PKGCONFIG_libname"` 48 | PKGCONFIG_libname=`eval echo "$PKGCONFIG_libname"` 49 | AC_MSG_RESULT($PKGCONFIG_libname) 50 | 51 | AC_MSG_CHECKING(our pkgconfig version) 52 | test ".$PKGCONFIG_version" != "." || \ 53 | PKGCONFIG_version="${PACKAGE_VERSION}" 54 | test ".$PKGCONFIG_version" != "." 
|| \ 55 | PKGCONFIG_version="$VERSION" 56 | PKGCONFIG_version=`eval echo "$PKGCONFIG_version"` 57 | PKGCONFIG_version=`eval echo "$PKGCONFIG_version"` 58 | AC_MSG_RESULT($PKGCONFIG_version) 59 | 60 | AC_MSG_CHECKING(our pkgconfig_libdir) 61 | test ".$pkgconfig_libdir" = "." && \ 62 | pkgconfig_libdir='${libdir}/pkgconfig' 63 | PKGCONFIG_libdir=`eval echo "$pkgconfig_libdir"` 64 | PKGCONFIG_libdir=`eval echo "$PKGCONFIG_libdir"` 65 | PKGCONFIG_libdir=`eval echo "$PKGCONFIG_libdir"` 66 | AC_MSG_RESULT($pkgconfig_libdir) 67 | test "$pkgconfig_libdir" != "$PKGCONFIG_libdir" && ( 68 | AC_MSG_RESULT(expanded our pkgconfig_libdir... $PKGCONFIG_libdir)) 69 | AC_SUBST([pkgconfig_libdir]) 70 | 71 | AC_MSG_CHECKING(our pkgconfig_libfile) 72 | test ".$pkgconfig_libfile" != "." || \ 73 | pkgconfig_libfile="ifelse($1,,$PKGCONFIG_libname.pc,`basename $1`)" 74 | PKGCONFIG_libfile=`eval echo "$pkgconfig_libfile"` 75 | PKGCONFIG_libfile=`eval echo "$PKGCONFIG_libfile"` 76 | AC_MSG_RESULT($pkgconfig_libfile) 77 | test "$pkgconfig_libfile" != "$PKGCONFIG_libfile" && ( 78 | AC_MSG_RESULT(expanded our pkgconfig_libfile... $PKGCONFIG_libfile)) 79 | AC_SUBST([pkgconfig_libfile]) 80 | 81 | AC_MSG_CHECKING(our package / suffix) 82 | PKGCONFIG_suffix="$program_suffix" 83 | test ".$PKGCONFIG_suffix" != .NONE || PKGCONFIG_suffix="" 84 | AC_MSG_RESULT(${PACKAGE_NAME} / ${PKGCONFIG_suffix}) 85 | 86 | AC_MSG_CHECKING(our pkgconfig description) 87 | PKGCONFIG_description="ifelse($4,,$PACKAGE_SUMMARY,$4)" 88 | test ".$PKGCONFIG_description" != "." 
|| \ 89 | PKGCONFIG_description="$PKGCONFIG_libname Library" 90 | PKGCONFIG_description=`eval echo "$PKGCONFIG_description"` 91 | PKGCONFIG_description=`eval echo "$PKGCONFIG_description"` 92 | AC_MSG_RESULT($PKGCONFIG_description) 93 | 94 | AC_MSG_CHECKING(our pkgconfig requires) 95 | PKGCONFIG_requires="ifelse($2,,$PACKAGE_REQUIRES,$2)" 96 | PKGCONFIG_requires=`eval echo "$PKGCONFIG_requires"` 97 | PKGCONFIG_requires=`eval echo "$PKGCONFIG_requires"` 98 | AC_MSG_RESULT($PKGCONFIG_requires) 99 | 100 | AC_MSG_CHECKING(our pkgconfig ext libs) 101 | PKGCONFIG_pkglibs="$PACKAGE_LIBS" 102 | test ".$PKGCONFIG_pkglibs" != "." || PKGCONFIG_pkglibs="-l$PKGCONFIG_libname" 103 | PKGCONFIG_libs="ifelse($3,,$PKGCONFIG_pkglibs $LIBS,$3)" 104 | PKGCONFIG_libs=`eval echo "$PKGCONFIG_libs"` 105 | PKGCONFIG_libs=`eval echo "$PKGCONFIG_libs"` 106 | AC_MSG_RESULT($PKGCONFIG_libs) 107 | 108 | AC_MSG_CHECKING(our pkgconfig cppflags) 109 | PKGCONFIG_cppflags="ifelse($5,,$CPPFLAGS $PACKAGE_CFLAGS,$5)" 110 | PKGCONFIG_cppflags=`eval echo "$PKGCONFIG_cppflags"` 111 | PKGCONFIG_cppflags=`eval echo "$PKGCONFIG_cppflags"` 112 | AC_MSG_RESULT($PKGCONFIG_cppflags) 113 | 114 | AC_MSG_CHECKING(our pkgconfig ldflags) 115 | PKGCONFIG_ldflags="ifelse($6,,$LDFLAGS $PACKAGE_LDFLAGS,$6)" 116 | PKGCONFIG_ldflags=`eval echo "$PKGCONFIG_ldflags"` 117 | PKGCONFIG_ldflags=`eval echo "$PKGCONFIG_ldflags"` 118 | AC_MSG_RESULT($PKGCONFIG_ldflags) 119 | 120 | test ".$PKGCONFIG_generate" != "." || \ 121 | PKGCONFIG_generate="ifelse($1,,$PKGCONFIG_libname.pc,$1)" 122 | PKGCONFIG_generate=`eval echo "$PKGCONFIG_generate"` 123 | PKGCONFIG_generate=`eval echo "$PKGCONFIG_generate"` 124 | test "$pkgconfig_libfile" != "$PKGCONFIG_generate" && ( 125 | AC_MSG_RESULT(generate the pkgconfig later... $PKGCONFIG_generate)) 126 | 127 | if test ".$PKGCONFIG_src_libdir" = "." ; then 128 | PKGCONFIG_src_libdir=`pwd` 129 | PKGCONFIG_src_libdir=`AS_DIRNAME("$PKGCONFIG_src_libdir/$PKGCONFIG_generate")` 130 | test ! 
-d $PKGCONFIG_src_libdir/src || \ 131 | PKGCONFIG_src_libdir="$PKGCONFIG_src_libdir/src" 132 | case ".$objdir" in 133 | *libs) PKGCONFIG_src_libdir="$PKGCONFIG_src_libdir/$objdir" ;; esac 134 | AC_MSG_RESULT(noninstalled pkgconfig -L $PKGCONFIG_src_libdir) 135 | fi 136 | 137 | if test ".$PKGCONFIG_src_headers" = "." ; then 138 | PKGCONFIG_src_headers=`pwd` 139 | v="$ac_top_srcdir" ; 140 | test ".$v" != "." || v="$ax_spec_dir" 141 | test ".$v" != "." || v="$srcdir" 142 | case "$v" in /*) PKGCONFIG_src_headers="" ;; esac 143 | PKGCONFIG_src_headers=`AS_DIRNAME("$PKGCONFIG_src_headers/$v/x")` 144 | test ! -d $PKGCONFIG_src_headers/incl[]ude || \ 145 | PKGCONFIG_src_headers="$PKGCONFIG_src_headers/incl[]ude" 146 | AC_MSG_RESULT(noninstalled pkgconfig -I $PKGCONFIG_src_headers) 147 | fi 148 | 149 | 150 | dnl AC_CONFIG_COMMANDS crap disallows to use $PKGCONFIG_libfile here... 151 | AC_CONFIG_COMMANDS([$ax_create_pkgconfig_generate],[ 152 | pkgconfig_generate="$ax_create_pkgconfig_generate" 153 | if test ! -f "$pkgconfig_generate.in" 154 | then generate="true" 155 | elif grep ' generated by configure ' $pkgconfig_generate.in >/dev/null 156 | then generate="true" 157 | else generate="false"; 158 | fi 159 | if $generate ; then 160 | AC_MSG_NOTICE(creating $pkgconfig_generate.in) 161 | cat > $pkgconfig_generate.in <conftest.sed < $pkgconfig_generate 203 | if test ! -s $pkgconfig_generate ; then 204 | AC_MSG_ERROR([$pkgconfig_generate is empty]) 205 | fi ; rm conftest.sed # DONE generate $pkgconfig_generate 206 | pkgconfig_uninstalled=`echo $pkgconfig_generate |sed 's/.pc$/-uninstalled.pc/'` 207 | AC_MSG_NOTICE(creating $pkgconfig_uninstalled) 208 | cat >conftest.sed < $pkgconfig_uninstalled 228 | if test ! -s $pkgconfig_uninstalled ; then 229 | AC_MSG_ERROR([$pkgconfig_uninstalled is empty]) 230 | fi ; rm conftest.sed # DONE generate $pkgconfig_uninstalled 231 | pkgconfig_requires_add=`echo ${pkgconfig_requires}` 232 | if test ".$pkgconfig_requires_add" != "." 
; then 233 | pkgconfig_requires_add="pkg-config $pkgconfig_requires_add" 234 | else pkgconfig_requires_add=":" ; fi 235 | pkgconfig_uninstalled=`echo $pkgconfig_generate |sed 's/.pc$/-uninstalled.sh/'` 236 | AC_MSG_NOTICE(creating $pkgconfig_uninstalled) 237 | cat >conftest.sed <Name:>for option\\; do case \"\$option\" in --list-all|--name) echo > 256 | s>Description: *>\\;\\; --help) pkg-config --help \\; echo Buildscript Of > 257 | s>Version: *>\\;\\; --modversion|--version) echo > 258 | s>Requires:>\\;\\; --requires) echo $pkgconfig_requires_add> 259 | s>Libs: *>\\;\\; --libs) echo > 260 | s>Cflags: *>\\;\\; --cflags) echo > 261 | /--libs)/a\\ 262 | $pkgconfig_requires_add 263 | /--cflags)/a\\ 264 | $pkgconfig_requires_add\\ 265 | ;; --variable=*) eval echo '\$'\`echo \$option | sed -e 's/.*=//'\`\\ 266 | ;; --uninstalled) exit 0 \\ 267 | ;; *) ;; esac done 268 | AXEOF 269 | sed -f conftest.sed $pkgconfig_generate.in > $pkgconfig_uninstalled 270 | if test ! -s $pkgconfig_uninstalled ; then 271 | AC_MSG_ERROR([$pkgconfig_uninstalled is empty]) 272 | fi ; rm conftest.sed # DONE generate $pkgconfig_uninstalled 273 | ],[ 274 | dnl AC_CONFIG_COMMANDS crap, the AS_PUSHVAR defines are invalid here... 
275 | ax_create_pkgconfig_generate="$ax_create_pkgconfig_generate" 276 | pkgconfig_prefix='$prefix' 277 | pkgconfig_execprefix='$exec_prefix' 278 | pkgconfig_bindir='$bindir' 279 | pkgconfig_libdir='$libdir' 280 | pkgconfig_includedir='$includedir' 281 | pkgconfig_datarootdir='$datarootdir' 282 | pkgconfig_datadir='$datadir' 283 | pkgconfig_sysconfdir='$sysconfdir' 284 | pkgconfig_suffix='$ax_create_pkgconfig_suffix' 285 | pkgconfig_package='$PACKAGE_NAME' 286 | pkgconfig_libname='$ax_create_pkgconfig_libname' 287 | pkgconfig_description='$ax_create_pkgconfig_description' 288 | pkgconfig_version='$ax_create_pkgconfig_version' 289 | pkgconfig_requires='$ax_create_pkgconfig_requires' 290 | pkgconfig_libs='$ax_create_pkgconfig_libs' 291 | pkgconfig_ldflags='$ax_create_pkgconfig_ldflags' 292 | pkgconfig_cppflags='$ax_create_pkgconfig_cppflags' 293 | pkgconfig_src_libdir='$ax_create_pkgconfig_src_libdir' 294 | pkgconfig_src_headers='$ax_create_pkgconfig_src_headers' 295 | ])dnl 296 | AS_VAR_POPDEF([PKGCONFIG_suffix])dnl 297 | AS_VAR_POPDEF([PKGCONFIG_libdir])dnl 298 | AS_VAR_POPDEF([PKGCONFIG_libfile])dnl 299 | AS_VAR_POPDEF([PKGCONFIG_libname])dnl 300 | AS_VAR_POPDEF([PKGCONFIG_version])dnl 301 | AS_VAR_POPDEF([PKGCONFIG_description])dnl 302 | AS_VAR_POPDEF([PKGCONFIG_requires])dnl 303 | AS_VAR_POPDEF([PKGCONFIG_pkglibs])dnl 304 | AS_VAR_POPDEF([PKGCONFIG_libs])dnl 305 | AS_VAR_POPDEF([PKGCONFIG_ldflags])dnl 306 | AS_VAR_POPDEF([PKGCONFIG_cppflags])dnl 307 | AS_VAR_POPDEF([PKGCONFIG_generate])dnl 308 | AS_VAR_POPDEF([PKGCONFIG_src_libdir])dnl 309 | AS_VAR_POPDEF([PKGCONFIG_src_headers])dnl 310 | ]) 311 | -------------------------------------------------------------------------------- /m4/ax_cxx_compile_stdcxx_11.m4: -------------------------------------------------------------------------------- 1 | # ============================================================================ 2 | # http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html 3 | # 
============================================================================ 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CXX_COMPILE_STDCXX_11([ext|noext], [mandatory|optional]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Check for baseline language coverage in the compiler for the C++11 12 | # standard; if necessary, add switches to CXX and CXXCPP to enable 13 | # support. 14 | # 15 | # This macro is a convenience alias for calling the AX_CXX_COMPILE_STDCXX 16 | # macro with the version set to C++11. The two optional arguments are 17 | # forwarded literally as the second and third argument respectively. 18 | # Please see the documentation for the AX_CXX_COMPILE_STDCXX macro for 19 | # more information. If you want to use this macro, you also need to 20 | # download the ax_cxx_compile_stdcxx.m4 file. 21 | # 22 | # LICENSE 23 | # 24 | # Copyright (c) 2008 Benjamin Kosnik 25 | # Copyright (c) 2012 Zack Weinberg 26 | # Copyright (c) 2013 Roy Stogner 27 | # Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov 28 | # Copyright (c) 2015 Paul Norman 29 | # Copyright (c) 2015 Moritz Klammler 30 | # 31 | # Copying and distribution of this file, with or without modification, are 32 | # permitted in any medium without royalty provided the copyright notice 33 | # and this notice are preserved. This file is offered as-is, without any 34 | # warranty. 
35 | 36 | #serial 17 37 | 38 | AX_REQUIRE_DEFINED([AX_CXX_COMPILE_STDCXX]) 39 | AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [AX_CXX_COMPILE_STDCXX([11], [$1], [$2])]) 40 | -------------------------------------------------------------------------------- /m4/ax_detect_clang.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_DETECT_CLANG], [ 2 | AC_SUBST(CLANG_CXXFLAGS) 3 | AC_SUBST(CLANG_LDFLAGS) 4 | AC_SUBST(CLANG_LIBS) 5 | AC_PROG_GREP 6 | AC_PROG_SED 7 | llvm_config="llvm-config" 8 | AC_CHECK_PROG([llvm_config_found], ["$llvm_config"], [yes]) 9 | if test "x$with_clang_prefix" != "x"; then 10 | llvm_config="$with_clang_prefix/bin/llvm-config" 11 | if test -x "$llvm_config"; then 12 | llvm_config_found=yes 13 | fi 14 | fi 15 | if test "$llvm_config_found" != yes; then 16 | AC_MSG_ERROR([llvm-config not found]) 17 | fi 18 | CLANG_CXXFLAGS=`$llvm_config --cxxflags | \ 19 | $SED -e 's/-Wcovered-switch-default//;s/-gsplit-dwarf//'` 20 | CLANG_LDFLAGS=`$llvm_config --ldflags` 21 | targets=`$llvm_config --targets-built` 22 | components="$targets asmparser bitreader support mc" 23 | $llvm_config --components | $GREP option > /dev/null 2> /dev/null 24 | if test $? -eq 0; then 25 | components="$components option" 26 | fi 27 | CLANG_LIBS=`$llvm_config --libs $components` 28 | systemlibs=`$llvm_config --system-libs 2> /dev/null | tail -1` 29 | if test $? 
-eq 0; then 30 | CLANG_LIBS="$CLANG_LIBS $systemlibs" 31 | fi 32 | CLANG_PREFIX=`$llvm_config --prefix` 33 | AC_DEFINE_UNQUOTED(CLANG_PREFIX, ["$CLANG_PREFIX"], [Clang installation prefix]) 34 | 35 | SAVE_CPPFLAGS="$CPPFLAGS" 36 | CPPFLAGS="$CLANG_CXXFLAGS $CPPFLAGS" 37 | AC_LANG_PUSH(C++) 38 | AC_CHECK_HEADER([clang/Basic/SourceLocation.h], [], 39 | [AC_ERROR([clang header file not found])]) 40 | AC_EGREP_HEADER([getDefaultTargetTriple], [llvm/Support/Host.h], [], 41 | [AC_DEFINE([getDefaultTargetTriple], [getHostTriple], 42 | [Define to getHostTriple for older versions of clang])]) 43 | AC_EGREP_HEADER([getExpansionLineNumber], [clang/Basic/SourceLocation.h], [], 44 | [AC_DEFINE([getExpansionLineNumber], [getInstantiationLineNumber], 45 | [Define to getInstantiationLineNumber for older versions of clang])]) 46 | AC_EGREP_HEADER([getImmediateExpansionRange], [clang/Basic/SourceManager.h], 47 | [], 48 | [AC_DEFINE([getImmediateExpansionRange], 49 | [getImmediateInstantiationRange], 50 | [Define to getImmediateInstantiationRange for older versions of clang])] 51 | ) 52 | AC_EGREP_HEADER([DiagnosticsEngine], [clang/Basic/Diagnostic.h], [], 53 | [AC_DEFINE([DiagnosticsEngine], [Diagnostic], 54 | [Define to Diagnostic for older versions of clang])]) 55 | AC_EGREP_HEADER([ArrayRef], [clang/Driver/Driver.h], 56 | [AC_DEFINE([USE_ARRAYREF], [], 57 | [Define if Driver::BuildCompilation takes ArrayRef])]) 58 | AC_EGREP_HEADER([CXXIsProduction], [clang/Driver/Driver.h], 59 | [AC_DEFINE([HAVE_CXXISPRODUCTION], [], 60 | [Define if Driver constructor takes CXXIsProduction argument])]) 61 | AC_EGREP_HEADER([ IsProduction], [clang/Driver/Driver.h], 62 | [AC_DEFINE([HAVE_ISPRODUCTION], [], 63 | [Define if Driver constructor takes IsProduction argument])]) 64 | AC_TRY_COMPILE([#include ], [ 65 | using namespace clang; 66 | DiagnosticsEngine *Diags; 67 | new driver::Driver("", "", "", *Diags); 68 | ], [AC_DEFINE([DRIVER_CTOR_TAKES_DEFAULTIMAGENAME], [], 69 | [Define if Driver 
constructor takes default image name])]) 70 | AC_EGREP_HEADER([void HandleTopLevelDecl\(], [clang/AST/ASTConsumer.h], 71 | [AC_DEFINE([HandleTopLevelDeclReturn], [void], 72 | [Return type of HandleTopLevelDeclReturn]) 73 | AC_DEFINE([HandleTopLevelDeclContinue], [], 74 | [Return type of HandleTopLevelDeclReturn])], 75 | [AC_DEFINE([HandleTopLevelDeclReturn], [bool], 76 | [Return type of HandleTopLevelDeclReturn]) 77 | AC_DEFINE([HandleTopLevelDeclContinue], [true], 78 | [Return type of HandleTopLevelDeclReturn])]) 79 | AC_CHECK_HEADER([clang/Basic/DiagnosticOptions.h], 80 | [AC_DEFINE([HAVE_BASIC_DIAGNOSTICOPTIONS_H], [], 81 | [Define if clang/Basic/DiagnosticOptions.h exists])]) 82 | AC_CHECK_HEADER([clang/Lex/PreprocessorOptions.h], 83 | [AC_DEFINE([HAVE_LEX_PREPROCESSOROPTIONS_H], [], 84 | [Define if clang/Lex/PreprocessorOptions.h exists])], [], 85 | [#include ]) 86 | AC_TRY_COMPILE([#include ], [ 87 | using namespace clang; 88 | std::shared_ptr TO; 89 | DiagnosticsEngine *Diags; 90 | TargetInfo::CreateTargetInfo(*Diags, TO); 91 | ], [AC_DEFINE([CREATETARGETINFO_TAKES_SHARED_PTR], [], 92 | [Define if TargetInfo::CreateTargetInfo takes shared_ptr])]) 93 | AC_TRY_COMPILE([#include ], [ 94 | using namespace clang; 95 | TargetOptions *TO; 96 | DiagnosticsEngine *Diags; 97 | TargetInfo::CreateTargetInfo(*Diags, TO); 98 | ], [AC_DEFINE([CREATETARGETINFO_TAKES_POINTER], [], 99 | [Define if TargetInfo::CreateTargetInfo takes pointer])]) 100 | AC_TRY_COMPILE([#include ], [ 101 | using namespace clang; 102 | DiagnosticConsumer *client; 103 | CompilerInstance *Clang; 104 | Clang->createDiagnostics(client); 105 | ], [], [AC_DEFINE([CREATEDIAGNOSTICS_TAKES_ARG], [], 106 | [Define if CompilerInstance::createDiagnostics takes argc and argv])]) 107 | AC_TRY_COMPILE([#include ], [ 108 | using namespace clang; 109 | HeaderSearchOptions HSO; 110 | HSO.AddPath("", frontend::Angled, false, false); 111 | ], [AC_DEFINE([ADDPATH_TAKES_4_ARGUMENTS], [], 112 | [Define if 
HeaderSearchOptions::AddPath takes 4 arguments])]) 113 | AC_EGREP_HEADER([getNumParams], 114 | [clang/AST/CanonicalType.h], 115 | [AC_DEFINE([getNumArgs], [getNumParams], 116 | [Define to getNumParams for newer versions of clang]) 117 | AC_DEFINE([getArgType], [getParamType], 118 | [Define to getParamType for newer versions of clang])]) 119 | AC_EGREP_HEADER([getReturnType], 120 | [clang/AST/CanonicalType.h], [], 121 | [AC_DEFINE([getReturnType], [getResultType], 122 | [Define to getResultType for older versions of clang])]) 123 | AC_TRY_COMPILE([#include ], [ 124 | using namespace clang; 125 | CompilerInstance *Clang; 126 | Clang->createPreprocessor(TU_Complete); 127 | ], [AC_DEFINE([CREATEPREPROCESSOR_TAKES_TUKIND], [], 128 | [Define if CompilerInstance::createPreprocessor takes TranslationUnitKind])]) 129 | AC_EGREP_HEADER([setMainFileID], [clang/Basic/SourceManager.h], 130 | [AC_DEFINE([HAVE_SETMAINFILEID], [], 131 | [Define if SourceManager has a setMainFileID method])]) 132 | AC_CHECK_HEADER([llvm/ADT/OwningPtr.h], 133 | [AC_DEFINE([HAVE_ADT_OWNINGPTR_H], [], 134 | [Define if llvm/ADT/OwningPtr.h exists])]) 135 | AC_EGREP_HEADER([initializeBuiltins], 136 | [clang/Basic/Builtins.h], [], 137 | [AC_DEFINE([initializeBuiltins], [InitializeBuiltins], 138 | [Define to InitializeBuiltins for older versions of clang])]) 139 | AC_EGREP_HEADER([IK_C], [clang/Frontend/FrontendOptions.h], [], 140 | [AC_DEFINE([IK_C], [InputKind::C], 141 | [Define to InputKind::C for newer versions of clang])]) 142 | AC_TRY_COMPILE([ 143 | #include 144 | #include 145 | #include 146 | ], [ 147 | using namespace clang; 148 | CompilerInstance *Clang; 149 | TargetOptions TO; 150 | llvm::Triple T(TO.Triple); 151 | PreprocessorOptions PO; 152 | CompilerInvocation::setLangDefaults(Clang->getLangOpts(), IK_C, 153 | T, PO, LangStandard::lang_unspecified); 154 | ], [AC_DEFINE([SETLANGDEFAULTS_TAKES_5_ARGUMENTS], [], 155 | [Define if CompilerInvocation::setLangDefaults takes 5 arguments])]) 156 | 
AC_TRY_COMPILE([ 157 | #include 158 | #include 159 | ], [ 160 | using namespace clang; 161 | CompilerInvocation *invocation; 162 | CompilerInstance *Clang; 163 | Clang->setInvocation(std::make_shared(*invocation)); 164 | ], [AC_DEFINE([SETINVOCATION_TAKES_SHARED_PTR], [], 165 | [Defined if CompilerInstance::setInvocation takes a shared_ptr])]) 166 | AC_CHECK_HEADER([llvm/Option/Arg.h], 167 | [AC_DEFINE([HAVE_LLVM_OPTION_ARG_H], [], 168 | [Define if llvm/Option/Arg.h exists])]) 169 | AC_LANG_POP 170 | CPPFLAGS="$SAVE_CPPFLAGS" 171 | 172 | SAVE_LDFLAGS="$LDFLAGS" 173 | LDFLAGS="$CLANG_LDFLAGS $LDFLAGS" 174 | AC_SUBST(LIB_CLANG_EDIT) 175 | AC_CHECK_LIB([clangEdit], [main], [LIB_CLANG_EDIT=-lclangEdit], []) 176 | LDFLAGS="$SAVE_LDFLAGS" 177 | ]) 178 | -------------------------------------------------------------------------------- /m4/ax_submodule.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([_AX_SUBMODULE], 2 | [ 3 | 4 | m4_if(m4_bregexp($3,|,choice),choice, 5 | [AC_ARG_WITH($2, 6 | [AS_HELP_STRING([--with-$1=$3], 7 | [Which $1 to use [default=$4]])])]) 8 | case "system" in 9 | $3) 10 | AC_ARG_WITH($2_prefix, 11 | [AS_HELP_STRING([--with-$1-prefix=DIR], 12 | [Prefix of $1 installation])]) 13 | AC_ARG_WITH($2_exec_prefix, 14 | [AS_HELP_STRING([--with-$1-exec-prefix=DIR], 15 | [Exec prefix of $1 installation])]) 16 | esac 17 | m4_if(m4_bregexp($3,build,build),build, 18 | [AC_ARG_WITH($2_builddir, 19 | [AS_HELP_STRING([--with-$1-builddir=DIR], 20 | [Location of $1 builddir])])]) 21 | if test "x$with_$2_prefix" != "x" -a "x$with_$2_exec_prefix" = "x"; then 22 | with_$2_exec_prefix=$with_$2_prefix 23 | fi 24 | if test "x$with_$2_prefix" != "x" -o "x$with_$2_exec_prefix" != "x"; then 25 | if test "x$with_$2" != "x" -a "x$with_$2" != "xsystem"; then 26 | AC_MSG_ERROR([Setting $with_$2_prefix implies use of system $1]) 27 | fi 28 | with_$2="system" 29 | fi 30 | if test "x$with_$2_builddir" != "x"; then 31 | if test 
"x$with_$2" != "x" -a "x$with_$2" != "xbuild"; then 32 | AC_MSG_ERROR([Setting $with_$2_builddir implies use of build $1]) 33 | fi 34 | with_$2="build" 35 | $2_srcdir=`echo @abs_srcdir@ | $with_$2_builddir/config.status --file=-` 36 | AC_MSG_NOTICE($1 sources in $$2_srcdir) 37 | fi 38 | if test "x$with_$2_exec_prefix" != "x"; then 39 | export PKG_CONFIG_PATH="$with_$2_exec_prefix/lib/pkgconfig${PKG_CONFIG_PATH+:$PKG_CONFIG_PATH}" 40 | fi 41 | case "$with_$2" in 42 | $3) 43 | ;; 44 | *) 45 | case "$4" in 46 | bundled) 47 | if test -d $srcdir/.git -a \ 48 | -d $srcdir/$1; then if test \ 49 | "`cd $srcdir; git submodule status $1 | cut -c1`" = '-'; then 50 | AC_MSG_WARN([git repo detected, but submodule $1 not initialized]) 51 | AC_MSG_WARN([You may want to run]) 52 | AC_MSG_WARN([ git submodule init]) 53 | AC_MSG_WARN([ git submodule update]) 54 | AC_MSG_WARN([ sh autogen.sh]) 55 | fi fi 56 | if test -f $srcdir/$1/configure; then 57 | with_$2="bundled" 58 | else 59 | case "system" in 60 | $3) 61 | with_$2="system" 62 | ;; 63 | *) 64 | with_$2="no" 65 | ;; 66 | esac 67 | fi 68 | ;; 69 | *) 70 | with_$2="$4" 71 | ;; 72 | esac 73 | ;; 74 | esac 75 | AC_MSG_CHECKING([which $1 to use]) 76 | AC_MSG_RESULT($with_$2) 77 | 78 | ]) 79 | 80 | AC_DEFUN([AX_SUBMODULE], [ 81 | _AX_SUBMODULE($1, m4_bpatsubst([$1], 82 | [[^_abcdefghijklmnopqrstuvwxyz0123456789]],[_]), $2, $3) 83 | ]) 84 | -------------------------------------------------------------------------------- /m4/pkg.m4: -------------------------------------------------------------------------------- 1 | # pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- 2 | # 3 | # Copyright © 2004 Scott James Remnant . 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, but 11 | # WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | # General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program; if not, write to the Free Software 17 | # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | # 19 | # As a special exception to the GNU General Public License, if you 20 | # distribute this file as part of a program that contains a 21 | # configuration script generated by Autoconf, you may include it under 22 | # the same distribution terms that you use for the rest of that program. 23 | 24 | # PKG_PROG_PKG_CONFIG([MIN-VERSION]) 25 | # ---------------------------------- 26 | AC_DEFUN([PKG_PROG_PKG_CONFIG], 27 | [m4_pattern_forbid([^_?PKG_[A-Z_]+$]) 28 | m4_pattern_allow([^PKG_CONFIG(_PATH)?$]) 29 | AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl 30 | if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then 31 | AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) 32 | fi 33 | if test -n "$PKG_CONFIG"; then 34 | _pkg_min_version=m4_default([$1], [0.9.0]) 35 | AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) 36 | if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then 37 | AC_MSG_RESULT([yes]) 38 | else 39 | AC_MSG_RESULT([no]) 40 | PKG_CONFIG="" 41 | fi 42 | 43 | fi[]dnl 44 | ])# PKG_PROG_PKG_CONFIG 45 | 46 | # PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) 47 | # 48 | # Check to see whether a particular set of modules exists. Similar 49 | # to PKG_CHECK_MODULES(), but does not set variables or print errors. 
#
#
# Similar to PKG_CHECK_MODULES, make sure that the first instance of
# this or PKG_CHECK_MODULES is called, or make sure to call
# PKG_CHECK_EXISTS manually
#
# Runs ACTION-IF-FOUND (or the no-op ":" when it is empty) if PKG_CONFIG
# is set and "pkg-config --exists" accepts MODULES; otherwise runs
# ACTION-IF-NOT-FOUND when one is given.
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_EXISTS],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
if test -n "$PKG_CONFIG" && \
    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
  m4_ifval([$2], [$2], [:])
m4_ifvaln([$3], [else
  $3])dnl
fi])


# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
#
# Fill the cache variable pkg_cv_VARIABLE: a value already present in
# the shell variable VARIABLE wins, otherwise the output of
# "pkg-config --COMMAND MODULES" is used. Sets pkg_failed to "yes" if
# the modules do not exist and to "untried" if PKG_CONFIG is empty.
# ---------------------------------------------
m4_define([_PKG_CONFIG],
[if test -n "$$1"; then
    pkg_cv_[]$1="$$1"
 elif test -n "$PKG_CONFIG"; then
    PKG_CHECK_EXISTS([$3],
                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
                     [pkg_failed=yes])
 else
    pkg_failed=untried
fi[]dnl
])# _PKG_CONFIG

# _PKG_SHORT_ERRORS_SUPPORTED
#
# Set _pkg_short_errors_supported to yes/no depending on whether the
# pkg-config in use is at least version 0.20.
# -----------------------------
AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
        _pkg_short_errors_supported=yes
else
        _pkg_short_errors_supported=no
fi[]dnl
])# _PKG_SHORT_ERRORS_SUPPORTED


# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
# [ACTION-IF-NOT-FOUND])
#
#
# Note that if there is a possibility the first call to
# PKG_CHECK_MODULES might not happen, you should be sure to include an
# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
#
# On success VARIABLE-PREFIX_CFLAGS and VARIABLE-PREFIX_LIBS are set and
# ACTION-IF-FOUND is run. On failure ACTION-IF-NOT-FOUND is run; when it
# is empty, configure aborts with an explanatory message instead.
#
#
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_MODULES],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl

pkg_failed=no
AC_MSG_CHECKING([for $1])

_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
_PKG_CONFIG([$1][_LIBS], [libs], [$2])

m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
and $1[]_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.])

if test $pkg_failed = yes; then
        _PKG_SHORT_ERRORS_SUPPORTED
        if test $_pkg_short_errors_supported = yes; then
                $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors "$2" 2>&1`
        else
                $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors "$2" 2>&1`
        fi
        # Put the nasty error message in config.log where it belongs
        echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD

        ifelse([$4], , [AC_MSG_ERROR(dnl
[Package requirements ($2) were not met:

$$1_PKG_ERRORS

Consider adjusting the PKG_CONFIG_PATH environment variable if you
installed software in a non-standard prefix.

_PKG_TEXT
])],
                [AC_MSG_RESULT([no])
                $4])
elif test $pkg_failed = untried; then
        ifelse([$4], , [AC_MSG_FAILURE(dnl
[The pkg-config script could not be found or is too old. Make sure it
is in your PATH or set the PKG_CONFIG environment variable to the full
path to pkg-config.

_PKG_TEXT

To get pkg-config, see .])],
                [$4])
else
        $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
        $1[]_LIBS=$pkg_cv_[]$1[]_LIBS
        AC_MSG_RESULT([yes])
        ifelse([$3], , :, [$3])
fi[]dnl
])# PKG_CHECK_MODULES

--------------------------------------------------------------------------------
/src/Access.h:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2019, ETH Zurich
 */

#ifndef _ACCESS_H_
#define _ACCESS_H_

// NOTE(review): the header names after the bare #include directives and the template arguments of the
// std:: containers are missing in this copy of the file -- restore them from the upstream sources
#include
#include
#include
#include

#include "Definitions.h"
#include "Program.h"

// class holding the access information
// (one object per named access; it keeps the access domain, the model configuration, and the miss counts
// that getResult() hands back)
class Access {
public:
  Access() = delete;
  Access(const Access &other) = default;
  // store the access name, the machine model, the model options, the access domain, and the element sizes
  // (the members are initialized in declaration order -- Name_, Domain_, MachineModel_, ... -- not in the
  // order of the initializer list below)
  Access(std::string Name, machine_model MachineModel, model_options ModelOptions, isl::set Domain,
         std::map ElementSizes)
      : Name_(Name), MachineModel_(MachineModel), ModelOptions_(ModelOptions), Domain_(Domain),
        ElementSizes_(ElementSizes) {}

  // control the cache miss computation
  void initAccess(std::vector ParameterValues, isl::set Parameters);

  // compute base cache miss information
  // (HayStack::countCacheMisses calls initAccess and countCompulsoryMisses first, then computeStackDistances
  // once per between map, and finally countCapacityMisses before reading the result)
  void countCompulsoryMisses(isl::union_map First);
  void computeStackDistances(isl::union_map BetweenMap);
  void countCapacityMisses();
  // return a copy of the collected miss counts
  misses getResult() const { return Result_; };

  // compute the capacity misses for additional cache sizes
  std::vector countCapacityMisses(std::vector CacheSizes);

  // get the access properties
  std::string getName() const { return Name_; }
  isl::set getDomain() const { return Domain_; }

private:
  // helper methods
  piece createPiece(isl::set Domain, isl::qpolynomial Piece) const;
  void extractStackDistanceExpression(isl::union_pw_qpolynomial Count);
  void storeAffinePieces();

  void applyEqualization();
  void applyRasterization();

  // enumerate non-affine dimensions to count all points
  std::vector findNonAffineDimensions(piece Piece) const;
  void enumerateNonAffineDimensions(piece Piece);

  // counting methods
  std::vector countAffineDimensions(piece Piece, std::vector Limits) const;
  std::vector enumerateNonAffinePoints(piece Piece, std::vector Limits) const;

  // helper functions to analyze piece and extract the affine expression
  bool isPieceAffine(piece Piece) const;
  isl::pw_aff extractAffineExpression(piece Piece) const;
  isl::aff extractAffineExpression(isl::qpolynomial, isl::set Domain, std::map Values) const;
  long getPieceSize(piece &Piece) const;

  // elimination helper methods
  int computeExponent(piece Piece) const;
  int computeDimensionExponent(int Dimension, piece Piece) const;

  isl::qpolynomial computeReplacement(std::map Replacements, piece Piece) const;

  // methods to eliminate floor terms due to equalization
  std::vector>> findEqualizationCandidates(piece Piece) const;
  std::vector computeSplits(std::vector> Candidate, piece Piece) const;
  std::vector equalizeCandidate(std::vector> Candidate, std::vector Splits,
                                piece Piece) const;

  // methods to eliminate floor terms due to rasterization
  std::vector findRasterDimensions(piece Piece) const;
  std::vector computeMultipliers(std::vector Dimensions, piece Piece) const;
  std::vector rasterDimension(int Dimension, isl::val Multiplier, piece Piece) const;

  // method to verify splits
  bool verifySplit(piece Piece, std::vector Pieces) const;

  // configuration handed to the constructor
  std::string Name_;
  isl::set Domain_;
  machine_model MachineModel_;
  model_options ModelOptions_;
  std::map ElementSizes_;

  // state that is not set by the constructor
  // (presumably filled by initAccess and the stack distance computation -- TODO confirm in Access.cpp)
  isl::set Parameters_;
  std::vector ParameterValues_;
  std::vector Expression_;

  // store the stack distance
  long Misses_;
  std::vector Affine_;
  std::vector NonAffine_;
  std::vector> Constant_;

  // result of the cache miss computation
  misses Result_;
};

// support sorting
// (accesses are ordered by their name only; HayStack::extractAccesses relies on this for std::sort)
inline bool operator<(Access const &A, Access const &B) { return A.getName() < B.getName(); }

#endif
--------------------------------------------------------------------------------
/src/Definitions.h:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2019, ETH Zurich
 */

#ifndef _DEFINITIONS_H_
#define _DEFINITIONS_H_

// NOTE(review): the header names after the bare #include directives and the template arguments of the
// std:: containers are missing in this copy of the file -- restore them from the upstream sources
#include
#include
#include
#include
#include
#include

// computation options
// (HayStack.cpp tests DIMENSION_COUNTING and COMPUTE_CONFLICTS with #ifdef, so those two are switched off
// by removing the define, not by setting it to 0)
#define RASTERIZATION 1
#define EQUALIZATION 1
#define DIMENSION_COUNTING 1
#define COMPUTE_CONFLICTS 1

//#define TIMERS 1
//#define ENUMERATE_POINTS 1

// flags to control the verification steps
// #ifndef NDEBUG
// #define VERIFY_RESULT 1
// #endif

// struct defining the machine properties
struct machine_model {
  // Program::computeAccessToLine divides this by the element size to get the elements per cache line
  // (presumably both are in bytes -- TODO confirm)
  long CacheLineSize;
  std::vector CacheSizes;
};

// struct defining the model options
struct model_options {
  bool ComputeBounds;
};

// struct holding the term information
struct term {
  isl::val Coefficient;
  // the exponents
  std::vector Parameters;
  std::vector Variables;
  std::vector Divisors;
  // the term
  isl::qpolynomial Polynomial;
};

// struct holding the piece information
struct piece {
  isl::set Domain;
  long Size;
  // polynomial
  isl::qpolynomial Polynomial;
  // terms of the polynomial
  std::vector Terms;
  // affine form
  isl::pw_aff Expression;
};

// struct holding the cache misses and compute statistics
struct misses {
  long Total;
  long Counted;
  long CompulsoryMisses;
  std::vector CapacityMisses;
};

//
struct holding access information 72 | enum AccessType { Read, Write }; 73 | struct access_info { 74 | std::string Name; 75 | std::string Access; 76 | AccessType ReadOrWrite; 77 | unsigned Start; 78 | unsigned Stop; 79 | int Line; 80 | }; 81 | 82 | // define named types 83 | typedef std::pair NamedInt; 84 | typedef std::pair NamedLong; 85 | typedef std::pair> NamedVector; 86 | typedef std::pair NamedMisses; 87 | 88 | // compute the integer power 89 | inline long compute_power(long base, int exponent) { 90 | assert(exponent >= 0); 91 | long Result = 1; 92 | while (exponent) { 93 | if (exponent & 1) 94 | Result *= base; 95 | base *= base; 96 | exponent /= 2; 97 | } 98 | return Result; 99 | }; 100 | 101 | #endif -------------------------------------------------------------------------------- /src/HayStack.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "HayStack.h" 10 | #include "Timer.h" 11 | #include "isl-helpers.h" 12 | 13 | #include "barvinok/isl.h" 14 | 15 | isl_ctx *allocateContextWithIncludePaths(std::vector IncludePaths) { 16 | // pass the include paths to the context 17 | std::vector Arguments; 18 | char Argument1[] = "program"; 19 | char ArgumentI[] = "-I"; 20 | Arguments.push_back(Argument1); 21 | for (auto &IncludePath : IncludePaths) { 22 | Arguments.push_back(ArgumentI); 23 | Arguments.push_back(const_cast(IncludePath.c_str())); 24 | } 25 | int ArgumentCount = Arguments.size(); 26 | struct pet_options *options; 27 | options = pet_options_new_with_defaults(); 28 | ArgumentCount = pet_options_parse(options, ArgumentCount, &Arguments[0], ISL_ARG_ALL); 29 | return isl_ctx_alloc_with_options(&pet_options_args, options); 30 | } 31 | 32 | void HayStack::compileProgram(std::string SourceFile) { compileProgram(SourceFile, ""); } 33 | void HayStack::compileProgram(std::string SourceFile, std::string ScopFunction) { 34 
| Program_.extractScop(SourceFile, ScopFunction); 35 | } 36 | 37 | void HayStack::initModel(std::vector Parameters) { 38 | assert(!Program_.getSchedule().is_null()); 39 | // extract the parameters 40 | Parameters_ = Program_.getSchedule().params(); 41 | int NumberOfParameters = Parameters_.dim(isl::dim::param); 42 | if (Parameters.size() < NumberOfParameters) { 43 | printf("-> exit(-1) not enough parameters\n"); 44 | exit(-1); 45 | } 46 | std::map SortedParameters; 47 | for (auto &Parameter : Parameters) { 48 | int Position = Parameters_.find_dim_by_name(isl::dim::param, Parameter.first); 49 | if (Position < 0 || Position >= NumberOfParameters) { 50 | printf("-> ignoring parameter %s\n", Parameter.first.c_str()); 51 | } else { 52 | Parameters_ = Parameters_.fix_si(isl::dim::param, Position, Parameter.second); 53 | SortedParameters[Position] = Parameter; 54 | } 55 | } 56 | // copy the sorted parameters to the parameter value vector 57 | for (auto &Entry : SortedParameters) { 58 | ParameterValues_.push_back(Entry.second); 59 | } 60 | // compute the access map 61 | Program_.computeAccessToLine(Parameters_); 62 | // compute the between map for all statements 63 | computeGlobalMaps(); 64 | // extract the access information 65 | extractAccesses(); 66 | } 67 | 68 | void HayStack::initModel() { 69 | std::vector NoParameters; 70 | initModel(NoParameters); 71 | } 72 | 73 | std::vector HayStack::countCacheMisses() { 74 | // define the result vector 75 | std::vector Results; 76 | // count the compulsory misses 77 | for (auto &Current : Accesses_) { 78 | Current.initAccess(ParameterValues_, Parameters_); 79 | Current.countCompulsoryMisses(First_); 80 | } 81 | #ifdef DIMENSION_COUNTING 82 | // compute the capacity misses dimension by dimension 83 | isl::space Space = LexSuccEq_.domain().get_space(); 84 | isl::map Universe = isl::map::universe(LexSuccEq_.get_space()); 85 | auto Remaining = SameLineSucc_.reverse(); 86 | for (int i = Space.dim(isl::dim::set) - 1; i >= 0; --i) { 87 
| Timer::startTimer("ComputeBetweenMap"); 88 | // compute the filter for the dimension 89 | isl::local_space LSI = isl::local_space(Universe.domain().get_space()); 90 | isl::local_space LSO = isl::local_space(Universe.range().get_space()); 91 | isl::map Filter = Universe; 92 | for (int j = 0; j < i; ++j) { 93 | isl::pw_aff VarIn = isl::pw_aff::var_on_domain(LSI, isl::dim::set, j); 94 | isl::pw_aff VarOut = isl::pw_aff::var_on_domain(LSO, isl::dim::set, j); 95 | isl::map Constraint = VarIn.eq_map(VarOut); 96 | Filter = Filter.intersect(Constraint); 97 | } 98 | // compute the next map for the level 99 | auto Next = Remaining.intersect(Filter).lexmax(); 100 | if (i > 0) { 101 | Remaining = Remaining.subtract_domain(Next.domain()); 102 | // Remaining = Remaining.coalesce(); 103 | } 104 | // compute the between map 105 | Next = Schedule_.apply_range(Next).apply_range(Schedule_.reverse()).coalesce(); 106 | auto After = Next.apply_range(Forward_); 107 | auto BetweenMap = After.intersect(Before_); 108 | addConflicts(Next); 109 | BetweenMap = BetweenMap.apply_range(Program_.getAccessToLine()); 110 | BetweenMap = BetweenMap.detect_equalities(); // important 111 | Timer::stopTimer("ComputeBetweenMap"); 112 | // compute the cache misses 113 | for (auto &Current : Accesses_) { 114 | Current.computeStackDistances(BetweenMap); 115 | } 116 | } 117 | #else 118 | Timer::startTimer("ComputeBetweenMap"); 119 | auto Next = SameLineSucc_.reverse().lexmax(); 120 | Next = Schedule_.apply_range(Next).apply_range(Schedule_.reverse()).coalesce(); 121 | auto After = Next.apply_range(Forward_); 122 | auto BetweenMap = After.intersect(Before_); 123 | addConflicts(Next); 124 | BetweenMap = BetweenMap.apply_range(Program_.getAccessToLine()); 125 | BetweenMap = BetweenMap.detect_equalities(); // important 126 | Timer::stopTimer("ComputeBetweenMap"); 127 | // compute the cache misses 128 | for (auto &Current : Accesses_) { 129 | Current.computeStackDistances(BetweenMap); 130 | } 131 | #endif 132 | 
// count the capacity misses and collect the results 133 | for (auto &Current : Accesses_) { 134 | Current.countCapacityMisses(); 135 | auto CacheMisses = Current.getResult(); 136 | Results.push_back(std::make_pair(Current.getName(), CacheMisses)); 137 | } 138 | return Results; 139 | } 140 | 141 | std::vector HayStack::countCacheMisses(std::vector CacheSizes) { 142 | std::vector Results; 143 | // compute the misses for all accesses 144 | for (auto &Current : Accesses_) { 145 | auto CacheMisses = Current.countCapacityMisses(CacheSizes); 146 | Results.push_back(std::make_pair(Current.getName(), CacheMisses)); 147 | } 148 | return Results; 149 | } 150 | 151 | void HayStack::computeGlobalMaps() { 152 | Timer::startTimer("ComputeBetweenMap"); 153 | // get the schedule and limit parameters 154 | Schedule_ = Program_.getSchedule(); 155 | if (Parameters_.is_null()) { 156 | // if parameters are unknown introduce lower bound 157 | isl::set Parameters = Schedule_.params(); 158 | int NumberOfParameters = Parameters.dim(isl::dim::param); 159 | for (int i = 0; i < NumberOfParameters; ++i) { 160 | Parameters = Parameters.lower_bound_si(isl::dim::param, 0, 0); 161 | } 162 | Schedule_ = Schedule_.intersect_domain(Parameters); 163 | } else { 164 | // otherwise fix the parameters 165 | Schedule_ = Schedule_.intersect_domain(Parameters_); 166 | } 167 | // filter statements without array accesses 168 | Schedule_ = Schedule_.intersect_domain(Program_.getAccessDomain()); 169 | Schedule_ = Schedule_.coalesce(); 170 | 171 | // map the iterations to the cache lines and cache sets 172 | isl::union_map IterToLine = Program_.getAccessToLine().apply_domain(Schedule_); 173 | IterToLine = IterToLine.coalesce(); 174 | 175 | // get the successor maps 176 | isl::space Space = isl::set(IterToLine.domain()).get_space(); 177 | isl::map LexSucc = isl::map::lex_lt(Space); 178 | LexSuccEq_ = isl::map::lex_le(Space); 179 | 180 | // compute accesses of the same cache line 181 | isl::union_map SameLine = 
IterToLine.apply_range(IterToLine.reverse()); 182 | SameLineSucc_ = SameLine.intersect(LexSucc); 183 | SameLineSucc_ = SameLineSucc_.coalesce(); 184 | 185 | // compute the before and forward maps 186 | Before_ = Schedule_.apply_range(LexSuccEq_.reverse()).apply_range(Schedule_.reverse()).coalesce(); 187 | Forward_ = Schedule_.apply_range(LexSuccEq_).apply_range(Schedule_.reverse()).coalesce(); 188 | Timer::stopTimer("ComputeBetweenMap"); 189 | 190 | // compute the first map that connects the every memory location to the schedule value that loads it first 191 | First_ = Program_.getAccessToLine().reverse().apply_range(Schedule_).lexmin(); 192 | First_ = Schedule_.apply_range(First_.reverse()); 193 | First_ = First_.coalesce(); 194 | } 195 | 196 | void HayStack::extractAccesses() { 197 | // extract the between map accesses 198 | auto extractAccess = [&](isl::set Set) { 199 | std::string Statement = Set.get_tuple_name(); 200 | // iterate the read and write indexes 201 | auto AccessInfos = Program_.getAccessInfos()[Statement]; 202 | for (int i = 0; i < AccessInfos.size(); ++i) { 203 | // compute the domain 204 | isl::set Domain = Schedule_.domain().extract_set(Set.get_space()); 205 | Domain = Domain.fix_si(isl::dim::set, Domain.dim(isl::dim::set) - 1, i); 206 | // create the access 207 | Access Current(AccessInfos[i].Name, MachineModel_, ModelOptions_, Domain, Program_.getElementSizes()); 208 | Accesses_.push_back(Current); 209 | } 210 | return isl::stat::ok(); 211 | }; 212 | Schedule_.domain().foreach_set(extractAccess); 213 | std::sort(Accesses_.begin(), Accesses_.end()); 214 | } 215 | 216 | void HayStack::addConflicts(isl::union_map Next) { 217 | #ifdef COMPUTE_CONFLICTS 218 | // count the statements 219 | for (auto &Source : Accesses_) { 220 | auto Destinations = Next.intersect_domain(Source.getDomain()); 221 | if (!Destinations.is_empty()) { 222 | for (auto &Destination : Accesses_) { 223 | if (!Destinations.intersect_range(Destination.getDomain()).is_empty()) { 
224 | Conflicts_[Source.getName()].push_back(Destination.getName()); 225 | } 226 | } 227 | std::sort(Conflicts_[Source.getName()].begin(), Conflicts_[Source.getName()].end()); 228 | auto Last = std::unique(Conflicts_[Source.getName()].begin(), Conflicts_[Source.getName()].end()); 229 | Conflicts_[Source.getName()].erase(Last, Conflicts_[Source.getName()].end()); 230 | } 231 | } 232 | #endif 233 | } 234 | -------------------------------------------------------------------------------- /src/HayStack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #ifndef _HAY_STACK_H_ 6 | #define _HAY_STACK_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "Access.h" 13 | #include "Definitions.h" 14 | #include "Program.h" 15 | #include 16 | 17 | // allocate context with include paths 18 | isl_ctx *allocateContextWithIncludePaths(std::vector IncludePaths); 19 | 20 | // main class of the cache model 21 | class HayStack { 22 | public: 23 | HayStack() = delete; 24 | HayStack(const HayStack &other) = default; 25 | HayStack(isl::ctx Context, machine_model MachineModel, model_options ModelOptions) 26 | : Context_(Context), MachineModel_(MachineModel), ModelOptions_(ModelOptions), Program_(Context, MachineModel){}; 27 | HayStack(isl::ctx Context, machine_model MachineModel, model_options ModelOptions, Program P) 28 | : Context_(Context), MachineModel_(MachineModel), ModelOptions_(ModelOptions), Program_(P){}; 29 | 30 | void compileProgram(std::string SourceFile); 31 | void compileProgram(std::string SourceFile, std::string ScopFunction); 32 | 33 | // initialize the cache model 34 | void initModel(std::vector Parameters); 35 | void initModel(); 36 | 37 | // prepare the between maps 38 | void computeBetweenAndFirstMaps(); 39 | 40 | // count the actual performance info 41 | std::vector countCacheMisses(); 42 | std::vector countCacheMisses(std::vector CacheSizes); 43 | 44 | std::vector 
getAccesses() const { return Accesses_; } 45 | std::map> getConflicts() const { return Conflicts_; } 46 | 47 | std::pair getScopLoc() const { return Program_.getScopLoc(); } 48 | std::map> getAccessInfos() const { return Program_.getAccessInfos(); } 49 | 50 | private: 51 | void computeGlobalMaps(); 52 | void extractAccesses(); 53 | void addConflicts(isl::union_map BetweenMap); 54 | 55 | // parameters 56 | isl::ctx Context_; 57 | machine_model MachineModel_; 58 | model_options ModelOptions_; 59 | 60 | // program information 61 | Program Program_; 62 | 63 | // analysis results 64 | isl::set Parameters_; 65 | std::vector ParameterValues_; 66 | isl::union_map Schedule_; 67 | isl::map LexSuccEq_; 68 | isl::union_map SameLineSucc_; 69 | isl::union_map Before_; 70 | isl::union_map Forward_; 71 | isl::union_map First_; 72 | 73 | std::vector Accesses_; 74 | std::map> Conflicts_; 75 | }; 76 | 77 | #endif -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | if BUNDLED_ISL 3 | ISL_LA = $(top_builddir)/isl/libisl.la 4 | endif 5 | if BUILD_ISL 6 | ISL_LA = $(ISL_BUILDDIR)/libisl.la 7 | endif 8 | 9 | if BUNDLED_BARVINOK 10 | BARVINOK_LA = $(top_builddir)/barvinok/libbarvinok.la 11 | endif 12 | if BUILD_BARVINOK 13 | BARVINOK_LA = $(BARVINOK_BUILDDIR)/libbarvinok.la 14 | endif 15 | 16 | if BUNDLED_PET 17 | PET_LA = $(top_builddir)/pet/libpet.la 18 | endif 19 | if BUILD_PET 20 | PET_LA = $(PET_BUILDDIR)/libpet.la 21 | endif 22 | 23 | if DEBUG 24 | AM_CFLAGS = -g3 -O0 25 | AM_CXXFLAGS = -g3 -O0 26 | else 27 | AM_CFLAGS = -O3 -DNDEBUG 28 | AM_CXXFLAGS = -O3 -DNDEBUG 29 | endif 30 | 31 | FORCE: 32 | ../isl/libisl.la: FORCE 33 | cd ../isl; $(MAKE) $(AM_MAKEFLAGS) libisl.la 34 | ../barvinok/libbarvinok.la: FORCE 35 | cd ../barvinok; $(MAKE) $(AM_MAKEFLAGS) libbarvinok.la 36 | ../pet/libpet.la: FORCE 37 | cd ../pet; $(MAKE) $(AM_MAKEFLAGS) libpet.la 38 | 
AM_CPPFLAGS = @ISL_CFLAGS@ @BARVINOK_CFLAGS@ @PET_CFLAGS@
LIB_ISL = $(ISL_LA) @ISL_LIBS@
LIB_BARVINOK = $(BARVINOK_LA) @BARVINOK_LIBS@
LIB_PET = $(PET_LA) @PET_LIBS@

LINK = $(CXXLINK)

# build the haystack library
noinst_LTLIBRARIES = libhaystack.la
libhaystack_la_SOURCES = \
	Program.cpp \
	HayStack.cpp \
	Access.cpp \
	isl-helpers.cpp \
	Timer.cpp

libhaystack_la_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS)
libhaystack_la_LIBADD = $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET)

# build the haystack program
bin_PROGRAMS = haystack
haystack_SOURCES = main.cpp

haystack_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS)
haystack_LDFLAGS = -static $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB)
haystack_LDADD = libhaystack.la $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET)



--------------------------------------------------------------------------------
/src/Program.cpp:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2019, ETH Zurich
 */

// NOTE(review): the header name after the bare #include directive and the template arguments of the
// std:: containers and casts are missing in this copy of the file -- restore them from the upstream sources
#include

#include "Program.h"
#include "isl-helpers.h"
#include "op.h"

// extract the scop of the source file with pet and collect the schedule, the tagged reads and writes, the
// array extents and element sizes, and the per statement access infos
// - an empty ScopFunction is passed to pet as NULL
// - the process exits with -1 if no scop can be extracted or if a schedule domain is unbounded
void Program::extractScop(std::string SourceFile, std::string ScopFunction) {
  const char *Function = ScopFunction.empty() ? NULL : ScopFunction.c_str();
  pet_scop *PetScop = pet_scop_extract_from_C_source(Context_.get(), SourceFile.c_str(), Function);
  if (PetScop == nullptr) {
    printf("-> exit(-1) cannot extract scope\n");
    exit(-1);
  }

  // get the schedule and access information
  // (the tagged accesses allow us to distinguish multiple accesses of the same array)
  Schedule_ = isl::manage(pet_scop_get_schedule(PetScop));
  Reads_ = isl::manage(pet_scop_get_tagged_may_reads(PetScop));
  Writes_ = isl::manage(pet_scop_get_tagged_may_writes(PetScop));

  // check if the schedule is bounded
  auto checkIfBounded = [](isl::set Set) {
    if (!Set.is_bounded()) {
      printf("-> exit(-1) schedule not bounded\n");
      exit(-1);
    }
    return isl::stat::ok();
  };
  Schedule_.get_domain().foreach_set(checkIfBounded);

  // extract the array information
  for (int idx = 0; idx < PetScop->n_array; idx++) {
    // copy the extent since the scop keeps owning the original
    isl::set Extent = isl::manage_copy(PetScop->arrays[idx]->extent);
    // ignore scalars
    if (Extent.dim(isl::dim::set) == 0)
      continue;
    // store the array information
    std::string Name = Extent.get_tuple_name();
    ArrayExtents_[Name] = Extent;
    ElementSizes_[Name] = PetScop->arrays[idx]->element_size;
  }
  // extract the reads and writes
  // (needs the element sizes collected above and fills AccessInfos_ with the pet reference ids)
  extractReferences();

  // compute detailed access information
  ScopLoc_ = std::make_pair(pet_loc_get_start(PetScop->loc), pet_loc_get_end(PetScop->loc));
  for (int idx = 0; idx < PetScop->n_stmt; idx++) {
    // extract the statement info
    pet_expr *Expression = pet_tree_expr_get_expr(PetScop->stmts[idx]->body);
    isl::space Space = isl::manage(pet_stmt_get_space(PetScop->stmts[idx]));
    std::string Statement = Space.get_tuple_name(isl::dim::set);
    // extract the access info
    // (callback without captures; the access infos of the statement are passed through the User pointer)
    auto printExpression = [](__isl_keep pet_expr *Expr, void *User) {
      if (pet_expr_access_is_read(Expr) || pet_expr_access_is_write(Expr)) {
        isl::id RefId = isl::manage(pet_expr_access_get_ref_id(Expr));
        std::string Name = RefId.to_str();
        isl::multi_pw_aff Index = isl::manage(pet_expr_access_get_index(Expr));
        // filter the array accesses
        if (Index.dim(isl::dim::out) > 0 && Index.has_tuple_id(isl::dim::out)) {
          // build the description "array[expr]...": the array name followed by one bracket per piece of
          // every index dimension
          std::string Access = Index.get_tuple_name(isl::dim::out);
          // process the array dimensions
          for (int i = 0; i < Index.dim(isl::dim::out); ++i) {
            std::vector Expressions;
            auto IndexExpr = Index.get_pw_aff(i);
            auto extractExpr = [&](isl::set Set, isl::aff Aff) {
              Access += "[" + isl::printExpression(Aff) + "]";
              return isl::stat::ok();
            };
            IndexExpr.foreach_piece(extractExpr);
          }
          // find the access info
          auto AccessInfos = (std::vector *)User;
          auto Iter = AccessInfos->begin();
          // replace the reference id by the description in every access info that still holds the id
          do {
            // find the pet references and update the access description
            Iter = std::find_if(Iter, AccessInfos->end(), [&](access_info AccessInfo) {
              if (AccessInfo.Access == Name)
                return true;
              return false;
            });
            if (Iter != AccessInfos->end() && Iter->Access == Name) {
              Iter->Access = Access;
              Iter++;
            }
          } while (Iter != AccessInfos->end());
        }
      }
      return 0;
    };
    pet_expr_foreach_access_expr(Expression, printExpression, &AccessInfos_[Statement]);
    // get the line number
    pet_loc *Loc = pet_tree_get_loc(PetScop->stmts[idx]->body);
    int Line = pet_loc_get_line(Loc);
    int Start = pet_loc_get_start(Loc);
    int Stop = pet_loc_get_end(Loc);
    pet_loc_free(Loc);
    // store the access information
    // (all accesses of a statement share the location of the statement)
    for (auto &Access : AccessInfos_[Statement]) {
      Access.Line = Line;
      Access.Start = Start;
      Access.Stop = Stop;
    }
    pet_expr_free(Expression);
  }

  // extend the schedule and the read and write maps with an access dimension
  extendSchedule();
  Reads_ = extendAccesses(Reads_, false);
  Writes_ = extendAccesses(Writes_, true);

  // compute the access domain
  AccessDomain_ = Reads_.domain().unite(Writes_.domain()).coalesce();

  // free the pet scop
  pet_scop_free(PetScop);
}

// append one dimension that enumerates the references of a statement to the domain and the range of
// every statement schedule and store the result in ScheduleMap_
// (statements without read and write references are dropped from the map)
void Program::extendSchedule() {
  // extend the schedule with the reference dimension
  ScheduleMap_ = Schedule_.get_map().intersect_domain(Schedule_.get_domain());
  isl::union_map ScheduleExt = isl::map::empty(ScheduleMap_.get_space());
  auto extendSchedule = [&](isl::map Schedule) {
    std::string Statement = Schedule.get_tuple_name(isl::dim::in);
    if (!(ReadReferences_[Statement].empty() && WriteReferences_[Statement].empty())) {
      // extend the schedule
      Schedule = Schedule.insert_dims(isl::dim::in, Schedule.dim(isl::dim::in), 1);
      Schedule = Schedule.insert_dims(isl::dim::out, Schedule.dim(isl::dim::out), 1);
      // presumably the tuple name is lost when the dimension is inserted -- TODO confirm
      Schedule = Schedule.set_tuple_name(isl::dim::in, Statement);
      // connect the new dimension
      // (the new input dimension equals the new output dimension)
      isl::local_space LSIn = isl::local_space(Schedule.domain().get_space());
      isl::local_space LSOut = isl::local_space(Schedule.range().get_space());
      isl::pw_aff VarIn = isl::pw_aff::var_on_domain(LSIn, isl::dim::set, Schedule.dim(isl::dim::in) - 1);
      isl::pw_aff VarOut = isl::pw_aff::var_on_domain(LSOut, isl::dim::set, Schedule.dim(isl::dim::out) - 1);
      isl::map EqualConstraint = VarIn.eq_map(VarOut);
      Schedule = Schedule.intersect(EqualConstraint);
      // compute the reference range
      // (0 <= reference index < number of reads + number of writes; VarIn * 0 yields constant expressions
      // on the domain space)
      isl::pw_aff Min = VarIn * 0;
      isl::pw_aff Max = VarIn * 0 + ReadReferences_[Statement].size() + WriteReferences_[Statement].size();
      isl::set LowerBound = VarIn.ge_set(Min);
      isl::set UpperBound = VarIn.lt_set(Max);
      Schedule = Schedule.intersect_domain(LowerBound);
      Schedule = Schedule.intersect_domain(UpperBound);
      ScheduleExt = ScheduleExt.unite(isl::union_map(Schedule));
    }
    return isl::stat::ok();
  };
  ScheduleMap_.foreach_map(extendSchedule);
  ScheduleMap_ = ScheduleExt;
}

// collect the reference ids of the tagged reads and writes per statement and rebuild AccessInfos_
// (only accesses to arrays with a known element size are considered)
void Program::extractReferences() {
  // extract the read and write references
  auto extractReads = [&](isl::map Map) {
    if (ElementSizes_.count(Map.get_tuple_name(isl::dim::out)) > 0) {
      // the domain of a tagged access wraps the pair statement -> reference
      std::string Statement = Map.domain().unwrap().get_tuple_name(isl::dim::in);
      std::string Reference = Map.domain().unwrap().get_tuple_name(isl::dim::out);
      ReadReferences_[Statement].push_back(Reference);
    }
    return isl::stat::ok();
  };
  Reads_.foreach_map(extractReads);
  auto extractWrites = [&](isl::map Map) {
    if (ElementSizes_.count(Map.get_tuple_name(isl::dim::out)) > 0) {
      std::string Statement = Map.domain().unwrap().get_tuple_name(isl::dim::in);
      std::string Reference = Map.domain().unwrap().get_tuple_name(isl::dim::out);
      WriteReferences_[Statement].push_back(Reference);
    }
    return isl::stat::ok();
  };
  Writes_.foreach_map(extractWrites);

  // sort the references
  // (shorter names first, names of equal length in lexicographic order)
  auto compare = [](std::string &S1, std::string &S2) {
    if (S1.length() == S2.length())
      return S1 < S2;
    else
      return S1.length() < S2.length();
  };
  for (auto &ReadReferences : ReadReferences_)
    std::sort(ReadReferences.second.begin(), ReadReferences.second.end(), compare);
  for (auto &WriteReferences : WriteReferences_)
    std::sort(WriteReferences.second.begin(), WriteReferences.second.end(), compare);

  // compute the access info
  // (the reads of a statement are named "<statement>(R<i>)" and precede its writes "<statement>(W<i>)";
  // the access field holds the reference id and the location fields are zero for now)
  AccessInfos_.clear();
  for (auto &ReadReferences : ReadReferences_) {
    for (int i = 0; i < ReadReferences.second.size(); ++i) {
      AccessInfos_[ReadReferences.first].push_back(
          {ReadReferences.first + "(R" + std::to_string(i) + ")", ReadReferences.second[i], Read, 0, 0, 0});
    }
  }
  for (auto &WriteReferences : WriteReferences_) {
    for (int i = 0; i < WriteReferences.second.size(); ++i) {
      AccessInfos_[WriteReferences.first].push_back(
          {WriteReferences.first + "(W" + std::to_string(i) + ")", WriteReferences.second[i], Write, 0, 0, 0});
    }
  }
}

// replace the tagged domain of every access by the statement domain extended with one dimension that is
// fixed to the index of the reference
// - WriteReferences selects the write list; write indexes start after the reads of the statement
// - accesses to arrays without a known element size are dropped
isl::union_map Program::extendAccesses(isl::union_map Accesses, bool WriteReferences) {
  // extend the access map
  isl::union_map AccessesExt = isl::map::empty(Accesses.get_space());
  auto extendAccesses = [&](isl::map Access) {
    std::string Statement = Access.domain().unwrap().get_tuple_name(isl::dim::in);
    std::string Reference = Access.domain().unwrap().get_tuple_name(isl::dim::out);
    std::string Array = Access.get_tuple_name(isl::dim::out);
    if (ElementSizes_.count(Array) > 0) {
      // get the reference offset
      long Distance = 0;
      if (WriteReferences) {
        auto Iter = std::find(WriteReferences_[Statement].begin(), WriteReferences_[Statement].end(), Reference);
        assert(Iter != WriteReferences_[Statement].end());
        Distance = std::distance(WriteReferences_[Statement].begin(), Iter) + ReadReferences_[Statement].size();
      } else {
        auto Iter = std::find(ReadReferences_[Statement].begin(), ReadReferences_[Statement].end(), Reference);
        assert(Iter != ReadReferences_[Statement].end());
        Distance = std::distance(ReadReferences_[Statement].begin(), Iter);
      }
      // extend the map with the offset
      Access = Access.insert_dims(isl::dim::in, Access.dim(isl::dim::in), 1);
      Access = Access.set_tuple_name(isl::dim::in, Statement);
      // set the offset number
      isl::local_space LSIn = isl::local_space(Access.domain().get_space());
      isl::pw_aff VarIn = isl::pw_aff::var_on_domain(LSIn, isl::dim::set, Access.dim(isl::dim::in) - 1);
      isl::pw_aff Offset = VarIn * 0 + Distance;
      isl::set EqualConstraint = VarIn.eq_set(Offset);
      Access = Access.intersect_domain(EqualConstraint);
      AccessesExt = AccessesExt.unite(isl::union_map(Access));
    }
    return isl::stat::ok();
  };
  Accesses.foreach_map(extendAccesses);
  return AccessesExt;
}

// compute the maps from the accesses to the cache lines and to the array elements
// - the maps are computed only once; later calls return without any effect
// - a non-null Parameters set restricts the parameters of the array extents
void Program::computeAccessToLine(isl::set Parameters) {
  if (AccessToLine_.is_null()) {
    // compute the access map
    AccessToLine_ = isl::map::empty(Writes_.get_space());
    AccessToElement_ = isl::map::empty(Writes_.get_space());
    for (auto Array : ArrayExtents_) {
      // extract the array information
      std::string Name = Array.first;
      isl::set Extent = Array.second;
      // apply the parameters
      if (!Parameters.is_null()) {
        Extent = Extent.intersect_params(Parameters);
      }
      // map the access to the array offsets
      isl::map AccessToArray = isl::map::identity(Extent.get_space().map_from_set());
      AccessToElement_ = AccessToElement_.unite(isl::union_map(AccessToArray));
      // compute elements per cache line
      // NOTE(review): the integer division yields 0 if an element is larger than a cache line -- confirm
      // that introduceCacheLines handles this case
      long ElementsPerCacheLine = MachineModel_.CacheLineSize / ElementSizes_[Name];
      AccessToArray = introduceCacheLines(Name, AccessToArray, ElementsPerCacheLine);
      AccessToLine_ = AccessToLine_.unite(isl::union_map(AccessToArray));
      AccessToLine_ = AccessToLine_.coalesce();
    }
    // compose the memory accesses with access map
    isl::union_map Accesses = Reads_.unite(Writes_);
    AccessToLine_ = Accesses.apply_range(AccessToLine_);
    AccessToLine_ = AccessToLine_.coalesce();
    AccessToElement_ = Accesses.apply_range(AccessToElement_);
    AccessToElement_ = AccessToElement_.coalesce();
  }
}

isl::map Program::introduceCacheLines(std::string Name, isl::map AccessToArray, long ElementsPerCacheLine) const {
  // introduce additional dimension that divides the innermost dimension by the cache line size
  int Dim = AccessToArray.dim(isl::dim::out) - 1;
  AccessToArray = AccessToArray.add_dims(isl::dim::out, 1);
  isl::local_space LS = isl::local_space(AccessToArray.range().get_space());
  isl::pw_aff Var =
isl::pw_aff::var_on_domain(LS, isl::dim::set, Dim); 277 | isl::pw_aff VarPrime = isl::pw_aff::var_on_domain(LS, isl::dim::set, Dim + 1); 278 | isl::set ConstraintOne = (ElementsPerCacheLine * VarPrime).le_set(Var); 279 | isl::set ConstraintTwo = Var.le_set(ElementsPerCacheLine * VarPrime + (ElementsPerCacheLine - 1)); 280 | AccessToArray = AccessToArray.intersect_range(ConstraintOne).intersect_range(ConstraintTwo); 281 | AccessToArray = AccessToArray.project_out(isl::dim::out, Dim, 1); 282 | AccessToArray = AccessToArray.set_tuple_name(isl::dim::out, Name); 283 | // return the resulting array 284 | return AccessToArray; 285 | } 286 | 287 | isl::map Program::introduceCacheSets(std::string Name, isl::map AccessToArray, long NumberOfCacheSets) const { 288 | // introduce additional dimension that divides the innermost dimension by the cache line size 289 | int Dim = AccessToArray.dim(isl::dim::out) - 1; 290 | AccessToArray = AccessToArray.add_dims(isl::dim::out, 1); 291 | isl::local_space LS = isl::local_space(AccessToArray.range().get_space()); 292 | isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, Dim); 293 | isl::pw_aff VarPrime = isl::pw_aff::var_on_domain(LS, isl::dim::set, Dim + 1); 294 | isl::pw_aff Modulo = Var.mod(isl::val(Var.get_ctx(), NumberOfCacheSets)); 295 | isl::set Constraint = VarPrime.eq_set(Modulo); 296 | AccessToArray = AccessToArray.intersect_range(Constraint); 297 | AccessToArray = AccessToArray.set_tuple_name(isl::dim::out, Name); 298 | // return the resulting array 299 | return AccessToArray; 300 | } 301 | -------------------------------------------------------------------------------- /src/Program.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #ifndef _SCOP_H_ 6 | #define _SCOP_H_ 7 | 8 | #include "pet.h" 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Definitions.h" 14 | 15 | // class parsing the source file and 
extracting schedule and memory accesses 16 | class Program { 17 | public: 18 | Program() = delete; 19 | Program(isl::ctx Context, machine_model MachineModel) : Context_(Context), MachineModel_(MachineModel) {} 20 | 21 | void extractScop(std::string SourceFile, std::string ScopFunction); 22 | 23 | void computeAccessToLine(isl::set Parameters); 24 | 25 | isl::union_map getSchedule() const { return ScheduleMap_; } 26 | isl::union_map getReads() const { return Reads_; } 27 | isl::union_map getWrites() const { return Writes_; } 28 | isl::union_set getAccessDomain() const { return AccessDomain_; } 29 | isl::union_map getAccessToLine() const { return AccessToLine_; } 30 | isl::union_map getAccessToElement() const { return AccessToElement_; } 31 | std::map getElementSizes() const { return ElementSizes_; } 32 | 33 | std::map> getAccessInfos() const { return AccessInfos_; } 34 | std::pair getScopLoc() const { return ScopLoc_; }; 35 | 36 | private: 37 | void extractReferences(); 38 | void extendSchedule(); 39 | isl::union_map extendAccesses(isl::union_map Accesses, bool WriteReferences); 40 | isl::map introduceCacheLines(std::string Name, isl::map AccessToArray, long ElementsPerCacheLine) const; 41 | isl::map introduceCacheSets(std::string Name, isl::map AccessToArray, long NumberOfCacheSets) const; 42 | 43 | isl::ctx Context_; 44 | machine_model MachineModel_; 45 | 46 | isl::schedule Schedule_; 47 | isl::union_map ScheduleMap_; 48 | isl::union_map Reads_; 49 | isl::union_map Writes_; 50 | isl::union_map AccessToLine_; 51 | isl::union_map AccessToElement_; 52 | isl::union_set AccessDomain_; 53 | 54 | // array information 55 | std::map ArrayExtents_; 56 | std::map ElementSizes_; 57 | 58 | // read and write references per statement 59 | std::map> ReadReferences_; 60 | std::map> WriteReferences_; 61 | 62 | // location info for scop and memory accesses 63 | std::pair ScopLoc_; 64 | std::map> AccessInfos_; 65 | }; 66 | 67 | #endif 
// class that allows to profile the execution times
//
// All methods compile to no-ops unless TIMERS is defined. Every named clock
// stores the time of its last start and the accumulated time in milliseconds.
class Timer {
public:
  // (time of the last start, accumulated milliseconds)
  typedef std::pair<std::chrono::high_resolution_clock::time_point, double> Entry;

  // remember the start time of the clock Name (creates the clock on first use)
  static inline void startTimer(const std::string &Name) {
#ifdef TIMERS
    Clocks[Name].first = std::chrono::high_resolution_clock::now();
#else
    (void)Name;
#endif
  }

  // add the time elapsed since the last start of the clock Name to its total
  static inline void stopTimer(const std::string &Name) {
#ifdef TIMERS
    auto stop = std::chrono::high_resolution_clock::now();
    Entry &Clock = Clocks[Name];
    Clock.second += std::chrono::duration<double, std::milli>(stop - Clock.first).count();
#else
    (void)Name;
#endif
  }

  // print the accumulated time of all clocks in milliseconds
  static inline void printClocks() {
#ifdef TIMERS
    std::cout.setf(std::ios::fixed, std::ios::floatfield);
    std::cout.precision(3);
    std::cout << "==================================================" << std::endl;
    for (const auto &Clock : Clocks) {
      std::cout << " - " << Clock.first << ":\t" << Clock.second.second << "ms" << std::endl;
    }
    std::cout << "==================================================" << std::endl;
#endif
  }

private:
  // defined in Timer.cpp
  static std::map<std::string, Entry> Clocks;
};
| #include "isl-helpers.h" 9 | #include 10 | #include 11 | 12 | // safe conversion to integer value 13 | long isl::get_value(isl::val Value) { 14 | isl::val Denominator = isl::manage(isl_val_get_den_val(Value.get())); 15 | assert(Denominator.is_one()); 16 | return Value.get_num_si(); 17 | } 18 | 19 | long isl::computeMinimum(isl::set Domain, int Dimension) { 20 | // try to find constant bounds 21 | long Minimum = std::numeric_limits::max(); 22 | auto findMinimum = [&](isl::set Domain, isl::aff Value) { 23 | if (Value.is_cst()) { 24 | Minimum = std::min(Minimum, isl::get_value(Value.get_constant_val())); 25 | } else { 26 | Minimum = std::numeric_limits::min(); 27 | } 28 | return isl::stat::ok(); 29 | }; 30 | Domain.dim_min(Dimension).foreach_piece(findMinimum); 31 | return Minimum; 32 | } 33 | 34 | long isl::computeMaximum(isl::set Domain, int Dimension) { 35 | // try to find constant bounds 36 | long Maximum = std::numeric_limits::min(); 37 | auto findMaximum = [&](isl::set Domain, isl::aff Value) { 38 | if (Value.is_cst()) { 39 | Maximum = std::max(Maximum, isl::get_value(Value.get_constant_val())); 40 | } else { 41 | Maximum = std::numeric_limits::max(); 42 | } 43 | return isl::stat::ok(); 44 | }; 45 | Domain.dim_max(Dimension).foreach_piece(findMaximum); 46 | return Maximum; 47 | } 48 | 49 | long isl::cardinality(isl::set Set) { 50 | return get_value(isl::manage(isl_set_card(isl::set(Set).release())).max()); 51 | } 52 | 53 | std::string isl::printExpression(isl::aff Expression) { 54 | bool isFirst = true; 55 | std::string Result = ""; 56 | auto printValue = [&](isl::val Value, bool isCoefficient) { 57 | std::string Result = ""; 58 | // print the sign if needed 59 | int Sign = Value.sgn(); 60 | if (Sign < 0) { 61 | Value = Value.neg(); 62 | Result = "-"; 63 | } else if (!isFirst) { 64 | Result = "+"; 65 | } 66 | if (!(Value.is_one() && isCoefficient)) { 67 | Result += Value.to_str(); 68 | } 69 | return Result; 70 | }; 71 | auto printAff = [&](isl::aff Aff) { 72 | 
isl::val Denominator = Aff.get_denominator_val(); 73 | if (!Denominator.is_one()) { 74 | Result += "("; 75 | } 76 | // convert the parameters 77 | for (int i = 0; i < Aff.dim(isl::dim::param); ++i) { 78 | isl::val Coefficient = Aff.get_coefficient_val(isl::dim::param, i); 79 | if (!Coefficient.is_zero()) { 80 | Result += printValue(Coefficient.mul(Denominator), true); 81 | Result += Aff.get_dim_name(isl::dim::param, i); 82 | isFirst = false; 83 | } 84 | } 85 | // convert the variables 86 | for (int i = 0; i < Aff.dim(isl::dim::in); ++i) { 87 | isl::val Coefficient = Aff.get_coefficient_val(isl::dim::in, i); 88 | if (!Coefficient.is_zero()) { 89 | Result += printValue(Coefficient.mul(Denominator), true); 90 | Result += Aff.get_dim_name(isl::dim::in, i); 91 | isFirst = false; 92 | } 93 | } 94 | // add the constant 95 | isl::val Constant = Aff.get_constant_val(); 96 | if (!Constant.is_zero() || isFirst) { 97 | Result += printValue(Constant.mul(Denominator), false); 98 | isFirst = false; 99 | } 100 | if (!Denominator.is_one()) { 101 | isFirst = true; 102 | Result += ")/" + printValue(Denominator, false); 103 | isFirst = false; 104 | } 105 | }; 106 | // pint the main expression 107 | printAff(Expression); 108 | // add possible divisors 109 | for (int i = 0; i < Expression.dim(isl::dim::div); ++i) { 110 | isl::aff Divisor = Expression.get_div(i); 111 | isl::val Coefficient = Expression.get_coefficient_val(isl::dim::div, i); 112 | if (!Coefficient.is_zero()) { 113 | Result += printValue(Coefficient, true); 114 | Result += "\u230A"; 115 | isFirst = true; 116 | printAff(Divisor); 117 | Result += "\u230B"; 118 | } 119 | } 120 | return Result; 121 | } 122 | -------------------------------------------------------------------------------- /src/isl-helpers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #ifndef _ISL_HELPERS_H_ 6 | #define _ISL_HELPERS_H_ 7 | 8 | #include 9 | 10 | 
// returns true if the path can be opened for reading
bool check_path(std::string path) {
  std::ifstream f(path.c_str());
  return f.good();
}

// Returns the lines of the file (keyed by their one-based line number) whose
// end offset lies in the range (ScopLoc.first, ScopLoc.second].
std::map<int, std::string> compute_lines(std::string FileName, std::pair<long, long> ScopLoc) {
  std::map<int, std::string> Result;
  std::ifstream SourceFile(FileName);
  std::string Line;
  int LineNumber = 0;
  long End = 0;
  while (std::getline(SourceFile, Line)) {
    LineNumber++;
    // tellg() fails once the end of file is reached, which happens for a last line
    // without trailing newline; derive the offset from the line length instead
    End = SourceFile.eof() ? End + static_cast<long>(Line.length()) : static_cast<long>(SourceFile.tellg());
    if (End > ScopLoc.first && End <= ScopLoc.second) {
      Result[LineNumber] = Line;
    }
  }
  return Result;
}

// Returns the [start, end) byte offsets of every line of the file keyed by
// the one-based line number; the end offset includes the newline character.
std::map<int, std::pair<long, long>> compute_offsets(std::string FileName) {
  std::map<int, std::pair<long, long>> Result;
  std::ifstream SourceFile(FileName);
  std::string Line;
  int LineNumber = 0;
  long Start = 0;
  while (std::getline(SourceFile, Line)) {
    // see compute_lines for the handling of a last line without trailing newline
    const long End =
        SourceFile.eof() ? Start + static_cast<long>(Line.length()) : static_cast<long>(SourceFile.tellg());
    Result[++LineNumber] = std::make_pair(Start, End);
    Start = End;
  }
  return Result;
}

// Prints the lines [Start, Stop) prefixed with their right-aligned line
// number. Lines missing in the map are inserted and printed empty.
void print_scop(std::map<int, std::string> &Lines, int Start, int Stop) {
  // compute number of necessary digits using the largest line number
  // (dereferencing Lines.end() is undefined behavior)
  int Width = Lines.empty() ? 1 : static_cast<int>(std::to_string(Lines.rbegin()->first).length());
  for (int i = Start; i < Stop; ++i) {
    std::cout << std::setw(Width) << std::right << std::to_string(i);
    std::cout << " " << Lines[i] << std::endl;
  }
}

// define print operators
namespace std {
// prints the vector elements separated by single spaces
std::ostream &operator<<(std::ostream &os, const std::vector<long> &vec) {
  for (std::size_t i = 0; i < vec.size(); ++i) {
    os << vec[i];
    if (i + 1 < vec.size())
      os << " ";
  }
  return os;
}
} // namespace std
114 | if (Variables.count("define-parameters") != 0) { 115 | printf("-> parsing the parameters...\n"); 116 | std::vector ParamStrings = Variables["define-parameters"].as>(); 117 | for (auto ParamString : ParamStrings) { 118 | std::string Name; 119 | long Value; 120 | try { 121 | std::string Delimiter = "="; 122 | auto Split = ParamString.find(Delimiter); 123 | if (Split == std::string::npos) { 124 | throw std::runtime_error("did not find delimiter"); 125 | } 126 | std::string Name = ParamString.substr(0, ParamString.find(Delimiter)); 127 | long Value = std::stol(ParamString.substr(ParamString.find(Delimiter) + 1)); 128 | printf(" - %s = %ld\n", Name.c_str(), Value); 129 | Parameters.push_back(std::make_pair(Name, Value)); 130 | } catch (std::exception) { 131 | printf("-> exit(-1) failed to parse %s\n", ParamString.c_str()); 132 | exit(-1); 133 | } 134 | } 135 | printf("-> done\n"); 136 | } 137 | // run the preprocessing 138 | printf("-> start processing...\n"); 139 | auto Start = std::chrono::high_resolution_clock::now(); 140 | Model.initModel(Parameters); 141 | // execute the cache model 142 | auto CacheMisses = Model.countCacheMisses(); 143 | auto Stop = std::chrono::high_resolution_clock::now(); 144 | double TotalEvaluation = std::chrono::duration(Stop - Start).count(); 145 | printf("-> done after (%.2fms)\n", TotalEvaluation); 146 | // collect and print result 147 | long TotalAccesses = 0; 148 | long TotalCompulsory = 0; 149 | std::vector TotalCapacity(MachineModel.CacheSizes.size(), 0); 150 | // sum the cache misses for all accesses 151 | for (auto &CacheMiss : CacheMisses) { 152 | TotalAccesses += CacheMiss.second.Total; 153 | TotalCompulsory += CacheMiss.second.CompulsoryMisses; 154 | std::transform(TotalCapacity.begin(), TotalCapacity.end(), CacheMiss.second.CapacityMisses.begin(), 155 | TotalCapacity.begin(), std::plus()); 156 | }; 157 | // open the input file and seek the start of the scop 158 | std::map Lines = compute_lines(Variables["input-file"].as(), 
Model.getScopLoc()); 159 | std::map> Offsets = compute_offsets(Variables["input-file"].as()); 160 | long Position = Lines.begin()->first; 161 | std::string LineStart; 162 | std::string SingleLine; 163 | std::string DoubleLine; 164 | LineStart.resize(std::to_string(Lines.rbegin()->first).length() + 1, ' '); 165 | SingleLine.resize(80 - LineStart.length(), '-'); 166 | DoubleLine.resize(80, '='); 167 | // print the access infos sorted by position 168 | size_t RefWidth = 16; 169 | std::map> AccessInfosByLn; 170 | std::map AccessInfoByName; 171 | for (auto AccessInfos : Model.getAccessInfos()) { 172 | if (AccessInfos.second.empty()) 173 | continue; 174 | AccessInfosByLn[AccessInfos.second[0].Line] = AccessInfos.second; 175 | for (auto AccessInfo : AccessInfos.second) { 176 | AccessInfoByName[AccessInfo.Name] = AccessInfo; 177 | RefWidth = std::max(RefWidth, AccessInfo.Name.length() + 1); 178 | } 179 | } 180 | // print the cache info access by access 181 | std::cout << DoubleLine << std::endl; 182 | std::cout << " relative number of cache misses (statement)" << std::endl; 183 | std::cout << DoubleLine << std::endl; 184 | for (auto AccessInfos : AccessInfosByLn) { 185 | // determine the last line of multiline 186 | int Next = AccessInfos.first; 187 | while (Offsets[Next].second < AccessInfos.second[0].Stop) { 188 | Next++; 189 | } 190 | // print the sources 191 | print_scop(Lines, Position, Next + 1); 192 | Position = Next + 1; 193 | // print header 194 | std::cout << LineStart << SingleLine << std::endl; 195 | std::cout << std::setw(RefWidth) << std::right << "ref"; 196 | std::cout << " "; 197 | std::cout << std::setw(6) << std::left << "type"; 198 | std::cout << std::setw(10) << std::left << "comp[%]"; 199 | for (int i = 1; i <= MachineModel.CacheSizes.size(); ++i) { 200 | std::string Capacity = "L" + std::to_string(i) + "[%]"; 201 | std::cout << std::setw(10) << std::left << Capacity; 202 | } 203 | std::cout << std::setw(10) << std::left << "tot[%]"; 204 | std::cout << 
std::setw(10) << std::left << "reuse[ln]"; 205 | std::cout << std::endl; 206 | // print the accesses 207 | for (auto AccessInfo : AccessInfos.second) { 208 | // find the actual cache miss info 209 | auto Iter = std::find_if(CacheMisses.begin(), CacheMisses.end(), 210 | [&](NamedMisses Misses) { return Misses.first == AccessInfo.Name; }); 211 | assert(Iter != CacheMisses.end()); 212 | auto Compulsory = Iter->second.CompulsoryMisses; 213 | auto Capacity = Iter->second.CapacityMisses; 214 | auto Total = Iter->second.Total; 215 | // print the access info 216 | std::string Name = AccessInfo.Access; 217 | if (Name.length() > RefWidth) 218 | Name = Name.substr(0, RefWidth); 219 | std::cout << std::setw(RefWidth) << std::right << Name; 220 | std::cout << " "; 221 | std::cout << std::setw(6) << std::left << (AccessInfo.ReadOrWrite == Read ? "rd" : "wr"); 222 | std::cout << std::setw(10) << std::left << std::setprecision(5) << std::fixed 223 | << 100.0 * (double)Compulsory / (double)TotalAccesses; 224 | for (int i = 0; i < MachineModel.CacheSizes.size(); ++i) { 225 | std::cout << std::setw(10) << std::left << std::setprecision(5) << std::fixed 226 | << 100.0 * (double)Capacity[i] / (double)TotalAccesses; 227 | } 228 | std::cout << std::setw(10) << std::left << std::setprecision(5) << std::fixed 229 | << 100.0 * (double)Total / (double)TotalAccesses; 230 | // compute the reuse line numbers 231 | auto Conflicts = Model.getConflicts()[AccessInfo.Name]; 232 | // compute the reuse line numbers 233 | std::vector ReuseLines; 234 | for (auto Conflict : Conflicts) { 235 | ReuseLines.push_back(AccessInfoByName[Conflict].Line); 236 | } 237 | // sort the line numbers and remove duplicates 238 | std::sort(ReuseLines.begin(), ReuseLines.end()); 239 | auto Last = std::unique(ReuseLines.begin(), ReuseLines.end()); 240 | for (auto Iter = ReuseLines.begin(); Iter != Last;) { 241 | std::cout << *Iter; 242 | if (++Iter != Last) 243 | std::cout << ","; 244 | } 245 | std::cout << std::endl; 246 | 
} 247 | std::cout << LineStart << SingleLine << std::endl; 248 | } 249 | print_scop(Lines, Position, Lines.rbegin()->first + 1); 250 | // print the scop info 251 | std::cout << DoubleLine << std::endl; 252 | std::cout << " absolute number of cache misses (SCOP)" << std::endl; 253 | std::cout << DoubleLine << std::endl; 254 | std::cout.imbue(std::locale("")); 255 | std::cout << std::setw(16) << std::left << "compulsory:"; 256 | std::cout << std::setw(20) << std::right << TotalCompulsory << std::endl; 257 | for (int i = 1; i <= MachineModel.CacheSizes.size(); ++i) { 258 | std::string Capacity = "capacity (L" + std::to_string(i) + "):"; 259 | std::cout << std::setw(16) << std::left << Capacity; 260 | std::cout << std::setw(20) << std::right << TotalCapacity[i - 1] << std::endl; 261 | } 262 | std::cout << std::setw(16) << std::left << "total:"; 263 | std::cout << std::setw(20) << std::right << TotalAccesses << std::endl; 264 | std::cout << DoubleLine << std::endl; 265 | } 266 | 267 | int main(int argc, const char **args) { 268 | try { 269 | // define the program options 270 | po::options_description Descriptor("Program options"); 271 | Descriptor.add_options() // 272 | ("help,h", "print the program options") // 273 | ("cache-sizes,c", po::value>()->multitoken()->default_value({CACHE_SIZE1, CACHE_SIZE2}), 274 | "cache sizes in byte") // 275 | ("line-size,l", po::value()->default_value(CACHE_LINE_SIZE), "cache-line size in byte") // 276 | ("input-file,f", po::value(), "set the source file [file name]") // 277 | ("include-path,I", po::value>(), "set the include path [include path]") // 278 | ("define-parameters,d", po::value>()->multitoken(), "parameter values [N=10 M=100]") // 279 | ("scop-function,s", po::value(), "set the scop function scop") // 280 | ("compute-bounds,b", po::value()->default_value(false), "compute stack distance bounds"); 281 | 282 | // parse the program options 283 | po::variables_map Variables; 284 | po::store(po::parse_command_line(argc, args, 
Descriptor), Variables); 285 | po::notify(Variables); 286 | if (Variables.count("help") || Variables.count("input-file") == 0) { 287 | std::cout << Descriptor << std::endl; 288 | return 0; 289 | } 290 | // check if the include paths are valid 291 | for (int i = 0; i < Variables.count("include-path"); ++i) { 292 | std::string IncludePath = Variables["include-path"].as>()[i]; 293 | if (!check_path(IncludePath)) { 294 | printf("-> exit(-1) include path %s not valid\n", IncludePath.c_str()); 295 | exit(-1); 296 | } 297 | } 298 | // check if the source file is valid 299 | if (!check_path(Variables["input-file"].as())) { 300 | printf("-> exit(-1) input file %s not found\n", Variables["input-file"].as().c_str()); 301 | exit(-1); 302 | } 303 | // allocate the context outside of the cache model 304 | std::vector IncludePaths; 305 | if (Variables.count("include-path") > 0) 306 | IncludePaths = Variables["include-path"].as>(); 307 | isl::ctx Context = allocateContextWithIncludePaths(IncludePaths); 308 | isl_options_set_on_error(Context.get(), ISL_ON_ERROR_ABORT); 309 | 310 | // run the cache model 311 | run_model(Context, Variables); 312 | 313 | isl_ctx_free(Context.get()); 314 | } catch (const boost::program_options::error &ex) { 315 | printf("-> exit(-1) option parsing error: %s\n", ex.what()); 316 | } 317 | return 0; 318 | } 319 | -------------------------------------------------------------------------------- /src/op.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef ISL_OPERATORS 4 | #define ISL_OPERATORS 5 | 6 | inline isl::pw_aff operator+(isl::pw_aff A, isl::pw_aff B) { return A.add(B); } 7 | 8 | inline isl::pw_aff operator+(isl::val V, isl::pw_aff A) { 9 | isl::pw_aff AV(A.domain(), V); 10 | return A.add(AV); 11 | } 12 | 13 | inline isl::pw_aff operator+(isl::pw_aff A, isl::val V) { return V + A; } 14 | 15 | inline isl::pw_aff operator+(int i, isl::pw_aff A) { 16 | isl::ctx ctx = A.get_ctx(); 17 | return A 
+ isl::val(ctx, i); 18 | } 19 | 20 | inline isl::pw_aff operator+(isl::pw_aff A, int i) { return i + A; } 21 | 22 | inline isl::val operator+(isl::val A, isl::val B) { 23 | return A.mul(B); 24 | } 25 | 26 | inline isl::pw_aff operator*(isl::pw_aff A, isl::pw_aff B) { return A.mul(B); } 27 | 28 | inline isl::pw_aff operator*(isl::val V, isl::pw_aff A) { 29 | isl::pw_aff AV(A.domain(), V); 30 | return A.mul(AV); 31 | } 32 | 33 | inline isl::pw_aff operator*(isl::pw_aff A, isl::val V) { return V * A; } 34 | 35 | inline isl::pw_aff operator*(int i, isl::pw_aff A) { 36 | isl::ctx ctx = A.get_ctx(); 37 | return A * isl::val(ctx, i); 38 | } 39 | 40 | inline isl::pw_aff operator*(isl::pw_aff A, int i) { return i * A; } 41 | 42 | inline isl::val operator*(isl::val A, isl::val B) { 43 | return A.mul(B); 44 | } 45 | 46 | inline isl::pw_aff operator-(isl::pw_aff A, isl::pw_aff B) { return A.sub(B); } 47 | 48 | inline isl::pw_aff operator-(isl::val V, isl::pw_aff A) { 49 | isl::pw_aff AV(A.domain(), V); 50 | return AV - A; 51 | } 52 | 53 | inline isl::pw_aff operator-(isl::pw_aff A, isl::val V) { 54 | isl::pw_aff AV(A.domain(), V); 55 | return A - AV; 56 | } 57 | 58 | inline isl::pw_aff operator-(int i, isl::pw_aff A) { 59 | isl::ctx ctx = A.get_ctx(); 60 | return isl::val(ctx, i) - A; 61 | } 62 | 63 | inline isl::pw_aff operator-(isl::pw_aff A, int i) { 64 | isl::ctx ctx = A.get_ctx(); 65 | return A - isl::val(ctx, i); 66 | } 67 | 68 | inline isl::pw_aff operator/(isl::pw_aff A, isl::pw_aff B) { 69 | return A.tdiv_q(B); 70 | } 71 | 72 | inline isl::pw_aff operator/(isl::val V, isl::pw_aff A) { 73 | isl::pw_aff AV(A.domain(), V); 74 | return AV / A; 75 | } 76 | 77 | inline isl::pw_aff operator/(isl::pw_aff A, isl::val V) { 78 | isl::pw_aff AV(A.domain(), V); 79 | return A / AV; 80 | } 81 | 82 | inline isl::pw_aff operator/(int i, isl::pw_aff A) { 83 | isl::ctx ctx = A.get_ctx(); 84 | return isl::val(ctx, i) / A; 85 | } 86 | 87 | inline isl::pw_aff operator/(isl::pw_aff A, int 
i) { 88 | isl::ctx ctx = A.get_ctx(); 89 | return A / isl::val(ctx, i); 90 | } 91 | 92 | inline isl::pw_aff operator%(isl::pw_aff A, isl::pw_aff B) { 93 | return A.tdiv_r(B); 94 | } 95 | 96 | inline isl::pw_aff operator%(isl::val V, isl::pw_aff A) { 97 | isl::pw_aff AV(A.domain(), V); 98 | return AV % A; 99 | } 100 | 101 | inline isl::pw_aff operator%(isl::pw_aff A, isl::val V) { 102 | isl::pw_aff AV(A.domain(), V); 103 | return A % AV; 104 | } 105 | 106 | inline isl::pw_aff operator%(int i, isl::pw_aff A) { 107 | isl::ctx ctx = A.get_ctx(); 108 | return isl::val(ctx, i) % A; 109 | } 110 | 111 | inline isl::pw_aff operator%(isl::pw_aff A, int i) { 112 | isl::ctx ctx = A.get_ctx(); 113 | return A % isl::val(ctx, i); 114 | } 115 | 116 | inline isl::set operator==(isl::pw_aff A, int i) { 117 | return A.eq_set(0 * A + i); 118 | } 119 | 120 | inline isl::set operator==(isl::pw_aff A, isl::pw_aff B) { 121 | return A.eq_set(B); 122 | } 123 | 124 | inline isl::set operator>=(isl::pw_aff A, isl::pw_aff B) { 125 | return A.ge_set(B); 126 | } 127 | 128 | inline isl::set operator&&(isl::set A, isl::set B) { 129 | return A.intersect(B); 130 | } 131 | 132 | inline isl::set operator||(isl::set A, isl::set B) { 133 | return A.unite(B); 134 | } 135 | 136 | #endif 137 | -------------------------------------------------------------------------------- /tests/BlinkTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include 8 | 9 | #include "../src/HayStack.h" 10 | #include "CacheEmulator.h" 11 | 12 | // test setup 13 | const int ElementSize = 4; 14 | const int CacheLineSize = 16 / ElementSize; 15 | const int CacheSize = 1024 / ElementSize; 16 | 17 | // execute the blink code with the emulator 18 | void emulateBlink(int N1, int N2, int CacheLineSize, CacheEmulator &Emulator) { 19 | int TimeStamp = 0; 20 | int Stride = (N2 + CacheLineSize - 1) / 
CacheLineSize; 21 | auto CL = [&](int i, int j) { return i * Stride + j / CacheLineSize; }; 22 | // define the index to cachline conversion 23 | // run blink 24 | for (int t = 0; t < 4; ++t) { 25 | for (int i = 0; i < N1; i++) 26 | for (int j = 0; j < N2; j++) 27 | // A[N1][N2] = 0.0; 28 | Emulator.accessMemory("S0", TimeStamp, {CL(i, j)}); 29 | for (int i = 0; i < N1; i++) 30 | for (int j = 0; j < N2; j++) 31 | // A[N1][N2] = 1.0; 32 | Emulator.accessMemory("S1", TimeStamp, {CL(i, j)}); 33 | } 34 | } 35 | 36 | class BlinkTest : public ::testing::Test { 37 | protected: 38 | BlinkTest() { 39 | Context_ = isl_ctx_alloc_with_pet_options(); 40 | isl_options_set_on_error(Context_, ISL_ON_ERROR_ABORT); 41 | 42 | Base_ = new HayStack(Context_, {CacheLineSize * ElementSize, {CacheSize * ElementSize}}, {true}); 43 | Base_->compileProgram("./blink.c"); 44 | } 45 | 46 | virtual ~BlinkTest() { 47 | delete Base_; 48 | isl_ctx_free(Context_); 49 | } 50 | 51 | virtual void SetUp() { 52 | // get fresh copy for every test 53 | Model_ = new HayStack(*Base_); 54 | } 55 | 56 | virtual void TearDown() { delete Model_; } 57 | 58 | isl_ctx *Context_; 59 | HayStack *Base_; 60 | HayStack *Model_; 61 | }; 62 | 63 | TEST_F(BlinkTest, CapacityMissesEven) { 64 | // define the setup 65 | int N1 = 32; 66 | int N2 = 16; 67 | int CacheLines = N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 68 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 69 | // emulate the stack distances 70 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 71 | emulateBlink(N1, N2, CacheLineSize, Emulator); 72 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 73 | // compute the stack distances 74 | Model_->initModel(Parameters); 75 | std::map> ComputedCapacityMisses; 76 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 77 | std::string Statement = ComputedCapacityMiss.first; 78 | auto length = Statement.find_first_of("("); 79 
| Statement = Statement.substr(0, length); 80 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 81 | } 82 | 83 | // print computed and expected stack distances 84 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 85 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 86 | for (auto Distance : ComputedCapacityMiss.second) 87 | printf("%ld ", Distance); 88 | printf("\n"); 89 | } 90 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 91 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 92 | for (auto Distance : ExpectedCapacityMiss.second) 93 | printf("%ld ", Distance); 94 | printf("\n"); 95 | } 96 | 97 | // make sure the sizes agree 98 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 99 | 100 | // compare the stack distances for all statements 101 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 102 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 103 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 104 | 105 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 106 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 107 | } 108 | } 109 | 110 | TEST_F(BlinkTest, CompulsoryMissesEven) { 111 | // define the setup 112 | int N1 = 32; 113 | int N2 = 16; 114 | int CacheLines = N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 115 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 116 | // emulate the stack distances 117 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 118 | emulateBlink(N1, N2, CacheLineSize, Emulator); 119 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 120 | // compute the stack distances 121 | Model_->initModel(Parameters); 122 | std::map> ComputedCompulsoryMisses; 123 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 124 | 
std::string Statement = ComputedCompulsoryMiss.first; 125 | auto length = Statement.find_first_of("("); 126 | Statement = Statement.substr(0, length); 127 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 128 | } 129 | 130 | // print computed and expected stack distances 131 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 132 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 133 | for (auto Distance : ComputedCompulsoryMiss.second) 134 | printf("%ld ", Distance); 135 | printf("\n"); 136 | } 137 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 138 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 139 | for (auto Distance : ExpectedCompulsoryMiss.second) 140 | printf("%ld ", Distance); 141 | printf("\n"); 142 | } 143 | 144 | // make sure the sizes agree 145 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 146 | 147 | // compare the stack distances for all statements 148 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 149 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 150 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 151 | 152 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 153 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 154 | } 155 | } 156 | 157 | TEST_F(BlinkTest, CapacityMissesOdd) { 158 | // define the setup 159 | int N1 = 33; 160 | int N2 = 11; 161 | int CacheLines = N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 162 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 163 | // emulate the stack distances 164 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 165 | emulateBlink(N1, N2, CacheLineSize, Emulator); 166 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 167 | // compute the stack distances 
168 | Model_->initModel(Parameters); 169 | std::map> ComputedCapacityMisses; 170 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 171 | std::string Statement = ComputedCapacityMiss.first; 172 | auto length = Statement.find_first_of("("); 173 | Statement = Statement.substr(0, length); 174 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 175 | } 176 | 177 | // print computed and expected stack distances 178 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 179 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 180 | for (auto Distance : ComputedCapacityMiss.second) 181 | printf("%ld ", Distance); 182 | printf("\n"); 183 | } 184 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 185 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 186 | for (auto Distance : ExpectedCapacityMiss.second) 187 | printf("%ld ", Distance); 188 | printf("\n"); 189 | } 190 | 191 | // make sure the sizes agree 192 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 193 | 194 | // compare the stack distances for all statements 195 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 196 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 197 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 198 | 199 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 200 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 201 | } 202 | } 203 | 204 | TEST_F(BlinkTest, CompulsoryMissesOdd) { 205 | // define the setup 206 | int N1 = 33; 207 | int N2 = 11; 208 | int CacheLines = N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 209 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 210 | // emulate the stack distances 211 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 212 | emulateBlink(N1, N2, CacheLineSize, 
Emulator); 213 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 214 | // compute the stack distances 215 | Model_->initModel(Parameters); 216 | std::map> ComputedCompulsoryMisses; 217 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 218 | std::string Statement = ComputedCompulsoryMiss.first; 219 | auto length = Statement.find_first_of("("); 220 | Statement = Statement.substr(0, length); 221 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 222 | } 223 | 224 | // print computed and expected stack distances 225 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 226 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 227 | for (auto Distance : ComputedCompulsoryMiss.second) 228 | printf("%ld ", Distance); 229 | printf("\n"); 230 | } 231 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 232 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 233 | for (auto Distance : ExpectedCompulsoryMiss.second) 234 | printf("%ld ", Distance); 235 | printf("\n"); 236 | } 237 | 238 | // make sure the sizes agree 239 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 240 | 241 | // compare the stack distances for all statements 242 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 243 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 244 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 245 | 246 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 247 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /tests/CacheEmulator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include 6 | 7 | #include 
"CacheEmulator.h" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | void CacheEmulator::accessMemory(std::string Statement, int &TimeStamp, std::vector CacheLines) { 14 | // initialize the result if necessary 15 | if (StackDistances_[Statement].size() == 0) 16 | StackDistances_[Statement].resize(CacheLines.size(), 0); 17 | if (CapacityMisses_[Statement].size() == 0) 18 | CapacityMisses_[Statement].resize(CacheLines.size(), 0); 19 | if (CompulsoryMisses_[Statement].size() == 0) 20 | CompulsoryMisses_[Statement].resize(CacheLines.size(), 0); 21 | // process the accesses one after the other 22 | for (int i = 0; i < CacheLines.size(); i++) { 23 | // get the last access 24 | assert(CacheLines[i] < TimeStamps_.size()); 25 | long LastAccess = TimeStamps_[CacheLines[i]]; 26 | // count the number of cache lines touched since the last access 27 | long StackDistance = 0; 28 | for (long j = 0; j < TimeStamps_.size(); ++j) { 29 | if (LastAccess >= 0 && LastAccess <= TimeStamps_[j]) 30 | StackDistance++; 31 | } 32 | // update the time stamp 33 | TimeStamps_[CacheLines[i]] = TimeStamp; 34 | // store the results 35 | if (StackDistance > StackDistances_[Statement][i]) 36 | StackDistances_[Statement][i] = StackDistance; 37 | if (LastAccess == -1) 38 | CompulsoryMisses_[Statement][i]++; 39 | if (StackDistance > CacheSize_) 40 | CapacityMisses_[Statement][i]++; 41 | // increment the time stamp 42 | TimeStamp++; 43 | } 44 | } 45 | 46 | std::map> CacheEmulator::getStackDistances() const { 47 | // remove zero entries 48 | std::map> Results; 49 | std::copy_if(StackDistances_.begin(), StackDistances_.end(), std::inserter(Results, Results.end()), 50 | [](decltype(Results)::value_type const &Statement) { 51 | return *std::max_element(Statement.second.begin(), Statement.second.end()) > 0; 52 | }); 53 | return Results; 54 | } 55 | 56 | std::map> CacheEmulator::getCapacityMisses() const { return CapacityMisses_; } 57 | 58 | std::map> CacheEmulator::getCompulsoryMisses() const { return 
CompulsoryMisses_; } -------------------------------------------------------------------------------- /tests/CacheEmulator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #ifndef _CHACHE_EMULATOR_H_ 6 | #define _CHACHE_EMULATOR_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "../src/Definitions.h" 13 | 14 | class CacheEmulator { 15 | public: 16 | CacheEmulator() = delete; 17 | CacheEmulator(int CacheLines, int CacheSize) : TimeStamps_(CacheLines, -1), CacheSize_(CacheSize) {} 18 | 19 | void accessMemory(std::string Statement, int &TimeStamp, std::vector CacheLines); 20 | 21 | std::map> getStackDistances() const; 22 | std::map> getCapacityMisses() const; 23 | std::map> getCompulsoryMisses() const; 24 | 25 | private: 26 | int CacheSize_; 27 | // time stamp per cache line 28 | std::vector TimeStamps_; 29 | 30 | // cache information per statement 31 | std::map> StackDistances_; 32 | std::map> CapacityMisses_; 33 | std::map> CompulsoryMisses_; 34 | }; 35 | 36 | #endif -------------------------------------------------------------------------------- /tests/CholeskyTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include 8 | 9 | #include "../src/HayStack.h" 10 | #include "CacheEmulator.h" 11 | 12 | // test setup 13 | const int ElementSize = 4; 14 | const int CacheLineSize = 16 / ElementSize; 15 | const int CacheSize = 1024 / ElementSize; 16 | 17 | // execute cholesky with the emulator 18 | void emulateCholesky(int N, int CacheLineSize, CacheEmulator &Emulator) { 19 | int TimeStamp = 0; 20 | // define the index to cachline conversion 21 | int Stride = (N + CacheLineSize - 1) / CacheLineSize; 22 | auto CL = [&](int i, int j) { return i * Stride + j / CacheLineSize; }; 23 | // run cholesky 24 | for (int i = 0; i < N; i++) { 25 | for 
(int j = 0; j < i; j++) { 26 | for (int k = 0; k < j; k++) { 27 | // A[i][j] -= A[i][k] * A[j][k]; 28 | Emulator.accessMemory("S0", TimeStamp, {CL(i, j), CL(i, k), CL(j, k), CL(i, j)}); 29 | } 30 | // A[i][j] /= A[j][j]; 31 | Emulator.accessMemory("S1", TimeStamp, {CL(i, j), CL(j, j), CL(i, j)}); 32 | } 33 | 34 | for (int k = 0; k < i; k++) { 35 | // A[i][i] -= A[i][k] * A[i][k]; 36 | Emulator.accessMemory("S2", TimeStamp, {CL(i, i), CL(i, k), CL(i, k), CL(i, i)}); 37 | } 38 | // A[i][i] = SQRT_FUN(A[i][i]); 39 | Emulator.accessMemory("S3", TimeStamp, {CL(i, i), CL(i, i)}); 40 | } 41 | } 42 | 43 | class CholeskyTest : public ::testing::Test { 44 | protected: 45 | CholeskyTest() { 46 | Context_ = isl_ctx_alloc_with_pet_options(); 47 | isl_options_set_on_error(Context_, ISL_ON_ERROR_ABORT); 48 | 49 | Base_ = new HayStack(Context_, {CacheLineSize * ElementSize, {CacheSize * ElementSize}}, {true}); 50 | Base_->compileProgram("./cholesky.c"); 51 | } 52 | 53 | virtual ~CholeskyTest() { 54 | delete Base_; 55 | isl_ctx_free(Context_); 56 | } 57 | 58 | virtual void SetUp() { 59 | // get fresh copy for every test 60 | Model_ = new HayStack(*Base_); 61 | } 62 | 63 | virtual void TearDown() { delete Model_; } 64 | 65 | isl_ctx *Context_; 66 | HayStack *Base_; 67 | HayStack *Model_; 68 | }; 69 | 70 | TEST_F(CholeskyTest, CapacityMissesEven) { 71 | // define the setup 72 | int N = 32; 73 | int CacheLines = N * ((N + CacheLineSize - 1) / CacheLineSize); 74 | std::vector Parameters = {std::make_pair(std::string("N"), N)}; 75 | // emulate the stack distances 76 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 77 | emulateCholesky(N, CacheLineSize, Emulator); 78 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 79 | // compute the stack distances 80 | Model_->initModel(Parameters); 81 | // 82 | std::map> ComputedCapacityMisses; 83 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 84 | std::string Statement = ComputedCapacityMiss.first; 85 | 
auto length = Statement.find_first_of("("); 86 | Statement = Statement.substr(0, length); 87 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 88 | } 89 | 90 | // print computed and expected stack distances 91 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 92 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 93 | for (auto Distance : ComputedCapacityMiss.second) 94 | printf("%ld ", Distance); 95 | printf("\n"); 96 | } 97 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 98 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 99 | for (auto Distance : ExpectedCapacityMiss.second) 100 | printf("%ld ", Distance); 101 | printf("\n"); 102 | } 103 | 104 | // make sure the sizes agree 105 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 106 | 107 | // compare the stack distances for all statements 108 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 109 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 110 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 111 | 112 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 113 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 114 | } 115 | } 116 | 117 | TEST_F(CholeskyTest, CompulsoryMissesEven) { 118 | // define the setup 119 | int N = 32; 120 | int CacheLines = N * ((N + CacheLineSize - 1) / CacheLineSize); 121 | std::vector Parameters = {std::make_pair(std::string("N"), N)}; 122 | // emulate the stack distances 123 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 124 | emulateCholesky(N, CacheLineSize, Emulator); 125 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 126 | // compute the stack distances 127 | Model_->initModel(Parameters); 128 | // 129 | std::map> ComputedCompulsoryMisses; 130 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 131 | 
std::string Statement = ComputedCompulsoryMiss.first; 132 | auto length = Statement.find_first_of("("); 133 | Statement = Statement.substr(0, length); 134 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 135 | } 136 | 137 | // print computed and expected stack distances 138 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 139 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 140 | for (auto Distance : ComputedCompulsoryMiss.second) 141 | printf("%ld ", Distance); 142 | printf("\n"); 143 | } 144 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 145 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 146 | for (auto Distance : ExpectedCompulsoryMiss.second) 147 | printf("%ld ", Distance); 148 | printf("\n"); 149 | } 150 | 151 | // make sure the sizes agree 152 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 153 | 154 | // compare the stack distances for all statements 155 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 156 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 157 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 158 | 159 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 160 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 161 | } 162 | } 163 | 164 | TEST_F(CholeskyTest, CapacityMissesOdd) { 165 | // define the setup 166 | int N = 33; 167 | int CacheLines = N * ((N + CacheLineSize - 1) / CacheLineSize); 168 | std::vector Parameters = {std::make_pair(std::string("N"), N)}; 169 | // emulate the stack distances 170 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 171 | emulateCholesky(N, CacheLineSize, Emulator); 172 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 173 | // compute the stack distances 174 | Model_->initModel(Parameters); 175 | // 176 | std::map> 
ComputedCapacityMisses; 177 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 178 | std::string Statement = ComputedCapacityMiss.first; 179 | auto length = Statement.find_first_of("("); 180 | Statement = Statement.substr(0, length); 181 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 182 | } 183 | 184 | // print computed and expected stack distances 185 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 186 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 187 | for (auto Distance : ComputedCapacityMiss.second) 188 | printf("%ld ", Distance); 189 | printf("\n"); 190 | } 191 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 192 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 193 | for (auto Distance : ExpectedCapacityMiss.second) 194 | printf("%ld ", Distance); 195 | printf("\n"); 196 | } 197 | 198 | // make sure the sizes agree 199 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 200 | 201 | // compare the stack distances for all statements 202 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 203 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 204 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 205 | 206 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 207 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 208 | } 209 | } 210 | 211 | TEST_F(CholeskyTest, CompulsoryMissesOdd) { 212 | // define the setup 213 | int N = 33; 214 | int CacheLines = N * ((N + CacheLineSize - 1) / CacheLineSize); 215 | std::vector Parameters = {std::make_pair(std::string("N"), N)}; 216 | // emulate the stack distances 217 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 218 | emulateCholesky(N, CacheLineSize, Emulator); 219 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 220 | // compute the stack distances 221 
| Model_->initModel(Parameters); 222 | // 223 | std::map> ComputedCompulsoryMisses; 224 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 225 | std::string Statement = ComputedCompulsoryMiss.first; 226 | auto length = Statement.find_first_of("("); 227 | Statement = Statement.substr(0, length); 228 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 229 | } 230 | 231 | // print computed and expected stack distances 232 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 233 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 234 | for (auto Distance : ComputedCompulsoryMiss.second) 235 | printf("%ld ", Distance); 236 | printf("\n"); 237 | } 238 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 239 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 240 | for (auto Distance : ExpectedCompulsoryMiss.second) 241 | printf("%ld ", Distance); 242 | printf("\n"); 243 | } 244 | 245 | // make sure the sizes agree 246 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 247 | 248 | // compare the stack distances for all statements 249 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 250 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 251 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 252 | 253 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 254 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /tests/CopyTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include 8 | 9 | #include "../src/HayStack.h" 10 | #include "CacheEmulator.h" 11 | 12 | // test setup 13 | const int 
ElementSize = 4; 14 | const int CacheLineSize = 16 / ElementSize; 15 | const int CacheSize = 1024 / ElementSize; 16 | 17 | // execute the copy code with the emulator 18 | void emulateCopy(int N1, int N2, int N3, int CacheLineSize, CacheEmulator &Emulator) { 19 | int TimeStamp = 0; 20 | int StrideA = (N3 + CacheLineSize - 1) / CacheLineSize; 21 | int StrideB = (N3 + CacheLineSize - 1) / CacheLineSize; 22 | int StrideT = (N1 + CacheLineSize - 1) / CacheLineSize; 23 | int OffsetB = N1 * N2 * StrideA; 24 | int OffsetT = OffsetB + N1 * N2 * StrideB; 25 | auto CLA = [&](int i, int j, int k) { return i * StrideA * N2 + j * StrideA + k / CacheLineSize; }; 26 | auto CLB = [&](int i, int j, int k) { return OffsetB + i * StrideB * N2 + j * StrideB + k / CacheLineSize; }; 27 | auto CLT = [&](int i, int j, int k) { return OffsetT + i * StrideT * N2 + j * StrideT + k / CacheLineSize; }; 28 | // run copy 29 | for (int i = 0; i < N1; i++) 30 | for (int j = 0; j < N2; j++) 31 | for (int k = 0; k < N3; k++) 32 | // T[k][j][i] = A[i][j][k]; 33 | Emulator.accessMemory("S0", TimeStamp, {CLA(i, j, k), CLT(k, j, i)}); 34 | for (int i = 0; i < N1; i++) 35 | for (int j = 0; j < N2; j++) 36 | for (int k = 0; k < N3; k++) 37 | // B[i][j][k] = T[k][j][i]; 38 | Emulator.accessMemory("S1", TimeStamp, {CLT(k, j, i), CLB(i, j, k)}); 39 | } 40 | 41 | class CopyTest : public ::testing::Test { 42 | protected: 43 | CopyTest() { 44 | Context_ = isl_ctx_alloc_with_pet_options(); 45 | isl_options_set_on_error(Context_, ISL_ON_ERROR_ABORT); 46 | 47 | Base_ = new HayStack(Context_, {CacheLineSize * ElementSize, {CacheSize * ElementSize}}, {true}); 48 | Base_->compileProgram("./copy.c"); 49 | } 50 | 51 | virtual ~CopyTest() { 52 | delete Base_; 53 | isl_ctx_free(Context_); 54 | } 55 | 56 | virtual void SetUp() { 57 | // get fresh copy for every test 58 | Model_ = new HayStack(*Base_); 59 | } 60 | 61 | virtual void TearDown() { delete Model_; } 62 | 63 | isl_ctx *Context_; 64 | HayStack *Base_; 65 | 
HayStack *Model_; 66 | }; 67 | 68 | TEST_F(CopyTest, CapacityMissEven) { 69 | // define the setup 70 | int N1 = 32; 71 | int N2 = 16; 72 | int N3 = 8; 73 | int CacheLines = 74 | 2 * N1 * N2 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N2 * N3 * ((N1 + CacheLineSize - 1) / CacheLineSize); 75 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 76 | std::make_pair(std::string("N3"), N3)}; 77 | // emulate the stack distances 78 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 79 | emulateCopy(N1, N2, N3, CacheLineSize, Emulator); 80 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 81 | // compute the stack distances 82 | Model_->initModel(Parameters); 83 | // 84 | std::map> ComputedCapacityMisses; 85 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 86 | std::string Statement = ComputedCapacityMiss.first; 87 | auto length = Statement.find_first_of("("); 88 | Statement = Statement.substr(0, length); 89 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 90 | } 91 | 92 | // print computed and expected stack distances 93 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 94 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 95 | for (auto Distance : ComputedCapacityMiss.second) 96 | printf("%ld ", Distance); 97 | printf("\n"); 98 | } 99 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 100 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 101 | for (auto Distance : ExpectedCapacityMiss.second) 102 | printf("%ld ", Distance); 103 | printf("\n"); 104 | } 105 | 106 | // make sure the sizes agree 107 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 108 | 109 | // compare the stack distances for all statements 110 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 111 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 112 
| ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 113 | 114 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 115 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 116 | } 117 | } 118 | 119 | TEST_F(CopyTest, CompulsoryMissEven) { 120 | // define the setup 121 | int N1 = 32; 122 | int N2 = 16; 123 | int N3 = 8; 124 | int CacheLines = 125 | 2 * N1 * N2 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N3 * N2 * ((N1 + CacheLineSize - 1) / CacheLineSize); 126 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 127 | std::make_pair(std::string("N3"), N3)}; 128 | // emulate the stack distances 129 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 130 | emulateCopy(N1, N2, N3, CacheLineSize, Emulator); 131 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 132 | // compute the stack distances 133 | Model_->initModel(Parameters); 134 | // 135 | std::map> ComputedCompulsoryMisses; 136 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 137 | std::string Statement = ComputedCompulsoryMiss.first; 138 | auto length = Statement.find_first_of("("); 139 | Statement = Statement.substr(0, length); 140 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 141 | } 142 | 143 | // print computed and expected stack distances 144 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 145 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 146 | for (auto Distance : ComputedCompulsoryMiss.second) 147 | printf("%ld ", Distance); 148 | printf("\n"); 149 | } 150 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 151 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 152 | for (auto Distance : ExpectedCompulsoryMiss.second) 153 | printf("%ld ", Distance); 154 | printf("\n"); 155 | } 156 | 157 | // make sure the sizes agree 158 | 
ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 159 | 160 | // compare the stack distances for all statements 161 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 162 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 163 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 164 | 165 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 166 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 167 | } 168 | } 169 | 170 | TEST_F(CopyTest, CapacityMissOdd) { 171 | // define the setup 172 | int N1 = 11; 173 | int N2 = 22; 174 | int N3 = 33; 175 | int CacheLines = 176 | 2 * N1 * N2 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N3 * N2 * ((N1 + CacheLineSize - 1) / CacheLineSize); 177 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 178 | std::make_pair(std::string("N3"), N3)}; 179 | // emulate the stack distances 180 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 181 | emulateCopy(N1, N2, N3, CacheLineSize, Emulator); 182 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 183 | // compute the stack distances 184 | Model_->initModel(Parameters); 185 | // 186 | std::map> ComputedCapacityMisses; 187 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 188 | std::string Statement = ComputedCapacityMiss.first; 189 | auto length = Statement.find_first_of("("); 190 | Statement = Statement.substr(0, length); 191 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 192 | } 193 | 194 | // print computed and expected stack distances 195 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 196 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 197 | for (auto Distance : ComputedCapacityMiss.second) 198 | printf("%ld ", Distance); 199 | printf("\n"); 200 | } 201 | for (auto 
ExpectedCapacityMiss : ExpectedCapacityMisses) { 202 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 203 | for (auto Distance : ExpectedCapacityMiss.second) 204 | printf("%ld ", Distance); 205 | printf("\n"); 206 | } 207 | 208 | // make sure the sizes agree 209 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 210 | 211 | // compare the stack distances for all statements 212 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 213 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 214 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 215 | 216 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 217 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 218 | } 219 | } 220 | 221 | TEST_F(CopyTest, CompulsoryMissOdd) { 222 | // define the setup 223 | int N1 = 11; 224 | int N2 = 22; 225 | int N3 = 33; 226 | int CacheLines = 227 | 2 * N1 * N2 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N3 * N2 * ((N1 + CacheLineSize - 1) / CacheLineSize); 228 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 229 | std::make_pair(std::string("N3"), N3)}; 230 | // emulate the stack distances 231 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 232 | emulateCopy(N1, N2, N3, CacheLineSize, Emulator); 233 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 234 | // compute the stack distances 235 | Model_->initModel(Parameters); 236 | // 237 | std::map> ComputedCompulsoryMisses; 238 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 239 | std::string Statement = ComputedCompulsoryMiss.first; 240 | auto length = Statement.find_first_of("("); 241 | Statement = Statement.substr(0, length); 242 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 243 | } 244 | 245 | // print computed and expected stack distances 246 
| for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 247 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 248 | for (auto Distance : ComputedCompulsoryMiss.second) 249 | printf("%ld ", Distance); 250 | printf("\n"); 251 | } 252 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 253 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 254 | for (auto Distance : ExpectedCompulsoryMiss.second) 255 | printf("%ld ", Distance); 256 | printf("\n"); 257 | } 258 | 259 | // make sure the sizes agree 260 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 261 | 262 | // compare the stack distances for all statements 263 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 264 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 265 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 266 | 267 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 268 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUNDLED_ISL 2 | ISL_LA = $(top_builddir)/isl/libisl.la 3 | endif 4 | if BUILD_ISL 5 | ISL_LA = $(ISL_BUILDDIR)/libisl.la 6 | endif 7 | 8 | if BUNDLED_BARVINOK 9 | BARVINOK_LA = $(top_builddir)/barvinok/libbarvinok.la 10 | endif 11 | if BUILD_BARVINOK 12 | BARVINOK_LA = $(BARVINOK_BUILDDIR)/libbarvinok.la 13 | endif 14 | 15 | if BUNDLED_PET 16 | PET_LA = $(top_builddir)/pet/libpet.la 17 | endif 18 | if BUILD_PET 19 | PET_LA = $(PET_BUILDDIR)/libpet.la 20 | endif 21 | 22 | if DEBUG 23 | AM_CFLAGS = -g3 -O0 24 | AM_CXXFLAGS = -g3 -O0 25 | else 26 | AM_CFLAGS = -O3 -DNDEBUG 27 | AM_CXXFLAGS = -O3 -DNDEBUG 28 | endif 29 | 30 | FORCE: 31 | ../src/libhaystack.la: FORCE 32 | cd ../src; 
$(MAKE) $(AM_MAKEFLAGS) libhaystack.la 33 | 34 | AM_CPPFLAGS = @ISL_CFLAGS@ @BARVINOK_CFLAGS@ @PET_CFLAGS@ 35 | LIB_ISL = $(ISL_LA) @ISL_LIBS@ 36 | LIB_BARVINOK = $(BARVINOK_LA) @BARVINOK_LIBS@ 37 | LIB_PET = $(PET_LA) @PET_LIBS@ 38 | 39 | check_LTLIBRARIES = libgtest.la 40 | libgtest_la_SOURCES = ../googletest/googletest/src/gtest-all.cc 41 | libgtest_la_CPPFLAGS = -I$(top_srcdir)/googletest/googletest/include -I$(top_srcdir)/googletest/googletest 42 | libgtest_la_LDFLAGS = -pthread 43 | 44 | check_PROGRAMS = \ 45 | ToyTest \ 46 | BlinkTest \ 47 | CopyTest \ 48 | CholeskyTest \ 49 | MultiplicationTest \ 50 | StencilTest 51 | 52 | BlinkTest_SOURCES = \ 53 | main.cpp \ 54 | CacheEmulator.cpp \ 55 | BlinkTest.cpp 56 | 57 | BlinkTest_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS) -I$(top_srcdir)/googletest/googletest/include -I$(top_srcdir)/googletest/googletest -pthread 58 | BlinkTest_LDFLAGS = -pthread 59 | BlinkTest_LDADD = libgtest.la ../src/libhaystack.la $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET) 60 | 61 | CopyTest_SOURCES = \ 62 | main.cpp \ 63 | CacheEmulator.cpp \ 64 | CopyTest.cpp 65 | 66 | CopyTest_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS) -I$(top_srcdir)/googletest/googletest/include -I$(top_srcdir)/googletest/googletest -pthread 67 | CopyTest_LDFLAGS = -pthread 68 | CopyTest_LDADD = libgtest.la ../src/libhaystack.la $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET) 69 | 70 | 71 | ToyTest_SOURCES = \ 72 | main.cpp \ 73 | CacheEmulator.cpp \ 74 | ToyTest.cpp 75 | 76 | ToyTest_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS) -I$(top_srcdir)/googletest/googletest/include -I$(top_srcdir)/googletest/googletest -pthread 77 | ToyTest_LDFLAGS = -pthread 78 | ToyTest_LDADD = libgtest.la ../src/libhaystack.la $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET) 79 | 80 | CholeskyTest_SOURCES = \ 81 | main.cpp \ 82 | CacheEmulator.cpp \ 83 | CholeskyTest.cpp 84 | 85 | CholeskyTest_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS) -I$(top_srcdir)/googletest/googletest/include 
-I$(top_srcdir)/googletest/googletest -pthread 86 | CholeskyTest_LDFLAGS = -pthread 87 | CholeskyTest_LDADD = libgtest.la ../src/libhaystack.la $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET) 88 | 89 | MultiplicationTest_SOURCES = \ 90 | main.cpp \ 91 | CacheEmulator.cpp \ 92 | MultiplicationTest.cpp 93 | 94 | MultiplicationTest_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS) -I$(top_srcdir)/googletest/googletest/include -I$(top_srcdir)/googletest/googletest -pthread 95 | MultiplicationTest_LDFLAGS = -pthread 96 | MultiplicationTest_LDADD = libgtest.la ../src/libhaystack.la $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET) 97 | 98 | StencilTest_SOURCES = \ 99 | main.cpp \ 100 | CacheEmulator.cpp \ 101 | StencilTest.cpp 102 | 103 | StencilTest_CPPFLAGS = $(AM_CPPFLAGS) $(BOOST_CPPFLAGS) -I$(top_srcdir)/googletest/googletest/include -I$(top_srcdir)/googletest/googletest -pthread 104 | StencilTest_LDFLAGS = -pthread 105 | StencilTest_LDADD = libgtest.la ../src/libhaystack.la $(LIB_ISL) $(LIB_BARVINOK) $(LIB_PET) 106 | 107 | TESTS = ToyTest 108 | TESTS += CopyTest 109 | TESTS += BlinkTest 110 | TESTS += CholeskyTest 111 | TESTS += MultiplicationTest 112 | TESTS += StencilTest -------------------------------------------------------------------------------- /tests/MultiplicationTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include 8 | 9 | #include "../src/HayStack.h" 10 | #include "CacheEmulator.h" 11 | 12 | // test setup 13 | const int ElementSize = 4; 14 | const int CacheLineSize = 16 / ElementSize; 15 | const int CacheSize = 1024 / ElementSize; 16 | 17 | // execute multiplication with the emulator 18 | void emulateMultiplication(int N1, int N2, int N3, int CacheLineSize, CacheEmulator &Emulator) { 19 | int TimeStamp = 0; 20 | // define the index to cachline conversion 21 | int StrideA = (N3 + CacheLineSize - 1) / CacheLineSize; 22 | int StrideB = (N2 + 
CacheLineSize - 1) / CacheLineSize; 23 | int StrideC = (N2 + CacheLineSize - 1) / CacheLineSize; 24 | int OffsetB = N1 * StrideA; 25 | int OffsetC = OffsetB + N3 * StrideB; 26 | auto CLA = [&](int i, int j) { return i * StrideA + j / CacheLineSize; }; 27 | auto CLB = [&](int i, int j) { return OffsetB + i * StrideB + j / CacheLineSize; }; 28 | auto CLC = [&](int i, int j) { return OffsetC + i * StrideC + j / CacheLineSize; }; 29 | // run multiplication 30 | for (int i = 0; i < N1; i++) { 31 | for (int j = 0; j < N2; j++) 32 | // C[i][j] *= beta; 33 | Emulator.accessMemory("S0", TimeStamp, {CLC(i, j), CLC(i, j)}); 34 | for (int k = 0; k < N3; k++) { 35 | for (int j = 0; j < N2; j++) 36 | // C[i][j] += alpha * A[i][k] * B[k][j]; 37 | Emulator.accessMemory("S1", TimeStamp, {CLC(i, j), CLA(i, k), CLB(k, j), CLC(i, j)}); 38 | } 39 | } 40 | } 41 | 42 | class MultiplicationTest : public ::testing::Test { 43 | protected: 44 | MultiplicationTest() { 45 | Context_ = isl_ctx_alloc_with_pet_options(); 46 | isl_options_set_on_error(Context_, ISL_ON_ERROR_ABORT); 47 | 48 | Base_ = new HayStack(Context_, {CacheLineSize * ElementSize, {CacheSize * ElementSize}}, {true}); 49 | Base_->compileProgram("./multiplication.c"); 50 | } 51 | 52 | virtual ~MultiplicationTest() { 53 | delete Base_; 54 | isl_ctx_free(Context_); 55 | } 56 | 57 | virtual void SetUp() { 58 | // get fresh copy for every test 59 | Model_ = new HayStack(*Base_); 60 | } 61 | 62 | virtual void TearDown() { delete Model_; } 63 | 64 | isl_ctx *Context_; 65 | HayStack *Base_; 66 | HayStack *Model_; 67 | }; 68 | 69 | TEST_F(MultiplicationTest, CapacityMissesEven) { 70 | // define the setup 71 | int N1 = 32; 72 | int N2 = 16; 73 | int N3 = 24; 74 | int CacheLines = N1 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N3 * ((N2 + CacheLineSize - 1) / CacheLineSize) + 75 | N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 76 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 
77 | std::make_pair(std::string("N3"), N3)}; 78 | // emulate the stack distances 79 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 80 | emulateMultiplication(N1, N2, N3, CacheLineSize, Emulator); 81 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 82 | // compute the stack distances 83 | Model_->initModel(Parameters); 84 | // 85 | std::map> ComputedCapacityMisses; 86 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 87 | std::string Statement = ComputedCapacityMiss.first; 88 | auto length = Statement.find_first_of("("); 89 | Statement = Statement.substr(0, length); 90 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 91 | } 92 | 93 | // print computed and expected stack distances 94 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 95 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 96 | for (auto Distance : ComputedCapacityMiss.second) 97 | printf("%ld ", Distance); 98 | printf("\n"); 99 | } 100 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 101 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 102 | for (auto Distance : ExpectedCapacityMiss.second) 103 | printf("%ld ", Distance); 104 | printf("\n"); 105 | } 106 | 107 | // make sure the sizes agree 108 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 109 | 110 | // compare the stack distances for all statements 111 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 112 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 113 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 114 | 115 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 116 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 117 | } 118 | } 119 | 120 | TEST_F(MultiplicationTest, CompulsoryMissesEven) { 121 | // define the setup 122 | int N1 = 32; 123 | int N2 = 16; 124 | int N3 = 24; 
125 | int CacheLines = N1 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N3 * ((N2 + CacheLineSize - 1) / CacheLineSize) + 126 | N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 127 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 128 | std::make_pair(std::string("N3"), N3)}; 129 | // emulate the stack distances 130 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 131 | emulateMultiplication(N1, N2, N3, CacheLineSize, Emulator); 132 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 133 | // compute the stack distances 134 | Model_->initModel(Parameters); 135 | // 136 | std::map> ComputedCompulsoryMisses; 137 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 138 | std::string Statement = ComputedCompulsoryMiss.first; 139 | auto length = Statement.find_first_of("("); 140 | Statement = Statement.substr(0, length); 141 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 142 | } 143 | 144 | // print computed and expected stack distances 145 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 146 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 147 | for (auto Distance : ComputedCompulsoryMiss.second) 148 | printf("%ld ", Distance); 149 | printf("\n"); 150 | } 151 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 152 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 153 | for (auto Distance : ExpectedCompulsoryMiss.second) 154 | printf("%ld ", Distance); 155 | printf("\n"); 156 | } 157 | 158 | // make sure the sizes agree 159 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 160 | 161 | // compare the stack distances for all statements 162 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 163 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 164 | 
ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 165 | 166 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 167 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 168 | } 169 | } 170 | 171 | TEST_F(MultiplicationTest, CapacityMissesOdd) { 172 | // define the setup 173 | int N1 = 33; 174 | int N2 = 11; 175 | int N3 = 22; 176 | int CacheLines = N1 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N3 * ((N2 + CacheLineSize - 1) / CacheLineSize) + 177 | N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 178 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 179 | std::make_pair(std::string("N3"), N3)}; 180 | // emulate the stack distances 181 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 182 | emulateMultiplication(N1, N2, N3, CacheLineSize, Emulator); 183 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 184 | // compute the stack distances 185 | Model_->initModel(Parameters); 186 | // 187 | std::map> ComputedCapacityMisses; 188 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 189 | std::string Statement = ComputedCapacityMiss.first; 190 | auto length = Statement.find_first_of("("); 191 | Statement = Statement.substr(0, length); 192 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 193 | } 194 | 195 | // print computed and expected stack distances 196 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 197 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 198 | for (auto Distance : ComputedCapacityMiss.second) 199 | printf("%ld ", Distance); 200 | printf("\n"); 201 | } 202 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 203 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 204 | for (auto Distance : ExpectedCapacityMiss.second) 205 | printf("%ld ", Distance); 206 | printf("\n"); 207 | } 208 | 209 | // make 
sure the sizes agree 210 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 211 | 212 | // compare the stack distances for all statements 213 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 214 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 215 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 216 | 217 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 218 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 219 | } 220 | } 221 | 222 | TEST_F(MultiplicationTest, CompulsoryMissesOdd) { 223 | // define the setup 224 | int N1 = 33; 225 | int N2 = 11; 226 | int N3 = 22; 227 | int CacheLines = N1 * ((N3 + CacheLineSize - 1) / CacheLineSize) + N3 * ((N2 + CacheLineSize - 1) / CacheLineSize) + 228 | N1 * ((N2 + CacheLineSize - 1) / CacheLineSize); 229 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2), 230 | std::make_pair(std::string("N3"), N3)}; 231 | // emulate the stack distances 232 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 233 | emulateMultiplication(N1, N2, N3, CacheLineSize, Emulator); 234 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 235 | // compute the stack distances 236 | Model_->initModel(Parameters); 237 | // 238 | std::map> ComputedCompulsoryMisses; 239 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 240 | std::string Statement = ComputedCompulsoryMiss.first; 241 | auto length = Statement.find_first_of("("); 242 | Statement = Statement.substr(0, length); 243 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 244 | } 245 | 246 | // print computed and expected stack distances 247 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 248 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 249 | for (auto Distance : ComputedCompulsoryMiss.second) 
250 | printf("%ld ", Distance); 251 | printf("\n"); 252 | } 253 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 254 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 255 | for (auto Distance : ExpectedCompulsoryMiss.second) 256 | printf("%ld ", Distance); 257 | printf("\n"); 258 | } 259 | 260 | // make sure the sizes agree 261 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 262 | 263 | // compare the stack distances for all statements 264 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 265 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 266 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 267 | 268 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 269 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /tests/StencilTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include 8 | 9 | #include "../src/HayStack.h" 10 | #include "CacheEmulator.h" 11 | 12 | // test setup 13 | const int ElementSize = 4; 14 | const int CacheLineSize = 16 / ElementSize; 15 | const int CacheSize = 1024 / ElementSize; 16 | 17 | // execute stencil with the emulator 18 | void emulateStencil(int N1, int N2, int CacheLineSize, CacheEmulator &Emulator) { 19 | int TimeStamp = 0; 20 | // define the index to cachline conversion 21 | int Stride = (N2 + CacheLineSize - 1) / CacheLineSize; 22 | int Offset = N2 * Stride; 23 | auto CLA = [&](int i, int j) { return i * Stride + j / CacheLineSize; }; 24 | auto CLB = [&](int i, int j) { return Offset + i * Stride + j / CacheLineSize; }; 25 | // run stencil 26 | for (int t = 0; t < N1; t++) { 27 | for (int i = 1; i < N2 - 1; i++) 28 | for 
(int j = 1; j < N2 - 1; j++) 29 | // B[i][j] = 0.2 * (A[i][j] + A[i][j - 1] + A[i][1 + j] + A[1 + i][j] + A[i - 1][j]); 30 | Emulator.accessMemory("S0", TimeStamp, 31 | {CLA(i, j), CLA(i, j - 1), CLA(i, j + 1), CLA(i + 1, j), CLA(i - 1, j), CLB(i, j)}); 32 | for (int i = 1; i < N2 - 1; i++) 33 | for (int j = 1; j < N2 - 1; j++) 34 | // A[i][j] = 0.2 * (B[i][j] + B[i][j - 1] + B[i][1 + j] + B[1 + i][j] + B[i - 1][j]); 35 | Emulator.accessMemory("S1", TimeStamp, 36 | {CLB(i, j), CLB(i, j - 1), CLB(i, j + 1), CLB(i + 1, j), CLB(i - 1, j), CLA(i, j)}); 37 | } 38 | } 39 | 40 | class StencilTest : public ::testing::Test { 41 | protected: 42 | StencilTest() { 43 | Context_ = isl_ctx_alloc_with_pet_options(); 44 | isl_options_set_on_error(Context_, ISL_ON_ERROR_ABORT); 45 | 46 | Base_ = new HayStack(Context_, {CacheLineSize * ElementSize, {CacheSize * ElementSize}}, {true}); 47 | Base_->compileProgram("./stencil.c"); 48 | } 49 | 50 | virtual ~StencilTest() { 51 | delete Base_; 52 | isl_ctx_free(Context_); 53 | } 54 | 55 | virtual void SetUp() { 56 | // get fresh copy for every test 57 | Model_ = new HayStack(*Base_); 58 | } 59 | 60 | virtual void TearDown() { delete Model_; } 61 | 62 | isl_ctx *Context_; 63 | HayStack *Base_; 64 | HayStack *Model_; 65 | }; 66 | 67 | TEST_F(StencilTest, CapacityMissEven) { 68 | // define the setup 69 | int N1 = 2; 70 | int N2 = 32; 71 | int CacheLines = 2 * N2 * ((N2 + CacheLineSize - 1) / CacheLineSize); 72 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 73 | // emulate the stack distances 74 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 75 | emulateStencil(N1, N2, CacheLineSize, Emulator); 76 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 77 | // compute the stack distances 78 | Model_->initModel(Parameters); 79 | // 80 | std::map> ComputedCapacityMisses; 81 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 82 | std::string 
Statement = ComputedCapacityMiss.first; 83 | auto length = Statement.find_first_of("("); 84 | Statement = Statement.substr(0, length); 85 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 86 | } 87 | 88 | // print computed and expected stack distances 89 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 90 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 91 | for (auto Distance : ComputedCapacityMiss.second) 92 | printf("%ld ", Distance); 93 | printf("\n"); 94 | } 95 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 96 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 97 | for (auto Distance : ExpectedCapacityMiss.second) 98 | printf("%ld ", Distance); 99 | printf("\n"); 100 | } 101 | 102 | // make sure the sizes agree 103 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 104 | 105 | // compare the stack distances for all statements 106 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 107 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 108 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 109 | 110 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 111 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 112 | } 113 | } 114 | 115 | TEST_F(StencilTest, CompulsoryMissEven) { 116 | // define the setup 117 | int N1 = 2; 118 | int N2 = 32; 119 | int CacheLines = 2 * N2 * ((N2 + CacheLineSize - 1) / CacheLineSize); 120 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 121 | // emulate the stack distances 122 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 123 | emulateStencil(N1, N2, CacheLineSize, Emulator); 124 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 125 | // compute the stack distances 126 | Model_->initModel(Parameters); 127 | // 128 | std::map> 
ComputedCompulsoryMisses; 129 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 130 | std::string Statement = ComputedCompulsoryMiss.first; 131 | auto length = Statement.find_first_of("("); 132 | Statement = Statement.substr(0, length); 133 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 134 | } 135 | 136 | // print computed and expected stack distances 137 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 138 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 139 | for (auto Distance : ComputedCompulsoryMiss.second) 140 | printf("%ld ", Distance); 141 | printf("\n"); 142 | } 143 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 144 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 145 | for (auto Distance : ExpectedCompulsoryMiss.second) 146 | printf("%ld ", Distance); 147 | printf("\n"); 148 | } 149 | 150 | // make sure the sizes agree 151 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 152 | 153 | // compare the stack distances for all statements 154 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 155 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 156 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 157 | 158 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 159 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 160 | } 161 | } 162 | 163 | TEST_F(StencilTest, CapacityMissOdd) { 164 | // define the setup 165 | int N1 = 2; 166 | int N2 = 33; 167 | int CacheLines = 2 * N2 * ((N2 + CacheLineSize - 1) / CacheLineSize); 168 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 169 | // emulate the stack distances 170 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 171 | emulateStencil(N1, N2, CacheLineSize, 
Emulator); 172 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 173 | // compute the stack distances 174 | Model_->initModel(Parameters); 175 | // 176 | std::map> ComputedCapacityMisses; 177 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 178 | std::string Statement = ComputedCapacityMiss.first; 179 | auto length = Statement.find_first_of("("); 180 | Statement = Statement.substr(0, length); 181 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 182 | } 183 | 184 | // print computed and expected stack distances 185 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 186 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 187 | for (auto Distance : ComputedCapacityMiss.second) 188 | printf("%ld ", Distance); 189 | printf("\n"); 190 | } 191 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 192 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 193 | for (auto Distance : ExpectedCapacityMiss.second) 194 | printf("%ld ", Distance); 195 | printf("\n"); 196 | } 197 | 198 | // make sure the sizes agree 199 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 200 | 201 | // compare the stack distances for all statements 202 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 203 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 204 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 205 | 206 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 207 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 208 | } 209 | } 210 | 211 | TEST_F(StencilTest, CompulsoryMissOdd) { 212 | // define the setup 213 | int N1 = 2; 214 | int N2 = 33; 215 | int CacheLines = 2 * N2 * ((N2 + CacheLineSize - 1) / CacheLineSize); 216 | std::vector Parameters = {std::make_pair(std::string("N1"), N1), std::make_pair(std::string("N2"), N2)}; 217 | // emulate the stack 
distances 218 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 219 | emulateStencil(N1, N2, CacheLineSize, Emulator); 220 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 221 | // compute the stack distances 222 | Model_->initModel(Parameters); 223 | // 224 | std::map> ComputedCompulsoryMisses; 225 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 226 | std::string Statement = ComputedCompulsoryMiss.first; 227 | auto length = Statement.find_first_of("("); 228 | Statement = Statement.substr(0, length); 229 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 230 | } 231 | 232 | // print computed and expected stack distances 233 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 234 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 235 | for (auto Distance : ComputedCompulsoryMiss.second) 236 | printf("%ld ", Distance); 237 | printf("\n"); 238 | } 239 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 240 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 241 | for (auto Distance : ExpectedCompulsoryMiss.second) 242 | printf("%ld ", Distance); 243 | printf("\n"); 244 | } 245 | 246 | // make sure the sizes agree 247 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 248 | 249 | // compare the stack distances for all statements 250 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 251 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 252 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 253 | 254 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 255 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 256 | } 257 | } 258 | -------------------------------------------------------------------------------- /tests/ToyTest.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include 8 | 9 | #include "../src/HayStack.h" 10 | #include "CacheEmulator.h" 11 | 12 | // test setup 13 | const int ElementSize = 4; 14 | const int CacheLineSize = 16 / ElementSize; 15 | const int CacheSize = 256 / ElementSize; 16 | 17 | // execute the toy code with the emulator 18 | void emulateToy(int N, int CacheLineSize, CacheEmulator &Emulator) { 19 | int TimeStamp = 0; 20 | // define the index to cachline conversion 21 | auto CL = [&](int i) { return i / CacheLineSize; }; 22 | // run toy 23 | for (int i = 0; i < N; i++) { 24 | // A[i] = 0; 25 | Emulator.accessMemory("S0", TimeStamp, {CL(i)}); 26 | // A[N-1-i] = 1; 27 | Emulator.accessMemory("S1", TimeStamp, {CL(N - 1 - i)}); 28 | if (i < N / 2) 29 | // A[2*i] = 2; 30 | Emulator.accessMemory("S2", TimeStamp, {CL(2 * i)}); 31 | } 32 | } 33 | 34 | class ToyTest : public ::testing::Test { 35 | protected: 36 | ToyTest() { 37 | Context_ = isl_ctx_alloc_with_pet_options(); 38 | isl_options_set_on_error(Context_, ISL_ON_ERROR_ABORT); 39 | 40 | Base_ = new HayStack(Context_, {CacheLineSize * ElementSize, {CacheSize * ElementSize}}, {true}); 41 | Base_->compileProgram("./toy.c"); 42 | } 43 | 44 | virtual ~ToyTest() { 45 | delete Base_; 46 | isl_ctx_free(Context_); 47 | } 48 | 49 | virtual void SetUp() { 50 | // get fresh copy for every test 51 | Model_ = new HayStack(*Base_); 52 | } 53 | 54 | virtual void TearDown() { delete Model_; } 55 | 56 | isl_ctx *Context_; 57 | HayStack *Base_; 58 | HayStack *Model_; 59 | }; 60 | 61 | TEST_F(ToyTest, CapacityMissEven) { 62 | // define the setup 63 | int N = 128; 64 | int CacheLines = (N + CacheLineSize - 1) / CacheLineSize; 65 | std::vector Parameters = {std::make_pair(std::string("N"), N)}; 66 | // emulate the stack distances 67 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 68 | 
emulateToy(N, CacheLineSize, Emulator); 69 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 70 | // compute the stack distances 71 | Model_->initModel(Parameters); 72 | // 73 | std::map> ComputedCapacityMisses; 74 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 75 | std::string Statement = ComputedCapacityMiss.first; 76 | auto length = Statement.find_first_of("("); 77 | Statement = Statement.substr(0, length); 78 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 79 | } 80 | 81 | // print computed and expected stack distances 82 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 83 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 84 | for (auto Distance : ComputedCapacityMiss.second) 85 | printf("%ld ", Distance); 86 | printf("\n"); 87 | } 88 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 89 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 90 | for (auto Distance : ExpectedCapacityMiss.second) 91 | printf("%ld ", Distance); 92 | printf("\n"); 93 | } 94 | 95 | // make sure the sizes agree 96 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 97 | 98 | // compare the stack distances for all statements 99 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 100 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 101 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 102 | 103 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 104 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 105 | } 106 | } 107 | 108 | TEST_F(ToyTest, CompulsoryMissEven) { 109 | // define the setup 110 | int N = 128; 111 | int CacheLines = (N + CacheLineSize - 1) / CacheLineSize; 112 | std::vector Parameters = {std::make_pair(std::string("N"), N)}; 113 | // emulate the stack distances 114 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 
115 | emulateToy(N, CacheLineSize, Emulator); 116 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 117 | // compute the stack distances 118 | Model_->initModel(Parameters); 119 | // 120 | std::map> ComputedCompulsoryMisses; 121 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 122 | std::string Statement = ComputedCompulsoryMiss.first; 123 | auto length = Statement.find_first_of("("); 124 | Statement = Statement.substr(0, length); 125 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 126 | } 127 | 128 | // print computed and expected stack distances 129 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 130 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 131 | for (auto Distance : ComputedCompulsoryMiss.second) 132 | printf("%ld ", Distance); 133 | printf("\n"); 134 | } 135 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 136 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 137 | for (auto Distance : ExpectedCompulsoryMiss.second) 138 | printf("%ld ", Distance); 139 | printf("\n"); 140 | } 141 | 142 | // make sure the sizes agree 143 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 144 | 145 | // compare the stack distances for all statements 146 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 147 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 148 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 149 | 150 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 151 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 152 | } 153 | } 154 | 155 | TEST_F(ToyTest, CapacityMissOdd) { 156 | // compare the capacity misses reported by the model with those of the cache emulator for an odd N; setup: CacheLines is N / CacheLineSize rounded up, and N is handed to the model as parameter "N" 157 | int N = 177; 158 | int CacheLines = (N + CacheLineSize - 1) / CacheLineSize; 159 | std::vector Parameters = {std::make_pair(std::string("N"), N)}; 160 | // emulate
the stack distances 161 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 162 | emulateToy(N, CacheLineSize, Emulator); 163 | auto ExpectedCapacityMisses = Emulator.getCapacityMisses(); 164 | // initialize the model with the parameter values 165 | Model_->initModel(Parameters); 166 | // group element 0 of every result's CapacityMisses by statement name, i.e. the result key up to the first "(" (the whole key when it contains none); NOTE(review): std::map and std::vector appear without template arguments in this listing, presumably lost in extraction - confirm against the original file 167 | std::map> ComputedCapacityMisses; 168 | for (auto ComputedCapacityMiss : Model_->countCacheMisses()) { 169 | std::string Statement = ComputedCapacityMiss.first; 170 | auto length = Statement.find_first_of("("); 171 | Statement = Statement.substr(0, length); 172 | ComputedCapacityMisses[Statement].push_back(ComputedCapacityMiss.second.CapacityMisses[0]); 173 | } 174 | 175 | // print the computed and the expected capacity misses of every statement 176 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 177 | printf("Computed %s -> ", ComputedCapacityMiss.first.c_str()); 178 | for (auto Distance : ComputedCapacityMiss.second) 179 | printf("%ld ", Distance); 180 | printf("\n"); 181 | } 182 | for (auto ExpectedCapacityMiss : ExpectedCapacityMisses) { 183 | printf("Expected %s -> ", ExpectedCapacityMiss.first.c_str()); 184 | for (auto Distance : ExpectedCapacityMiss.second) 185 | printf("%ld ", Distance); 186 | printf("\n"); 187 | } 188 | 189 | // abort the test unless both results cover the same number of statements 190 | ASSERT_EQ(ExpectedCapacityMisses.size(), ComputedCapacityMisses.size()); 191 | 192 | // compare the capacity misses entry by entry for all statements: a length mismatch aborts the test (ASSERT), a value mismatch is recorded and the comparison continues (EXPECT) 193 | for (auto ComputedCapacityMiss : ComputedCapacityMisses) { 194 | auto ExpectedCapacityMiss = ExpectedCapacityMisses[ComputedCapacityMiss.first]; 195 | ASSERT_EQ(ExpectedCapacityMiss.size(), ComputedCapacityMiss.second.size()); 196 | 197 | for (int i = 0; i < ComputedCapacityMiss.second.size(); ++i) 198 | EXPECT_EQ(ExpectedCapacityMiss[i], ComputedCapacityMiss.second[i]); 199 | } 200 | } 201 | 202 | TEST_F(ToyTest, CompulsoryMissOdd) { 203 | // compare the compulsory misses reported by the model with those of the cache emulator for an odd N; setup: CacheLines is N / CacheLineSize rounded up, and N is handed to the model as parameter "N" 204 | int N = 177; 205 | int CacheLines = (N + CacheLineSize - 1) / CacheLineSize; 206 | std::vector Parameters =
{std::make_pair(std::string("N"), N)}; 207 | // emulate the toy kernel (emulateToy) and read the expected compulsory misses from the emulator 208 | CacheEmulator Emulator(CacheLines, CacheSize / CacheLineSize); 209 | emulateToy(N, CacheLineSize, Emulator); 210 | auto ExpectedCompulsoryMisses = Emulator.getCompulsoryMisses(); 211 | // initialize the model with the parameter values 212 | Model_->initModel(Parameters); 213 | // group the CompulsoryMisses value of every result by statement name, i.e. the result key up to the first "(" (the whole key when it contains none); NOTE(review): std::map and std::vector appear without template arguments in this listing, presumably lost in extraction - confirm against the original file 214 | std::map> ComputedCompulsoryMisses; 215 | for (auto ComputedCompulsoryMiss : Model_->countCacheMisses()) { 216 | std::string Statement = ComputedCompulsoryMiss.first; 217 | auto length = Statement.find_first_of("("); 218 | Statement = Statement.substr(0, length); 219 | ComputedCompulsoryMisses[Statement].push_back(ComputedCompulsoryMiss.second.CompulsoryMisses); 220 | } 221 | 222 | // print the computed and the expected compulsory misses of every statement 223 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 224 | printf("Computed %s -> ", ComputedCompulsoryMiss.first.c_str()); 225 | for (auto Distance : ComputedCompulsoryMiss.second) 226 | printf("%ld ", Distance); 227 | printf("\n"); 228 | } 229 | for (auto ExpectedCompulsoryMiss : ExpectedCompulsoryMisses) { 230 | printf("Expected %s -> ", ExpectedCompulsoryMiss.first.c_str()); 231 | for (auto Distance : ExpectedCompulsoryMiss.second) 232 | printf("%ld ", Distance); 233 | printf("\n"); 234 | } 235 | 236 | // abort the test unless both results cover the same number of statements 237 | ASSERT_EQ(ExpectedCompulsoryMisses.size(), ComputedCompulsoryMisses.size()); 238 | 239 | // compare the compulsory misses entry by entry for all statements: a length mismatch aborts the test (ASSERT), a value mismatch is recorded and the comparison continues (EXPECT) 240 | for (auto ComputedCompulsoryMiss : ComputedCompulsoryMisses) { 241 | auto ExpectedCompulsoryMiss = ExpectedCompulsoryMisses[ComputedCompulsoryMiss.first]; 242 | ASSERT_EQ(ExpectedCompulsoryMiss.size(), ComputedCompulsoryMiss.second.size()); 243 | 244 | for (int i = 0; i < ComputedCompulsoryMiss.second.size(); ++i) 245 | EXPECT_EQ(ExpectedCompulsoryMiss[i], ComputedCompulsoryMiss.second[i]); 246 | } 247 | } 248 | --------------------------------------------------------------------------------
/tests/inputs/blink.c: -------------------------------------------------------------------------------- 1 | int main() { /* Test input kernel: inside the scop region, four repetitions (t) of two full sweeps over A; S0 stores 0.0 and S1 stores 1.0 to every A[i][j]. */ 2 | int N1, N2; /* never assigned in this file; presumably symbolic size parameters bound by the test - confirm */ 3 | float A[N1][N2]; 4 | 5 | #pragma scop 6 | for (int t = 0; t < 4; ++t) { 7 | for (int i = 0; i < N1; i++) 8 | for (int j = 0; j < N2; j++) 9 | S0: A[i][j] = 0.0; 10 | for (int i = 0; i < N1; i++) 11 | for (int j = 0; j < N2; j++) 12 | S1: A[i][j] = 1.0; 13 | } 14 | #pragma endscop 15 | } 16 | -------------------------------------------------------------------------------- /tests/inputs/cholesky.c: -------------------------------------------------------------------------------- 1 | int main() { /* Test input kernel: triangular loop nest over A (j < i, k < j); S0 and S2 subtract products of earlier entries, S1 divides A[i][j] by A[j][j], and S3 assigns A[i][i] to itself. */ 2 | int N; /* never assigned in this file; presumably a symbolic size parameter bound by the test - confirm */ 3 | float A[N][N]; 4 | 5 | #pragma scop 6 | for (int i = 0; i < N; i++) { 7 | for (int j = 0; j < i; j++) { 8 | for (int k = 0; k < j; k++) { 9 | S0: A[i][j] -= A[i][k] * A[j][k]; 10 | } 11 | S1: A[i][j] /= A[j][j]; 12 | } 13 | for (int k = 0; k < i; k++) { 14 | S2: A[i][i] -= A[i][k] * A[i][k]; 15 | } 16 | S3: A[i][i] = A[i][i]; 17 | } 18 | #pragma endscop 19 | } 20 | -------------------------------------------------------------------------------- /tests/inputs/copy.c: -------------------------------------------------------------------------------- 1 | int main() { /* Test input kernel: S0 copies A[i][j][k] into T with the index order reversed (T[k][j][i]); a second loop nest, S1, copies T[k][j][i] into B[i][j][k]. */ 2 | int N1, N2, N3; /* never assigned in this file; presumably symbolic size parameters bound by the test - confirm */ 3 | float A[N1][N2][N3]; 4 | float B[N1][N2][N3]; 5 | float T[N3][N2][N1]; 6 | 7 | #pragma scop 8 | for (int i = 0; i < N1; i++) 9 | for (int j = 0; j < N2; j++) 10 | for (int k = 0; k < N3; k++) 11 | S0: T[k][j][i] = A[i][j][k]; 12 | for (int i = 0; i < N1; i++) 13 | for (int j = 0; j < N2; j++) 14 | for (int k = 0; k < N3; k++) 15 | S1: B[i][j][k] = T[k][j][i]; 16 | #pragma endscop 17 | } 18 | -------------------------------------------------------------------------------- /tests/inputs/multiplication.c: -------------------------------------------------------------------------------- 1 | int main() { /* Test input kernel: for every row i, S0 scales C[i][j] by beta, then S1 accumulates alpha * A[i][k] * B[k][j] into C[i][j] with the k loop outside the j loop. */ 2 | int N1, N2, N3; /* never assigned in this file; presumably symbolic size parameters bound by the test - confirm */ 3 | float A[N1][N3]; 4 | float B[N3][N2]; 5 | float C[N1][N2]; 6 | float alpha, beta, tmp; /* tmp is not referenced anywhere else in this file */ 7 | 8 | #pragma scop 9 | for (int i
= 0; i < N1; i++) { 10 | for (int j = 0; j < N2; j++) 11 | S0: C[i][j] *= beta; 12 | for (int k = 0; k < N3; k++) 13 | for (int j = 0; j < N2; j++) { 14 | S1: C[i][j] += alpha * A[i][k] * B[k][j]; 15 | } 16 | } 17 | #pragma endscop 18 | } -------------------------------------------------------------------------------- /tests/inputs/stencil.c: -------------------------------------------------------------------------------- 1 | int main() { /* Test input kernel: N1 time steps over two N2 x N2 arrays; in each step S0 writes every interior B[i][j] (indices 1 .. N2-2) as 0.2 times the sum of A[i][j] and its four direct neighbours, then S1 does the same from B back into A. */ 2 | int N1, N2; /* never assigned in this file; presumably symbolic parameters bound by the test - confirm */ 3 | float A[N2][N2]; 4 | float B[N2][N2]; 5 | 6 | #pragma scop 7 | for (int t = 0; t < N1; t++) { 8 | for (int i = 1; i < N2 - 1; i++) 9 | for (int j = 1; j < N2 - 1; j++) 10 | S0: B[i][j] = 0.2 * (A[i][j] + A[i][j - 1] + A[i][1 + j] + A[1 + i][j] + A[i - 1][j]); 11 | for (int i = 1; i < N2 - 1; i++) 12 | for (int j = 1; j < N2 - 1; j++) 13 | S1: A[i][j] = 0.2 * (B[i][j] + B[i][j - 1] + B[i][1 + j] + B[1 + i][j] + B[i - 1][j]); 14 | } 15 | #pragma endscop 16 | } -------------------------------------------------------------------------------- /tests/inputs/toy.c: -------------------------------------------------------------------------------- 1 | int main() { /* Test input kernel: in iteration i, S0 writes A[i], S1 writes the mirrored element A[N-1-i], and S2 writes A[2*i] only while i < N/2 (integer division). */ 2 | int N; /* never assigned in this file; presumably the parameter "N" that the ToyTest cases pass to the model - confirm */ 3 | float A[N]; 4 | 5 | #pragma scop 6 | for (int i = 0; i < N; i++) { 7 | S0: A[i] = 0; 8 | S1: A[N-1-i] = 1; 9 | if (i < N/2) 10 | S2: A[2*i] = 2; 11 | } 12 | #pragma endscop 13 | } 14 | -------------------------------------------------------------------------------- /tests/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, ETH Zurich 3 | */ 4 | 5 | #include "gtest/gtest.h" 6 | 7 | int main(int argc, char **argv) { /* Entry point of the test binary: hands the command line to GoogleTest, runs every registered test, and returns the RUN_ALL_TESTS() result as the exit status. */ 8 | ::testing::InitGoogleTest(&argc, argv); 9 | return RUN_ALL_TESTS(); 10 | } --------------------------------------------------------------------------------