├── .gitignore ├── AUTHORS ├── COPYING ├── ChangeLog ├── INSTALL ├── Makefile.am ├── Makefile.in ├── NEWS ├── README ├── aclocal.m4 ├── build-aux ├── ar-lib ├── compile ├── config.guess ├── config.sub ├── depcomp ├── install-sh ├── libtool.m4 ├── ltmain.sh ├── ltoptions.m4 ├── ltsugar.m4 ├── ltversion.m4 ├── lt~obsolete.m4 ├── missing ├── pkg.m4 └── test-driver ├── config.h.in ├── configure ├── configure.ac ├── notes └── func-list.txt ├── results ├── memcpybw-memsync.out ├── memcpybw-mhelper.out └── nullker-mhelper.out ├── scripts ├── Makefile.am ├── Makefile.in ├── lib64 │ ├── libcudart.so │ ├── libcudart.so.6.5 │ ├── rCUDAcommIB.so │ └── rCUDAcommTCP.so ├── mrcudaexec.py.template └── plotters │ └── overhead.py ├── src ├── Makefile.am ├── Makefile.in ├── comm.c ├── comm.h ├── common.h ├── datatypes.h ├── intercomm.c ├── intercomm.h ├── intercomm_interface.c ├── intercomm_interface.h ├── intercomm_mem.c ├── intercomm_mem.h ├── interface.c ├── mhelper.c ├── mrcuda.c ├── mrcuda.h ├── record.c └── record.h └── tests ├── Makefile.am ├── Makefile.in ├── check_comm.c ├── check_record.c └── progs ├── benchmark.memcpybw.sh ├── benchmark.nullker.cudamemcpy.sh ├── cudamemcpy.cu ├── gpuaddr.cu ├── hello.cu ├── hellomul.cu ├── matmul_mul.cu ├── matmul_par.cu ├── memcpybw.cu ├── multigpuaddr.cu ├── nullker.cu └── thread_dev.cu /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.swp 3 | *.swo 4 | src/libcudart.so.5.0 5 | lib64/* 6 | build/* 7 | autom4te.cache 8 | *~ 9 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Pak Markthub the creator of this project. 
2 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2016-09-28 Pak Markthub 2 | 3 | * all git log before the creation of this ChangeLog 4 | c617dd9 (HEAD -> installation, origin/installation) Create libcudart.so.7.0 symlink when installing mrCUDA 5 | c4ae1ca Regenerate all make and configuration scripts on Paris, hopefully it will work fine with other systems 6 | 1cdc3ea Add missing files 7 | 6b6cf38 Modify related files in the installation process 8 | c01849a Add missing files necessary for configure and make 9 | f1508e4 Add the missing config.h.in 10 | 0bcd211 Add the missing aclocal.m4 11 | c45bd5a Make the generation of the linked filenames of rCUDAcomm*.so more generic 12 | cdf5ed4 Create links to rCUDAcommIB.so and rCUDAcommTCP.so when installing mrCUDA 13 | cb954e8 Make now auto-generates correct mrcudaexec 14 | 77893c0 Check for python2.7 in configure 15 | d396f9d Add options for manually specifying NVIDIA's libcudart and nvcc 16 | 0513643 Detect the installation path of rCUDA's libcudart.so, rCUDAcommIB.so, and rCUDAcommTCP.so 17 | 332f4c9 Use absolute path for nvcc after checking 18 | b838985 Add checking for nvcc 19 | cf9a96f Create configure and its supported files 20 | 3913e41 (origin/multi-gpu, origin/master, origin/HEAD, github/master, multi-gpu, master) Change the labels of memsync plotter. 
21 | 7e799ad Modify code so that it can run on Paris and matrixMul and vectorAdd can use mrCUDA 22 | aee10f1 Change the font size of some figures 23 | 6bc4b07 Add benchmark scripts and programs for mrCUDA's overhead 24 | 4bda4d0 Add manual profiling 25 | bcc42b6 Change many labels' sizes 26 | b2ff7c0 Add plot_record_replay to the overhead.py 27 | df634e0 Add plot_mhelper_memcpybw to the overhead.py 28 | ab7707e Add plot_mhelper_nullker to overhead.py 29 | 151283b Get multi-GPU migration benchmark's results 30 | 23dba8e Change the legend size in memsync-bw plot 31 | 801d4a4 Implement memsync-bw plot in the overhead.py 32 | 87ed363 Get memcpybw-memsync benchmark result 33 | 4fa2a15 Add a plotter overhead.py 34 | 2beb2e4 Remove cudaMemcpy and cudaMemcpyToSymbol profiling 35 | 1709ea6 Add manual profiling 36 | a7f915a Fix mhelper does not exit when the main program exited 37 | 342930e Fix cudaLaunch error bugs 38 | 64ecbcb Fix cudaMemcpy bugs in mhelper 39 | 544ee52 Fix mhelper does not set device bugs 40 | 8c79e14 Fix mhelper communication bugs 41 | 5473461 Fix deadlock in cudaSetDevice 42 | d7a6ad7 Implement mhelper.c 43 | 2ee9d72 Implement intercomm_interface.c 44 | 9ab3d67 Implement some interfaces in intercomm_interface 45 | f5f261e Fix runtime error when using switching for single GPU case 46 | aa2abf4 Fix runtime errors when using only rCUDA or native 47 | eddf55e Fix compliation errors 48 | 32bb9b7 Refactor code to support multi-gpu migration 49 | b40b53a Implement __cudaRegisterFatBinary in mhelper 50 | 5cab822 Partially implement mhelper 51 | fff4bf4 Implement intercomm.c 52 | b1dff8a Partially implement intercomm module 53 | 7c32128 Implement intercomm_mem 54 | 3263bce Roughly define data structures and functions 55 | 5274e9e Merge branch 'rcuda-5.0' 56 | de3d5b2 (origin/rcuda-5.0) Implement multi-GPU matmul 57 | 6b13153 Fix multiple reports of the total sizes of cudaMemcpy and cudaMemcpyToSymbol 58 | 65c10b3 Add cudaMemcpy and cudaMemcpyToSymbol profiling 
59 | facce7e Add mrcuda_record time 60 | fde0ad0 Remove cudaMemcpyToSymbol replay and use sync symbol instead 61 | 50bdf0a Include mrcuda_replay_cudaMemcpyToSymbol to mrcuda_sync_mem profile 62 | 0aa19c9 Implement manual profiling 63 | 325623b Add mrcudaRecordCache 64 | 953ae29 Implement mrCUDAExec 65 | 7db0609 Add MRCUDA_SWITCH_THRESHOLD support for testing purpose 66 | c7ddadf Add cudaSetDeviceFlags support 67 | a1e7164 mrCUDA works with LAMMPS 68 | 2a765b3 Fix sync_memory bugs 69 | c697d6a Unknown test code 70 | 693042c Hard-code mrcuda_switching when num cudaLaunch reach a certain number 71 | cac9952 Fix segmentation-fault bug 72 | 03e9535 Fix compile-error bugs 73 | b9170c3 Connect modules together 74 | 94cfc0f Partially implement record and replay functions 75 | 2fa6ed4 Partially implement recording system 76 | c0fdd22 Implement and test comm.c 77 | 6335192 Use autotools as the make system 78 | 6bb38d7 Partially implement communication module. 79 | 7146ceb Implement function interfaces that are needed for LAMMPS 80 | c9e90a6 Implement mrcuda_init and mrcuda_fini 81 | 1123ca9 Partially define some headers 82 | 2c0e9f7 Change directory structure 83 | 92fdc95 Modify matmul_par.cu to make successful migratio more obvious 84 | 6ca6d5b Finish implementing rCUDA to native migration mini prototype 85 | 40db4b4 Successfully run hellowolrd concurrently on rCUDA and native 86 | d116474 Create an example of libcudart's hook 87 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | Installation Instructions 2 | ************************* 3 | 4 | Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation, 5 | Inc. 6 | 7 | Copying and distribution of this file, with or without modification, 8 | are permitted in any medium without royalty provided the copyright 9 | notice and this notice are preserved. 
This file is offered as-is, 10 | without warranty of any kind. 11 | 12 | Basic Installation 13 | ================== 14 | 15 | Briefly, the shell commands `./configure; make; make install' should 16 | configure, build, and install this package. The following 17 | more-detailed instructions are generic; see the `README' file for 18 | instructions specific to this package. Some packages provide this 19 | `INSTALL' file but do not implement all of the features documented 20 | below. The lack of an optional feature in a given package is not 21 | necessarily a bug. More recommendations for GNU packages can be found 22 | in *note Makefile Conventions: (standards)Makefile Conventions. 23 | 24 | The `configure' shell script attempts to guess correct values for 25 | various system-dependent variables used during compilation. It uses 26 | those values to create a `Makefile' in each directory of the package. 27 | It may also create one or more `.h' files containing system-dependent 28 | definitions. Finally, it creates a shell script `config.status' that 29 | you can run in the future to recreate the current configuration, and a 30 | file `config.log' containing compiler output (useful mainly for 31 | debugging `configure'). 32 | 33 | It can also use an optional file (typically called `config.cache' 34 | and enabled with `--cache-file=config.cache' or simply `-C') that saves 35 | the results of its tests to speed up reconfiguring. Caching is 36 | disabled by default to prevent problems with accidental use of stale 37 | cache files. 38 | 39 | If you need to do unusual things to compile the package, please try 40 | to figure out how `configure' could check whether to do them, and mail 41 | diffs or instructions to the address given in the `README' so they can 42 | be considered for the next release. If you are using the cache, and at 43 | some point `config.cache' contains results you don't want to keep, you 44 | may remove or edit it. 
45 | 46 | The file `configure.ac' (or `configure.in') is used to create 47 | `configure' by a program called `autoconf'. You need `configure.ac' if 48 | you want to change it or regenerate `configure' using a newer version 49 | of `autoconf'. 50 | 51 | The simplest way to compile this package is: 52 | 53 | 1. `cd' to the directory containing the package's source code and type 54 | `./configure' to configure the package for your system. 55 | 56 | Running `configure' might take a while. While running, it prints 57 | some messages telling which features it is checking for. 58 | 59 | 2. Type `make' to compile the package. 60 | 61 | 3. Optionally, type `make check' to run any self-tests that come with 62 | the package, generally using the just-built uninstalled binaries. 63 | 64 | 4. Type `make install' to install the programs and any data files and 65 | documentation. When installing into a prefix owned by root, it is 66 | recommended that the package be configured and built as a regular 67 | user, and only the `make install' phase executed with root 68 | privileges. 69 | 70 | 5. Optionally, type `make installcheck' to repeat any self-tests, but 71 | this time using the binaries in their final installed location. 72 | This target does not install anything. Running this target as a 73 | regular user, particularly if the prior `make install' required 74 | root privileges, verifies that the installation completed 75 | correctly. 76 | 77 | 6. You can remove the program binaries and object files from the 78 | source code directory by typing `make clean'. To also remove the 79 | files that `configure' created (so you can compile the package for 80 | a different kind of computer), type `make distclean'. There is 81 | also a `make maintainer-clean' target, but that is intended mainly 82 | for the package's developers. If you use it, you may have to get 83 | all sorts of other programs in order to regenerate files that came 84 | with the distribution. 85 | 86 | 7. 
Often, you can also type `make uninstall' to remove the installed 87 | files again. In practice, not all packages have tested that 88 | uninstallation works correctly, even though it is required by the 89 | GNU Coding Standards. 90 | 91 | 8. Some packages, particularly those that use Automake, provide `make 92 | distcheck', which can be used by developers to test that all other 93 | targets like `make install' and `make uninstall' work correctly. 94 | This target is generally not run by end users. 95 | 96 | Compilers and Options 97 | ===================== 98 | 99 | Some systems require unusual options for compilation or linking that 100 | the `configure' script does not know about. Run `./configure --help' 101 | for details on some of the pertinent environment variables. 102 | 103 | You can give `configure' initial values for configuration parameters 104 | by setting variables in the command line or in the environment. Here 105 | is an example: 106 | 107 | ./configure CC=c99 CFLAGS=-g LIBS=-lposix 108 | 109 | *Note Defining Variables::, for more details. 110 | 111 | Compiling For Multiple Architectures 112 | ==================================== 113 | 114 | You can compile the package for more than one kind of computer at the 115 | same time, by placing the object files for each architecture in their 116 | own directory. To do this, you can use GNU `make'. `cd' to the 117 | directory where you want the object files and executables to go and run 118 | the `configure' script. `configure' automatically checks for the 119 | source code in the directory that `configure' is in and in `..'. This 120 | is known as a "VPATH" build. 121 | 122 | With a non-GNU `make', it is safer to compile the package for one 123 | architecture at a time in the source code directory. After you have 124 | installed the package for one architecture, use `make distclean' before 125 | reconfiguring for another architecture.
126 | 127 | On MacOS X 10.5 and later systems, you can create libraries and 128 | executables that work on multiple system types--known as "fat" or 129 | "universal" binaries--by specifying multiple `-arch' options to the 130 | compiler but only a single `-arch' option to the preprocessor. Like 131 | this: 132 | 133 | ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ 134 | CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ 135 | CPP="gcc -E" CXXCPP="g++ -E" 136 | 137 | This is not guaranteed to produce working output in all cases, you 138 | may have to build one architecture at a time and combine the results 139 | using the `lipo' tool if you have problems. 140 | 141 | Installation Names 142 | ================== 143 | 144 | By default, `make install' installs the package's commands under 145 | `/usr/local/bin', include files under `/usr/local/include', etc. You 146 | can specify an installation prefix other than `/usr/local' by giving 147 | `configure' the option `--prefix=PREFIX', where PREFIX must be an 148 | absolute file name. 149 | 150 | You can specify separate installation prefixes for 151 | architecture-specific files and architecture-independent files. If you 152 | pass the option `--exec-prefix=PREFIX' to `configure', the package uses 153 | PREFIX as the prefix for installing programs and libraries. 154 | Documentation and other data files still use the regular prefix. 155 | 156 | In addition, if you use an unusual directory layout you can give 157 | options like `--bindir=DIR' to specify different values for particular 158 | kinds of files. Run `configure --help' for a list of the directories 159 | you can set and what kinds of files go in them. In general, the 160 | default for these options is expressed in terms of `${prefix}', so that 161 | specifying just `--prefix' will affect all of the other directory 162 | specifications that were not explicitly provided. 
163 | 164 | The most portable way to affect installation locations is to pass the 165 | correct locations to `configure'; however, many packages provide one or 166 | both of the following shortcuts of passing variable assignments to the 167 | `make install' command line to change installation locations without 168 | having to reconfigure or recompile. 169 | 170 | The first method involves providing an override variable for each 171 | affected directory. For example, `make install 172 | prefix=/alternate/directory' will choose an alternate location for all 173 | directory configuration variables that were expressed in terms of 174 | `${prefix}'. Any directories that were specified during `configure', 175 | but not in terms of `${prefix}', must each be overridden at install 176 | time for the entire installation to be relocated. The approach of 177 | makefile variable overrides for each directory variable is required by 178 | the GNU Coding Standards, and ideally causes no recompilation. 179 | However, some platforms have known limitations with the semantics of 180 | shared libraries that end up requiring recompilation when using this 181 | method, particularly noticeable in packages that use GNU Libtool. 182 | 183 | The second method involves providing the `DESTDIR' variable. For 184 | example, `make install DESTDIR=/alternate/directory' will prepend 185 | `/alternate/directory' before all installation names. The approach of 186 | `DESTDIR' overrides is not required by the GNU Coding Standards, and 187 | does not work on platforms that have drive letters. On the other hand, 188 | it does better at avoiding recompilation issues, and works well even 189 | when some directory options were not specified in terms of `${prefix}' 190 | at `configure' time. 
191 | 192 | Optional Features 193 | ================= 194 | 195 | If the package supports it, you can cause programs to be installed 196 | with an extra prefix or suffix on their names by giving `configure' the 197 | option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 198 | 199 | Some packages pay attention to `--enable-FEATURE' options to 200 | `configure', where FEATURE indicates an optional part of the package. 201 | They may also pay attention to `--with-PACKAGE' options, where PACKAGE 202 | is something like `gnu-as' or `x' (for the X Window System). The 203 | `README' should mention any `--enable-' and `--with-' options that the 204 | package recognizes. 205 | 206 | For packages that use the X Window System, `configure' can usually 207 | find the X include and library files automatically, but if it doesn't, 208 | you can use the `configure' options `--x-includes=DIR' and 209 | `--x-libraries=DIR' to specify their locations. 210 | 211 | Some packages offer the ability to configure how verbose the 212 | execution of `make' will be. For these packages, running `./configure 213 | --enable-silent-rules' sets the default to minimal output, which can be 214 | overridden with `make V=1'; while running `./configure 215 | --disable-silent-rules' sets the default to verbose, which can be 216 | overridden with `make V=0'. 217 | 218 | Particular systems 219 | ================== 220 | 221 | On HP-UX, the default C compiler is not ANSI C compatible. If GNU 222 | CC is not installed, it is recommended to use the following options in 223 | order to use an ANSI C compiler: 224 | 225 | ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" 226 | 227 | and if that doesn't work, install pre-built binaries of GCC for HP-UX. 228 | 229 | HP-UX `make' updates targets which have the same time stamps as 230 | their prerequisites, which makes it generally unusable when shipped 231 | generated files such as `configure' are involved. Use GNU `make' 232 | instead. 
233 | 234 | On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot 235 | parse its `<wchar.h>' header file. The option `-nodtk' can be used as 236 | a workaround. If GNU CC is not installed, it is therefore recommended 237 | to try 238 | 239 | ./configure CC="cc" 240 | 241 | and if that doesn't work, try 242 | 243 | ./configure CC="cc -nodtk" 244 | 245 | On Solaris, don't put `/usr/ucb' early in your `PATH'. This 246 | directory contains several dysfunctional programs; working variants of 247 | these programs are available in `/usr/bin'. So, if you need `/usr/ucb' 248 | in your `PATH', put it _after_ `/usr/bin'. 249 | 250 | On Haiku, software installed for all users goes in `/boot/common', 251 | not `/usr/local'. It is recommended to use the following options: 252 | 253 | ./configure --prefix=/boot/common 254 | 255 | Specifying the System Type 256 | ========================== 257 | 258 | There may be some features `configure' cannot figure out 259 | automatically, but needs to determine by the type of machine the package 260 | will run on. Usually, assuming the package is built to be run on the 261 | _same_ architectures, `configure' can figure that out, but if it prints 262 | a message saying it cannot guess the machine type, give it the 263 | `--build=TYPE' option. TYPE can either be a short name for the system 264 | type, such as `sun4', or a canonical name which has the form: 265 | 266 | CPU-COMPANY-SYSTEM 267 | 268 | where SYSTEM can have one of these forms: 269 | 270 | OS 271 | KERNEL-OS 272 | 273 | See the file `config.sub' for the possible values of each field. If 274 | `config.sub' isn't included in this package, then this package doesn't 275 | need to know the machine type. 276 | 277 | If you are _building_ compiler tools for cross-compiling, you should 278 | use the option `--target=TYPE' to select the type of system they will 279 | produce code for.
280 | 281 | If you want to _use_ a cross compiler, that generates code for a 282 | platform different from the build platform, you should specify the 283 | "host" platform (i.e., that on which the generated programs will 284 | eventually be run) with `--host=TYPE'. 285 | 286 | Sharing Defaults 287 | ================ 288 | 289 | If you want to set default values for `configure' scripts to share, 290 | you can create a site shell script called `config.site' that gives 291 | default values for variables like `CC', `cache_file', and `prefix'. 292 | `configure' looks for `PREFIX/share/config.site' if it exists, then 293 | `PREFIX/etc/config.site' if it exists. Or, you can set the 294 | `CONFIG_SITE' environment variable to the location of the site script. 295 | A warning: not all `configure' scripts look for a site script. 296 | 297 | Defining Variables 298 | ================== 299 | 300 | Variables not defined in a site shell script can be set in the 301 | environment passed to `configure'. However, some packages may run 302 | configure again during the build, and the customized values of these 303 | variables may be lost. In order to avoid this problem, you should set 304 | them in the `configure' command line, using `VAR=value'. For example: 305 | 306 | ./configure CC=/usr/local2/bin/gcc 307 | 308 | causes the specified `gcc' to be used as the C compiler (unless it is 309 | overridden in the site shell script). 310 | 311 | Unfortunately, this technique does not work for `CONFIG_SHELL' due to 312 | an Autoconf limitation. Until the limitation is lifted, you can use 313 | this workaround: 314 | 315 | CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash 316 | 317 | `configure' Invocation 318 | ====================== 319 | 320 | `configure' recognizes the following options to control how it 321 | operates. 322 | 323 | `--help' 324 | `-h' 325 | Print a summary of all of the options to `configure', and exit. 
326 | 327 | `--help=short' 328 | `--help=recursive' 329 | Print a summary of the options unique to this package's 330 | `configure', and exit. The `short' variant lists options used 331 | only in the top level, while the `recursive' variant lists options 332 | also present in any nested packages. 333 | 334 | `--version' 335 | `-V' 336 | Print the version of Autoconf used to generate the `configure' 337 | script, and exit. 338 | 339 | `--cache-file=FILE' 340 | Enable the cache: use and save the results of the tests in FILE, 341 | traditionally `config.cache'. FILE defaults to `/dev/null' to 342 | disable caching. 343 | 344 | `--config-cache' 345 | `-C' 346 | Alias for `--cache-file=config.cache'. 347 | 348 | `--quiet' 349 | `--silent' 350 | `-q' 351 | Do not print messages saying which checks are being made. To 352 | suppress all normal output, redirect it to `/dev/null' (any error 353 | messages will still be shown). 354 | 355 | `--srcdir=DIR' 356 | Look for the package's source code in directory DIR. Usually 357 | `configure' can determine that directory automatically. 358 | 359 | `--prefix=DIR' 360 | Use DIR as the installation prefix. *note Installation Names:: 361 | for more details, including other options available for fine-tuning 362 | the installation locations. 363 | 364 | `--no-create' 365 | `-n' 366 | Run the configure checks, but stop before creating any output 367 | files. 368 | 369 | `configure' also accepts some other, not widely useful, options. Run 370 | `configure --help' for more details. 371 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I build-aux 2 | SUBDIRS = src . 
tests scripts 3 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | 2016-09-28 Pak Markthub 2 | * This is the first alpha release version. 3 | * mrCUDA supports multi-GPU remote-to-local GPU migration. 4 | * Only a subset of the CUDA Runtime API v7.0 is supported, but it is at least enough for LAMMPS to run without problems. 5 | 6 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | mrCUDA: Migratable rCUDA 2 | 3 | What is it? 4 | =========== 5 | 6 | mrCUDA is an extension of rCUDA (http://rcuda.net), which aims at enabling 7 | remote-to-local GPU migration. We develop this project in order to solve the 8 | performance problems caused by remote GPU communication: overhead from rCUDA 9 | and network congestion. By using mrCUDA, a user can migrate execution on a 10 | remote GPU to a local GPU when one becomes available. mrCUDA works seamlessly 11 | with rCUDA and programs that use the CUDA Runtime API. There is no need to recompile 12 | the program in order to use mrCUDA. More information regarding mrCUDA can be 13 | found in: 14 | 15 | * Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "Serving More GPU Jobs, with 16 | Low Penalty, using Remote GPU Execution and Migration." IEEE Cluster 2016. 17 | 18 | * Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "Finishing GPU Jobs 19 | running on a Multi-GPU Batch-Queue Node-Sharing System Earlier with Remote 20 | GPU Execution and Migration." ISC2016. 21 | 22 | * Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "Reducing Remote GPU 23 | Execution's Overhead with mrCUDA." GTC2016. 24 | 25 | * Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka.
"Serving More GPU Jobs 26 | in Multi-GPU Batch-Queue Systems using Remote GPU Execution and Migration 27 | (Unrefereed Workshop manuscript)." IPSJ SIG Notes 2016-HPC-153. 28 | 29 | * Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "mrCUDA: Low-Overhead 30 | Middleware for Transparently Migrating CUDA Execution from Remote to Local 31 | GPUs." SC15. 32 | 33 | * Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "mrCUDA: Low-Overhead 34 | Middleware for Transparently Migrating CUDA Execution from Remote to Local 35 | GPUs." GTC Japan 2015. 36 | 37 | * Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "mrCUDA: A middleware 38 | for migrating rCUDA virtual GPUs to native GPUs (Unrefereed Workshop 39 | manuscript)." IPSJ SIG Notes 2015-HPC-150 (SWoPP2015). 40 | 41 | Installation 42 | ============ 43 | 44 | Prerequisites 45 | ------------- 46 | 47 | - check 48 | - CUDA7.0 49 | - glibc-2.0 50 | - Python2.7 51 | - rCUDAv15.07 52 | 53 | How to install 54 | -------------- 55 | 56 | mkdir build 57 | cd build 58 | ../configure --prefix=~/mrCUDA-bin --with-rcuda= 59 | make 60 | make install 61 | 62 | Note: We recommend that you specify --prefix because mrCUDA creates its own 63 | libcudart.so, which might conflict with the libcudart.so from NVIDIA installed on 64 | your system. 65 | 66 | How to use? 67 | =========== 68 | 69 | 1. Make sure your program works with rCUDAv15.07. 70 | 2. Start rCUDAd on a node. 71 | 3. Go to mrCUDA's installation directory. 72 | 4. cd bin 73 | 5. ./mrcudaexec -s -t \ 74 | --switch-threshold= -- 75 | 76 | Notes: 77 | 1. If you specify --switch-threshold, mrCUDA automatically migrates execution 78 | once it has encountered 'cudaLaunch' more than the specified number of times. This is helpful 79 | for testing mrCUDA's migration functionality. 80 | 81 | 2. In a future release, mrCUDA will create a UNIX socket to which you can send a 82 | migration command to start GPU migration.
83 | 84 | Acknowledgement 85 | =============== 86 | 87 | This research was supported by JST, CREST (Research Area: Advanced Core 88 | Technologies for Big Data Integration). 89 | 90 | -------------------------------------------------------------------------------- /build-aux/ar-lib: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # Wrapper for Microsoft lib.exe 3 | 4 | me=ar-lib 5 | scriptversion=2012-03-01.08; # UTC 6 | 7 | # Copyright (C) 2010-2013 Free Software Foundation, Inc. 8 | # Written by Peter Rosin <peda@lysator.liu.se>. 9 | # 10 | # This program is free software; you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation; either version 2, or (at your option) 13 | # any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 22 | 23 | # As a special exception to the GNU General Public License, if you 24 | # distribute this file as part of a program that contains a 25 | # configuration script generated by Autoconf, you may include it under 26 | # the same distribution terms that you use for the rest of that program. 27 | 28 | # This file is maintained in Automake, please report 29 | # bugs to <bug-automake@gnu.org> or send patches to 30 | # <automake-patches@gnu.org>. 31 | 32 | 33 | # func_error message 34 | func_error () 35 | { 36 | echo "$me: $1" 1>&2 37 | exit 1 38 | } 39 | 40 | file_conv= 41 | 42 | # func_file_conv build_file 43 | # Convert a $build file to $host form and store it in $file 44 | # Currently only supports Windows hosts.
45 | func_file_conv () 46 | { 47 | file=$1 48 | case $file in 49 | / | /[!/]*) # absolute file, and not a UNC file 50 | if test -z "$file_conv"; then 51 | # lazily determine how to convert abs files 52 | case `uname -s` in 53 | MINGW*) 54 | file_conv=mingw 55 | ;; 56 | CYGWIN*) 57 | file_conv=cygwin 58 | ;; 59 | *) 60 | file_conv=wine 61 | ;; 62 | esac 63 | fi 64 | case $file_conv in 65 | mingw) 66 | file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` 67 | ;; 68 | cygwin) 69 | file=`cygpath -m "$file" || echo "$file"` 70 | ;; 71 | wine) 72 | file=`winepath -w "$file" || echo "$file"` 73 | ;; 74 | esac 75 | ;; 76 | esac 77 | } 78 | 79 | # func_at_file at_file operation archive 80 | # Iterate over all members in AT_FILE performing OPERATION on ARCHIVE 81 | # for each of them. 82 | # When interpreting the content of the @FILE, do NOT use func_file_conv, 83 | # since the user would need to supply preconverted file names to 84 | # binutils ar, at least for MinGW. 85 | func_at_file () 86 | { 87 | operation=$2 88 | archive=$3 89 | at_file_contents=`cat "$1"` 90 | eval set x "$at_file_contents" 91 | shift 92 | 93 | for member 94 | do 95 | $AR -NOLOGO $operation:"$member" "$archive" || exit $? 96 | done 97 | } 98 | 99 | case $1 in 100 | '') 101 | func_error "no command. Try '$0 --help' for more information." 102 | ;; 103 | -h | --h*) 104 | cat <. 8 | # 9 | # This program is free software; you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation; either version 2, or (at your option) 12 | # any later version. 13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 
18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 21 | 22 | # As a special exception to the GNU General Public License, if you 23 | # distribute this file as part of a program that contains a 24 | # configuration script generated by Autoconf, you may include it under 25 | # the same distribution terms that you use for the rest of that program. 26 | 27 | # This file is maintained in Automake, please report 28 | # bugs to <bug-automake@gnu.org> or send patches to 29 | # <automake-patches@gnu.org>. 30 | 31 | nl=' 32 | ' 33 | 34 | # We need space, tab and new line, in precisely that order. Quoting is 35 | # there to prevent tools from complaining about whitespace usage. 36 | IFS=" "" $nl" 37 | 38 | file_conv= 39 | 40 | # func_file_conv build_file lazy 41 | # Convert a $build file to $host form and store it in $file 42 | # Currently only supports Windows hosts. If the determined conversion 43 | # type is listed in (the comma separated) LAZY, no conversion will 44 | # take place.
func_file_conv ()
{
  # $1 is the name to convert, $2 the optional comma-separated LAZY list.
  # The result is always left in the global $file.
  file=$1

  # Only absolute, non-UNC names are candidates for conversion; every
  # other name is handed back exactly as it came in.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Determine the conversion flavour on first use and remember it in
  # $file_conv for later calls.
  test -n "$file_conv" ||
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac

  case $file_conv/,$2, in
    *,$file_conv,*)
      # The flavour is listed in LAZY: keep $file unconverted.
      ;;
    mingw/*)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin/*)
      # Fall back to the unconverted name if cygpath fails.
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine/*)
      # Fall back to the unconverted name if winepath fails.
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}

# func_cl_dashL linkdir
# Record LINKDIR (converted by func_file_conv) both in the ';'-separated
# $lib_path used for later -l lookups and as a -LIBPATH: linker option.
func_cl_dashL ()
{
  func_file_conv "$1"
  if test -n "$lib_path"; then
    lib_path="$lib_path;$file"
  else
    lib_path=$file
  fi
  linker_opts="$linker_opts -LIBPATH:$file"
}

# func_cl_dashl library
# Search the directories of $lib_path and $LIB for LIBRARY and leave the
# name to hand to cl in $lib.  Per directory the candidates are tried in
# the order LIBRARY.dll.lib (only while $shared holds), LIBRARY.lib,
# libLIBRARY.a.  $found reports whether a file was located; when none is,
# $lib becomes plain LIBRARY.lib.
func_cl_dashl ()
{
  lib=$1
  found=no
  save_IFS=$IFS
  IFS=';'
  for dir in $lib_path $LIB
  do
    # The ';' separator is only needed to split the list above.
    IFS=$save_IFS
    if $shared && test -f "$dir/$lib.dll.lib"; then
      lib=$dir/$lib.dll.lib
    elif test -f "$dir/$lib.lib"; then
      lib=$dir/$lib.lib
    elif test -f "$dir/lib$lib.a"; then
      lib=$dir/lib$lib.a
    else
      continue
    fi
    found=yes
    break
  done
  IFS=$save_IFS

  test "$found" = yes || lib=$lib.lib
}

# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
#
# The argument list is rewritten in place: the loop visits every original
# argument once, appends its cl-style translation to the end of "$@" and
# shifts the original off the front.  $eat is set when an option consumes
# the following word, so that word is skipped on the next iteration.
# Linker-only options are gathered in $linker_opts and handed to cl after
# a single '-link'.  The function replaces the shell with the rewritten
# command and therefore never returns.
func_cl_wrapper ()
{
  # Assume a capable shell
  lib_path=
  shared=:
  linker_opts=
  for arg
  do
    if test -n "$eat"; then
      eat=
    else
      case $1 in
        -o)
          # configure might choose to run compile as 'compile cc -o foo foo.c'.
          eat=1
          case $2 in
            *.o | *.[oO][bB][jJ])
              # Object output is named with -Fo.
              func_file_conv "$2"
              set x "$@" -Fo"$file"
              shift
              ;;
            *)
              # Any other output name is passed with -Fe.
              func_file_conv "$2"
              set x "$@" -Fe"$file"
              shift
              ;;
          esac
          ;;
        -I)
          eat=1
          func_file_conv "$2" mingw
          set x "$@" -I"$file"
          shift
          ;;
        -I*)
          func_file_conv "${1#-I}" mingw
          set x "$@" -I"$file"
          shift
          ;;
        -l)
          eat=1
          func_cl_dashl "$2"
          set x "$@" "$lib"
          shift
          ;;
        -l*)
          func_cl_dashl "${1#-l}"
          set x "$@" "$lib"
          shift
          ;;
        -L)
          eat=1
          func_cl_dashL "$2"
          ;;
        -L*)
          func_cl_dashL "${1#-L}"
          ;;
        -static)
          # Stops func_cl_dashl from preferring *.dll.lib import libraries.
          shared=false
          ;;
        -Wl,*)
          # Split the comma-separated list into individual linker options.
          arg=${1#-Wl,}
          save_ifs="$IFS"; IFS=','
          for flag in $arg; do
            IFS="$save_ifs"
            linker_opts="$linker_opts $flag"
          done
          IFS="$save_ifs"
          ;;
        -Xlinker)
          eat=1
          linker_opts="$linker_opts $2"
          ;;
        -*)
          set x "$@" "$1"
          shift
          ;;
        *.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
          # These suffixes are passed with -Tp.
          func_file_conv "$1"
          set x "$@" -Tp"$file"
          shift
          ;;
        *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
          func_file_conv "$1" mingw
          set x "$@" "$file"
          shift
          ;;
        *)
          set x "$@" "$1"
          shift
          ;;
      esac
    fi
    shift
  done
  if test -n "$linker_opts"; then
    linker_opts="-link$linker_opts"
  fi
  exec "$@" $linker_opts
  # Only reached when the exec itself fails.
  exit 1
}

# Shared by func_cl_wrapper and the generic loop below: non-empty while the
# next argument has already been consumed by the preceding option.
eat=

# Dispatch on the first word: usage errors, --help/--version, or a cl
# compiler, which is handled entirely by func_cl_wrapper.
case $1 in
  '')
     echo "$0: No command. Try '$0 --help' for more information." 1>&2
     exit 1;
     ;;
  -h | --h*)
    cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]

Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.

If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.

Report bugs to <bug-automake@gnu.org>.
EOF
    exit $?
    ;;
  -v | --v*)
    echo "compile $scriptversion"
    exit $?
    ;;
  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
    func_cl_wrapper "$@"      # Doesn't return...
    ;;
esac

# Object file requested with -o, and the .c source seen on the command line.
ofile=
cfile=

# Rebuild "$@" without '-o OBJECT', remembering OBJECT in $ofile and the
# last .c argument in $cfile.  Same append-and-shift rotation as above.
for arg
do
  if test -n "$eat"; then
    eat=
  else
    case $1 in
      -o)
        # configure might choose to run compile as 'compile cc -o foo foo.c'.
        # So we strip '-o arg' only if arg is an object.
        eat=1
        case $2 in
          *.o | *.obj)
            ofile=$2
            ;;
          *)
            set x "$@" -o "$2"
            shift
            ;;
        esac
        ;;
      *.c)
        cfile=$1
        set x "$@" "$1"
        shift
        ;;
      *)
        set x "$@" "$1"
        shift
        ;;
    esac
  fi
  shift
done

if test -z "$ofile" || test -z "$cfile"; then
  # If no '-o' option was seen then we might have been invoked from a
  # pattern rule where we don't need one. That is ok -- this is a
  # normal compilation that the losing compiler can handle. If no
  # '.c' file was seen then we are probably linking. That is also
  # ok.
  exec "$@"
fi

# Name of file we expect compiler to create.
# (Directory part and any drive letter are dropped; '.c' becomes '.o'.)
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`

# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file.
Also, base the name on the expected 315 | # object file name, since that is what matters with a parallel build. 316 | lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d 317 | while true; do 318 | if mkdir "$lockdir" >/dev/null 2>&1; then 319 | break 320 | fi 321 | sleep 1 322 | done 323 | # FIXME: race condition here if user kills between mkdir and trap. 324 | trap "rmdir '$lockdir'; exit 1" 1 2 15 325 | 326 | # Run the compile. 327 | "$@" 328 | ret=$? 329 | 330 | if test -f "$cofile"; then 331 | test "$cofile" = "$ofile" || mv "$cofile" "$ofile" 332 | elif test -f "${cofile}bj"; then 333 | test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" 334 | fi 335 | 336 | rmdir "$lockdir" 337 | exit $ret 338 | 339 | # Local Variables: 340 | # mode: shell-script 341 | # sh-indentation: 2 342 | # eval: (add-hook 'write-file-hooks 'time-stamp) 343 | # time-stamp-start: "scriptversion=" 344 | # time-stamp-format: "%:y-%02m-%02d.%02H" 345 | # time-stamp-time-zone: "UTC" 346 | # time-stamp-end: "; # UTC" 347 | # End: 348 | -------------------------------------------------------------------------------- /build-aux/install-sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # install - install a program, script, or datafile 3 | 4 | scriptversion=2011-11-20.07; # UTC 5 | 6 | # This originates from X11R5 (mit/util/scripts/install.sh), which was 7 | # later released in X11R6 (xc/config/util/install.sh) with the 8 | # following copyright and license. 
9 | # 10 | # Copyright (C) 1994 X Consortium 11 | # 12 | # Permission is hereby granted, free of charge, to any person obtaining a copy 13 | # of this software and associated documentation files (the "Software"), to 14 | # deal in the Software without restriction, including without limitation the 15 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 16 | # sell copies of the Software, and to permit persons to whom the Software is 17 | # furnished to do so, subject to the following conditions: 18 | # 19 | # The above copyright notice and this permission notice shall be included in 20 | # all copies or substantial portions of the Software. 21 | # 22 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 26 | # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- 27 | # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | # 29 | # Except as contained in this notice, the name of the X Consortium shall not 30 | # be used in advertising or otherwise to promote the sale, use or other deal- 31 | # ings in this Software without prior written authorization from the X Consor- 32 | # tium. 33 | # 34 | # 35 | # FSF changes to this file are in the public domain. 36 | # 37 | # Calling this script install-sh is preferred over install.sh, to prevent 38 | # 'make' implicit rules from creating a file called install from it 39 | # when there is no Makefile. 40 | # 41 | # This script is compatible with the BSD install script, but was written 42 | # from scratch. 43 | 44 | nl=' 45 | ' 46 | IFS=" "" $nl" 47 | 48 | # set DOITPROG to echo to test this script 49 | 50 | # Don't use :- since 4.3BSD and earlier shells don't like it. 
51 | doit=${DOITPROG-} 52 | if test -z "$doit"; then 53 | doit_exec=exec 54 | else 55 | doit_exec=$doit 56 | fi 57 | 58 | # Put in absolute file names if you don't have them in your path; 59 | # or use environment vars. 60 | 61 | chgrpprog=${CHGRPPROG-chgrp} 62 | chmodprog=${CHMODPROG-chmod} 63 | chownprog=${CHOWNPROG-chown} 64 | cmpprog=${CMPPROG-cmp} 65 | cpprog=${CPPROG-cp} 66 | mkdirprog=${MKDIRPROG-mkdir} 67 | mvprog=${MVPROG-mv} 68 | rmprog=${RMPROG-rm} 69 | stripprog=${STRIPPROG-strip} 70 | 71 | posix_glob='?' 72 | initialize_posix_glob=' 73 | test "$posix_glob" != "?" || { 74 | if (set -f) 2>/dev/null; then 75 | posix_glob= 76 | else 77 | posix_glob=: 78 | fi 79 | } 80 | ' 81 | 82 | posix_mkdir= 83 | 84 | # Desired mode of installed file. 85 | mode=0755 86 | 87 | chgrpcmd= 88 | chmodcmd=$chmodprog 89 | chowncmd= 90 | mvcmd=$mvprog 91 | rmcmd="$rmprog -f" 92 | stripcmd= 93 | 94 | src= 95 | dst= 96 | dir_arg= 97 | dst_arg= 98 | 99 | copy_on_change=false 100 | no_target_directory= 101 | 102 | usage="\ 103 | Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE 104 | or: $0 [OPTION]... SRCFILES... DIRECTORY 105 | or: $0 [OPTION]... -t DIRECTORY SRCFILES... 106 | or: $0 [OPTION]... -d DIRECTORIES... 107 | 108 | In the 1st form, copy SRCFILE to DSTFILE. 109 | In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. 110 | In the 4th, create DIRECTORIES. 111 | 112 | Options: 113 | --help display this help and exit. 114 | --version display version info and exit. 115 | 116 | -c (ignored) 117 | -C install only if different (preserve the last data modification time) 118 | -d create directories instead of installing files. 119 | -g GROUP $chgrpprog installed files to GROUP. 120 | -m MODE $chmodprog installed files to MODE. 121 | -o USER $chownprog installed files to USER. 122 | -s $stripprog installed files. 123 | -t DIRECTORY install into DIRECTORY. 124 | -T report an error if DSTFILE is a directory. 
125 | 126 | Environment variables override the default commands: 127 | CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG 128 | RMPROG STRIPPROG 129 | " 130 | 131 | while test $# -ne 0; do 132 | case $1 in 133 | -c) ;; 134 | 135 | -C) copy_on_change=true;; 136 | 137 | -d) dir_arg=true;; 138 | 139 | -g) chgrpcmd="$chgrpprog $2" 140 | shift;; 141 | 142 | --help) echo "$usage"; exit $?;; 143 | 144 | -m) mode=$2 145 | case $mode in 146 | *' '* | *' '* | *' 147 | '* | *'*'* | *'?'* | *'['*) 148 | echo "$0: invalid mode: $mode" >&2 149 | exit 1;; 150 | esac 151 | shift;; 152 | 153 | -o) chowncmd="$chownprog $2" 154 | shift;; 155 | 156 | -s) stripcmd=$stripprog;; 157 | 158 | -t) dst_arg=$2 159 | # Protect names problematic for 'test' and other utilities. 160 | case $dst_arg in 161 | -* | [=\(\)!]) dst_arg=./$dst_arg;; 162 | esac 163 | shift;; 164 | 165 | -T) no_target_directory=true;; 166 | 167 | --version) echo "$0 $scriptversion"; exit $?;; 168 | 169 | --) shift 170 | break;; 171 | 172 | -*) echo "$0: invalid option: $1" >&2 173 | exit 1;; 174 | 175 | *) break;; 176 | esac 177 | shift 178 | done 179 | 180 | if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then 181 | # When -d is used, all remaining arguments are directories to create. 182 | # When -t is used, the destination is already specified. 183 | # Otherwise, the last argument is the destination. Remove it from $@. 184 | for arg 185 | do 186 | if test -n "$dst_arg"; then 187 | # $@ is not empty: it contains at least $arg. 188 | set fnord "$@" "$dst_arg" 189 | shift # fnord 190 | fi 191 | shift # arg 192 | dst_arg=$arg 193 | # Protect names problematic for 'test' and other utilities. 194 | case $dst_arg in 195 | -* | [=\(\)!]) dst_arg=./$dst_arg;; 196 | esac 197 | done 198 | fi 199 | 200 | if test $# -eq 0; then 201 | if test -z "$dir_arg"; then 202 | echo "$0: no input file specified." >&2 203 | exit 1 204 | fi 205 | # It's OK to call 'install-sh -d' without argument. 
206 | # This can happen when creating conditional directories. 207 | exit 0 208 | fi 209 | 210 | if test -z "$dir_arg"; then 211 | do_exit='(exit $ret); exit $ret' 212 | trap "ret=129; $do_exit" 1 213 | trap "ret=130; $do_exit" 2 214 | trap "ret=141; $do_exit" 13 215 | trap "ret=143; $do_exit" 15 216 | 217 | # Set umask so as not to create temps with too-generous modes. 218 | # However, 'strip' requires both read and write access to temps. 219 | case $mode in 220 | # Optimize common cases. 221 | *644) cp_umask=133;; 222 | *755) cp_umask=22;; 223 | 224 | *[0-7]) 225 | if test -z "$stripcmd"; then 226 | u_plus_rw= 227 | else 228 | u_plus_rw='% 200' 229 | fi 230 | cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; 231 | *) 232 | if test -z "$stripcmd"; then 233 | u_plus_rw= 234 | else 235 | u_plus_rw=,u+rw 236 | fi 237 | cp_umask=$mode$u_plus_rw;; 238 | esac 239 | fi 240 | 241 | for src 242 | do 243 | # Protect names problematic for 'test' and other utilities. 244 | case $src in 245 | -* | [=\(\)!]) src=./$src;; 246 | esac 247 | 248 | if test -n "$dir_arg"; then 249 | dst=$src 250 | dstdir=$dst 251 | test -d "$dstdir" 252 | dstdir_status=$? 253 | else 254 | 255 | # Waiting for this to be detected by the "$cpprog $src $dsttmp" command 256 | # might cause directories to be created, which would be especially bad 257 | # if $src (and thus $dsttmp) contains '*'. 258 | if test ! -f "$src" && test ! -d "$src"; then 259 | echo "$0: $src does not exist." >&2 260 | exit 1 261 | fi 262 | 263 | if test -z "$dst_arg"; then 264 | echo "$0: no destination specified." >&2 265 | exit 1 266 | fi 267 | dst=$dst_arg 268 | 269 | # If destination is a directory, append the input filename; won't work 270 | # if double slashes aren't ignored. 
271 | if test -d "$dst"; then 272 | if test -n "$no_target_directory"; then 273 | echo "$0: $dst_arg: Is a directory" >&2 274 | exit 1 275 | fi 276 | dstdir=$dst 277 | dst=$dstdir/`basename "$src"` 278 | dstdir_status=0 279 | else 280 | # Prefer dirname, but fall back on a substitute if dirname fails. 281 | dstdir=` 282 | (dirname "$dst") 2>/dev/null || 283 | expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ 284 | X"$dst" : 'X\(//\)[^/]' \| \ 285 | X"$dst" : 'X\(//\)$' \| \ 286 | X"$dst" : 'X\(/\)' \| . 2>/dev/null || 287 | echo X"$dst" | 288 | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ 289 | s//\1/ 290 | q 291 | } 292 | /^X\(\/\/\)[^/].*/{ 293 | s//\1/ 294 | q 295 | } 296 | /^X\(\/\/\)$/{ 297 | s//\1/ 298 | q 299 | } 300 | /^X\(\/\).*/{ 301 | s//\1/ 302 | q 303 | } 304 | s/.*/./; q' 305 | ` 306 | 307 | test -d "$dstdir" 308 | dstdir_status=$? 309 | fi 310 | fi 311 | 312 | obsolete_mkdir_used=false 313 | 314 | if test $dstdir_status != 0; then 315 | case $posix_mkdir in 316 | '') 317 | # Create intermediate dirs using mode 755 as modified by the umask. 318 | # This is like FreeBSD 'install' as of 1997-10-28. 319 | umask=`umask` 320 | case $stripcmd.$umask in 321 | # Optimize common cases. 322 | *[2367][2367]) mkdir_umask=$umask;; 323 | .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; 324 | 325 | *[0-7]) 326 | mkdir_umask=`expr $umask + 22 \ 327 | - $umask % 100 % 40 + $umask % 20 \ 328 | - $umask % 10 % 4 + $umask % 2 329 | `;; 330 | *) mkdir_umask=$umask,go-w;; 331 | esac 332 | 333 | # With -d, create the new directory with the user-specified mode. 334 | # Otherwise, rely on $mkdir_umask. 335 | if test -n "$dir_arg"; then 336 | mkdir_mode=-m$mode 337 | else 338 | mkdir_mode= 339 | fi 340 | 341 | posix_mkdir=false 342 | case $umask in 343 | *[123567][0-7][0-7]) 344 | # POSIX mkdir -p sets u+wx bits regardless of umask, which 345 | # is incompatible with FreeBSD 'install' when (umask & 300) != 0. 
346 | ;; 347 | *) 348 | tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ 349 | trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 350 | 351 | if (umask $mkdir_umask && 352 | exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 353 | then 354 | if test -z "$dir_arg" || { 355 | # Check for POSIX incompatibilities with -m. 356 | # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or 357 | # other-writable bit of parent directory when it shouldn't. 358 | # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 359 | ls_ld_tmpdir=`ls -ld "$tmpdir"` 360 | case $ls_ld_tmpdir in 361 | d????-?r-*) different_mode=700;; 362 | d????-?--*) different_mode=755;; 363 | *) false;; 364 | esac && 365 | $mkdirprog -m$different_mode -p -- "$tmpdir" && { 366 | ls_ld_tmpdir_1=`ls -ld "$tmpdir"` 367 | test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" 368 | } 369 | } 370 | then posix_mkdir=: 371 | fi 372 | rmdir "$tmpdir/d" "$tmpdir" 373 | else 374 | # Remove any dirs left behind by ancient mkdir implementations. 375 | rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null 376 | fi 377 | trap '' 0;; 378 | esac;; 379 | esac 380 | 381 | if 382 | $posix_mkdir && ( 383 | umask $mkdir_umask && 384 | $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" 385 | ) 386 | then : 387 | else 388 | 389 | # The umask is ridiculous, or mkdir does not conform to POSIX, 390 | # or it failed possibly due to a race condition. Create the 391 | # directory the slow way, step by step, checking for races as we go. 
392 | 393 | case $dstdir in 394 | /*) prefix='/';; 395 | [-=\(\)!]*) prefix='./';; 396 | *) prefix='';; 397 | esac 398 | 399 | eval "$initialize_posix_glob" 400 | 401 | oIFS=$IFS 402 | IFS=/ 403 | $posix_glob set -f 404 | set fnord $dstdir 405 | shift 406 | $posix_glob set +f 407 | IFS=$oIFS 408 | 409 | prefixes= 410 | 411 | for d 412 | do 413 | test X"$d" = X && continue 414 | 415 | prefix=$prefix$d 416 | if test -d "$prefix"; then 417 | prefixes= 418 | else 419 | if $posix_mkdir; then 420 | (umask=$mkdir_umask && 421 | $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break 422 | # Don't fail if two instances are running concurrently. 423 | test -d "$prefix" || exit 1 424 | else 425 | case $prefix in 426 | *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; 427 | *) qprefix=$prefix;; 428 | esac 429 | prefixes="$prefixes '$qprefix'" 430 | fi 431 | fi 432 | prefix=$prefix/ 433 | done 434 | 435 | if test -n "$prefixes"; then 436 | # Don't fail if two instances are running concurrently. 437 | (umask $mkdir_umask && 438 | eval "\$doit_exec \$mkdirprog $prefixes") || 439 | test -d "$dstdir" || exit 1 440 | obsolete_mkdir_used=true 441 | fi 442 | fi 443 | fi 444 | 445 | if test -n "$dir_arg"; then 446 | { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && 447 | { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && 448 | { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || 449 | test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 450 | else 451 | 452 | # Make a couple of temp file names in the proper directory. 453 | dsttmp=$dstdir/_inst.$$_ 454 | rmtmp=$dstdir/_rm.$$_ 455 | 456 | # Trap to clean up those temp files at exit. 457 | trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 458 | 459 | # Copy the file name to the temp name. 460 | (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && 461 | 462 | # and set any options; do chmod last to preserve setuid bits. 463 | # 464 | # If any of these fail, we abort the whole thing. 
If we want to 465 | # ignore errors from any of these, just make sure not to ignore 466 | # errors from the above "$doit $cpprog $src $dsttmp" command. 467 | # 468 | { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && 469 | { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && 470 | { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && 471 | { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && 472 | 473 | # If -C, don't bother to copy if it wouldn't change the file. 474 | if $copy_on_change && 475 | old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && 476 | new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && 477 | 478 | eval "$initialize_posix_glob" && 479 | $posix_glob set -f && 480 | set X $old && old=:$2:$4:$5:$6 && 481 | set X $new && new=:$2:$4:$5:$6 && 482 | $posix_glob set +f && 483 | 484 | test "$old" = "$new" && 485 | $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 486 | then 487 | rm -f "$dsttmp" 488 | else 489 | # Rename the file to the real destination. 490 | $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || 491 | 492 | # The rename failed, perhaps because mv can't rename something else 493 | # to itself, or perhaps because mv is so ancient that it does not 494 | # support -f. 495 | { 496 | # Now remove or move aside any old file at destination location. 497 | # We try this two ways since rm can't unlink itself on some 498 | # systems and the destination file might be busy for other 499 | # reasons. In this case, the final cleanup might fail but the new 500 | # file should still install successfully. 501 | { 502 | test ! -f "$dst" || 503 | $doit $rmcmd -f "$dst" 2>/dev/null || 504 | { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && 505 | { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } 506 | } || 507 | { echo "$0: cannot unlink or rename $dst" >&2 508 | (exit 1); exit 1 509 | } 510 | } && 511 | 512 | # Now rename the file to the real destination. 
513 | $doit $mvcmd "$dsttmp" "$dst" 514 | } 515 | fi || exit 1 516 | 517 | trap '' 0 518 | fi 519 | done 520 | 521 | # Local variables: 522 | # eval: (add-hook 'write-file-hooks 'time-stamp) 523 | # time-stamp-start: "scriptversion=" 524 | # time-stamp-format: "%:y-%02m-%02d.%02H" 525 | # time-stamp-time-zone: "UTC" 526 | # time-stamp-end: "; # UTC" 527 | # End: 528 | -------------------------------------------------------------------------------- /build-aux/ltoptions.m4: -------------------------------------------------------------------------------- 1 | # Helper functions for option handling. -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, 4 | # Inc. 5 | # Written by Gary V. Vaughan, 2004 6 | # 7 | # This file is free software; the Free Software Foundation gives 8 | # unlimited permission to copy and/or distribute it, with or without 9 | # modifications, as long as this notice is preserved. 10 | 11 | # serial 7 ltoptions.m4 12 | 13 | # This is to help aclocal find these macros, as it can't see m4_define. 14 | AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) 15 | 16 | 17 | # _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) 18 | # ------------------------------------------ 19 | m4_define([_LT_MANGLE_OPTION], 20 | [[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) 21 | 22 | 23 | # _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) 24 | # --------------------------------------- 25 | # Set option OPTION-NAME for macro MACRO-NAME, and if there is a 26 | # matching handler defined, dispatch to it. Other OPTION-NAMEs are 27 | # saved as a flag. 
28 | m4_define([_LT_SET_OPTION], 29 | [m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl 30 | m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), 31 | _LT_MANGLE_DEFUN([$1], [$2]), 32 | [m4_warning([Unknown $1 option `$2'])])[]dnl 33 | ]) 34 | 35 | 36 | # _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) 37 | # ------------------------------------------------------------ 38 | # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 39 | m4_define([_LT_IF_OPTION], 40 | [m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) 41 | 42 | 43 | # _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) 44 | # ------------------------------------------------------- 45 | # Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME 46 | # are set. 47 | m4_define([_LT_UNLESS_OPTIONS], 48 | [m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), 49 | [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), 50 | [m4_define([$0_found])])])[]dnl 51 | m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 52 | ])[]dnl 53 | ]) 54 | 55 | 56 | # _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) 57 | # ---------------------------------------- 58 | # OPTION-LIST is a space-separated list of Libtool options associated 59 | # with MACRO-NAME. If any OPTION has a matching handler declared with 60 | # LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about 61 | # the unknown option and exit. 62 | m4_defun([_LT_SET_OPTIONS], 63 | [# Set options 64 | m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), 65 | [_LT_SET_OPTION([$1], _LT_Option)]) 66 | 67 | m4_if([$1],[LT_INIT],[ 68 | dnl 69 | dnl Simply set some default values (i.e off) if boolean options were not 70 | dnl specified: 71 | _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no 72 | ]) 73 | _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no 74 | ]) 75 | dnl 76 | dnl If no reference was made to various pairs of opposing options, then 77 | dnl we run the default mode handler for the pair. 
For example, if neither 78 | dnl `shared' nor `disable-shared' was passed, we enable building of shared 79 | dnl archives by default: 80 | _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) 81 | _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) 82 | _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) 83 | _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], 84 | [_LT_ENABLE_FAST_INSTALL]) 85 | ]) 86 | ])# _LT_SET_OPTIONS 87 | 88 | 89 | ## --------------------------------- ## 90 | ## Macros to handle LT_INIT options. ## 91 | ## --------------------------------- ## 92 | 93 | # _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) 94 | # ----------------------------------------- 95 | m4_define([_LT_MANGLE_DEFUN], 96 | [[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) 97 | 98 | 99 | # LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) 100 | # ----------------------------------------------- 101 | m4_define([LT_OPTION_DEFINE], 102 | [m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl 103 | ])# LT_OPTION_DEFINE 104 | 105 | 106 | # dlopen 107 | # ------ 108 | LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes 109 | ]) 110 | 111 | AU_DEFUN([AC_LIBTOOL_DLOPEN], 112 | [_LT_SET_OPTION([LT_INIT], [dlopen]) 113 | AC_DIAGNOSE([obsolete], 114 | [$0: Remove this warning and the call to _LT_SET_OPTION when you 115 | put the `dlopen' option into LT_INIT's first parameter.]) 116 | ]) 117 | 118 | dnl aclocal-1.4 backwards compatibility: 119 | dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) 120 | 121 | 122 | # win32-dll 123 | # --------- 124 | # Declare package support for building win32 dll's. 
125 | LT_OPTION_DEFINE([LT_INIT], [win32-dll], 126 | [enable_win32_dll=yes 127 | 128 | case $host in 129 | *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) 130 | AC_CHECK_TOOL(AS, as, false) 131 | AC_CHECK_TOOL(DLLTOOL, dlltool, false) 132 | AC_CHECK_TOOL(OBJDUMP, objdump, false) 133 | ;; 134 | esac 135 | 136 | test -z "$AS" && AS=as 137 | _LT_DECL([], [AS], [1], [Assembler program])dnl 138 | 139 | test -z "$DLLTOOL" && DLLTOOL=dlltool 140 | _LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl 141 | 142 | test -z "$OBJDUMP" && OBJDUMP=objdump 143 | _LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl 144 | ])# win32-dll 145 | 146 | AU_DEFUN([AC_LIBTOOL_WIN32_DLL], 147 | [AC_REQUIRE([AC_CANONICAL_HOST])dnl 148 | _LT_SET_OPTION([LT_INIT], [win32-dll]) 149 | AC_DIAGNOSE([obsolete], 150 | [$0: Remove this warning and the call to _LT_SET_OPTION when you 151 | put the `win32-dll' option into LT_INIT's first parameter.]) 152 | ]) 153 | 154 | dnl aclocal-1.4 backwards compatibility: 155 | dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) 156 | 157 | 158 | # _LT_ENABLE_SHARED([DEFAULT]) 159 | # ---------------------------- 160 | # implement the --enable-shared flag, and supports the `shared' and 161 | # `disable-shared' LT_INIT options. 162 | # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 163 | m4_define([_LT_ENABLE_SHARED], 164 | [m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl 165 | AC_ARG_ENABLE([shared], 166 | [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], 167 | [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], 168 | [p=${PACKAGE-default} 169 | case $enableval in 170 | yes) enable_shared=yes ;; 171 | no) enable_shared=no ;; 172 | *) 173 | enable_shared=no 174 | # Look at the argument we got. We use all the common list separators. 
175 | lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," 176 | for pkg in $enableval; do 177 | IFS="$lt_save_ifs" 178 | if test "X$pkg" = "X$p"; then 179 | enable_shared=yes 180 | fi 181 | done 182 | IFS="$lt_save_ifs" 183 | ;; 184 | esac], 185 | [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) 186 | 187 | _LT_DECL([build_libtool_libs], [enable_shared], [0], 188 | [Whether or not to build shared libraries]) 189 | ])# _LT_ENABLE_SHARED 190 | 191 | LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) 192 | LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) 193 | 194 | # Old names: 195 | AC_DEFUN([AC_ENABLE_SHARED], 196 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) 197 | ]) 198 | 199 | AC_DEFUN([AC_DISABLE_SHARED], 200 | [_LT_SET_OPTION([LT_INIT], [disable-shared]) 201 | ]) 202 | 203 | AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) 204 | AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) 205 | 206 | dnl aclocal-1.4 backwards compatibility: 207 | dnl AC_DEFUN([AM_ENABLE_SHARED], []) 208 | dnl AC_DEFUN([AM_DISABLE_SHARED], []) 209 | 210 | 211 | 212 | # _LT_ENABLE_STATIC([DEFAULT]) 213 | # ---------------------------- 214 | # implement the --enable-static flag, and support the `static' and 215 | # `disable-static' LT_INIT options. 216 | # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 217 | m4_define([_LT_ENABLE_STATIC], 218 | [m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl 219 | AC_ARG_ENABLE([static], 220 | [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], 221 | [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], 222 | [p=${PACKAGE-default} 223 | case $enableval in 224 | yes) enable_static=yes ;; 225 | no) enable_static=no ;; 226 | *) 227 | enable_static=no 228 | # Look at the argument we got. We use all the common list separators. 
229 | lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," 230 | for pkg in $enableval; do 231 | IFS="$lt_save_ifs" 232 | if test "X$pkg" = "X$p"; then 233 | enable_static=yes 234 | fi 235 | done 236 | IFS="$lt_save_ifs" 237 | ;; 238 | esac], 239 | [enable_static=]_LT_ENABLE_STATIC_DEFAULT) 240 | 241 | _LT_DECL([build_old_libs], [enable_static], [0], 242 | [Whether or not to build static libraries]) 243 | ])# _LT_ENABLE_STATIC 244 | 245 | LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) 246 | LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) 247 | 248 | # Old names: 249 | AC_DEFUN([AC_ENABLE_STATIC], 250 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) 251 | ]) 252 | 253 | AC_DEFUN([AC_DISABLE_STATIC], 254 | [_LT_SET_OPTION([LT_INIT], [disable-static]) 255 | ]) 256 | 257 | AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) 258 | AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) 259 | 260 | dnl aclocal-1.4 backwards compatibility: 261 | dnl AC_DEFUN([AM_ENABLE_STATIC], []) 262 | dnl AC_DEFUN([AM_DISABLE_STATIC], []) 263 | 264 | 265 | 266 | # _LT_ENABLE_FAST_INSTALL([DEFAULT]) 267 | # ---------------------------------- 268 | # implement the --enable-fast-install flag, and support the `fast-install' 269 | # and `disable-fast-install' LT_INIT options. 270 | # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 271 | m4_define([_LT_ENABLE_FAST_INSTALL], 272 | [m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl 273 | AC_ARG_ENABLE([fast-install], 274 | [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], 275 | [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], 276 | [p=${PACKAGE-default} 277 | case $enableval in 278 | yes) enable_fast_install=yes ;; 279 | no) enable_fast_install=no ;; 280 | *) 281 | enable_fast_install=no 282 | # Look at the argument we got. We use all the common list separators. 
283 | lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," 284 | for pkg in $enableval; do 285 | IFS="$lt_save_ifs" 286 | if test "X$pkg" = "X$p"; then 287 | enable_fast_install=yes 288 | fi 289 | done 290 | IFS="$lt_save_ifs" 291 | ;; 292 | esac], 293 | [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) 294 | 295 | _LT_DECL([fast_install], [enable_fast_install], [0], 296 | [Whether or not to optimize for fast installation])dnl 297 | ])# _LT_ENABLE_FAST_INSTALL 298 | 299 | LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) 300 | LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) 301 | 302 | # Old names: 303 | AU_DEFUN([AC_ENABLE_FAST_INSTALL], 304 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) 305 | AC_DIAGNOSE([obsolete], 306 | [$0: Remove this warning and the call to _LT_SET_OPTION when you put 307 | the `fast-install' option into LT_INIT's first parameter.]) 308 | ]) 309 | 310 | AU_DEFUN([AC_DISABLE_FAST_INSTALL], 311 | [_LT_SET_OPTION([LT_INIT], [disable-fast-install]) 312 | AC_DIAGNOSE([obsolete], 313 | [$0: Remove this warning and the call to _LT_SET_OPTION when you put 314 | the `disable-fast-install' option into LT_INIT's first parameter.]) 315 | ]) 316 | 317 | dnl aclocal-1.4 backwards compatibility: 318 | dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) 319 | dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) 320 | 321 | 322 | # _LT_WITH_PIC([MODE]) 323 | # -------------------- 324 | # implement the --with-pic flag, and support the `pic-only' and `no-pic' 325 | # LT_INIT options. 326 | # MODE is either `yes' or `no'. If omitted, it defaults to `both'. 327 | m4_define([_LT_WITH_PIC], 328 | [AC_ARG_WITH([pic], 329 | [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], 330 | [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], 331 | [lt_p=${PACKAGE-default} 332 | case $withval in 333 | yes|no) pic_mode=$withval ;; 334 | *) 335 | pic_mode=default 336 | # Look at the argument we got. 
We use all the common list separators. 337 | lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," 338 | for lt_pkg in $withval; do 339 | IFS="$lt_save_ifs" 340 | if test "X$lt_pkg" = "X$lt_p"; then 341 | pic_mode=yes 342 | fi 343 | done 344 | IFS="$lt_save_ifs" 345 | ;; 346 | esac], 347 | [pic_mode=default]) 348 | 349 | test -z "$pic_mode" && pic_mode=m4_default([$1], [default]) 350 | 351 | _LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl 352 | ])# _LT_WITH_PIC 353 | 354 | LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) 355 | LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) 356 | 357 | # Old name: 358 | AU_DEFUN([AC_LIBTOOL_PICMODE], 359 | [_LT_SET_OPTION([LT_INIT], [pic-only]) 360 | AC_DIAGNOSE([obsolete], 361 | [$0: Remove this warning and the call to _LT_SET_OPTION when you 362 | put the `pic-only' option into LT_INIT's first parameter.]) 363 | ]) 364 | 365 | dnl aclocal-1.4 backwards compatibility: 366 | dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) 367 | 368 | ## ----------------- ## 369 | ## LTDL_INIT Options ## 370 | ## ----------------- ## 371 | 372 | m4_define([_LTDL_MODE], []) 373 | LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], 374 | [m4_define([_LTDL_MODE], [nonrecursive])]) 375 | LT_OPTION_DEFINE([LTDL_INIT], [recursive], 376 | [m4_define([_LTDL_MODE], [recursive])]) 377 | LT_OPTION_DEFINE([LTDL_INIT], [subproject], 378 | [m4_define([_LTDL_MODE], [subproject])]) 379 | 380 | m4_define([_LTDL_TYPE], []) 381 | LT_OPTION_DEFINE([LTDL_INIT], [installable], 382 | [m4_define([_LTDL_TYPE], [installable])]) 383 | LT_OPTION_DEFINE([LTDL_INIT], [convenience], 384 | [m4_define([_LTDL_TYPE], [convenience])]) 385 | -------------------------------------------------------------------------------- /build-aux/ltsugar.m4: -------------------------------------------------------------------------------- 1 | # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- 2 | # 3 | # Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. 
4 | # Written by Gary V. Vaughan, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 9 | 10 | # serial 6 ltsugar.m4 11 | 12 | # This is to help aclocal find these macros, as it can't see m4_define. 13 | AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) 14 | 15 | 16 | # lt_join(SEP, ARG1, [ARG2...]) 17 | # ----------------------------- 18 | # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their 19 | # associated separator. 20 | # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier 21 | # versions in m4sugar had bugs. 22 | m4_define([lt_join], 23 | [m4_if([$#], [1], [], 24 | [$#], [2], [[$2]], 25 | [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) 26 | m4_define([_lt_join], 27 | [m4_if([$#$2], [2], [], 28 | [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) 29 | 30 | 31 | # lt_car(LIST) 32 | # lt_cdr(LIST) 33 | # ------------ 34 | # Manipulate m4 lists. 35 | # These macros are necessary as long as we still need to support 36 | # Autoconf-2.59 which quotes differently. 37 | m4_define([lt_car], [[$1]]) 38 | m4_define([lt_cdr], 39 | [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], 40 | [$#], 1, [], 41 | [m4_dquote(m4_shift($@))])]) 42 | m4_define([lt_unquote], $1) 43 | 44 | 45 | # lt_append(MACRO-NAME, STRING, [SEPARATOR]) 46 | # ------------------------------------------ 47 | # Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. 48 | # Note that neither SEPARATOR nor STRING are expanded; they are appended 49 | # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). 50 | # No SEPARATOR is output if MACRO-NAME was previously undefined (different 51 | # than defined and empty).
52 | # 53 | # This macro is needed until we can rely on Autoconf 2.62, since earlier 54 | # versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 55 | m4_define([lt_append], 56 | [m4_define([$1], 57 | m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) 58 | 59 | 60 | 61 | # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) 62 | # ---------------------------------------------------------- 63 | # Produce a SEP delimited list of all paired combinations of elements of 64 | # PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list 65 | # has the form PREFIXmINFIXSUFFIXn. 66 | # Needed until we can rely on m4_combine added in Autoconf 2.62. 67 | m4_define([lt_combine], 68 | [m4_if(m4_eval([$# > 3]), [1], 69 | [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl 70 | [[m4_foreach([_Lt_prefix], [$2], 71 | [m4_foreach([_Lt_suffix], 72 | ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, 73 | [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) 74 | 75 | 76 | # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) 77 | # ----------------------------------------------------------------------- 78 | # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited 79 | # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
80 | m4_define([lt_if_append_uniq], 81 | [m4_ifdef([$1], 82 | [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], 83 | [lt_append([$1], [$2], [$3])$4], 84 | [$5])], 85 | [lt_append([$1], [$2], [$3])$4])]) 86 | 87 | 88 | # lt_dict_add(DICT, KEY, VALUE) 89 | # ----------------------------- 90 | m4_define([lt_dict_add], 91 | [m4_define([$1($2)], [$3])]) 92 | 93 | 94 | # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) 95 | # -------------------------------------------- 96 | m4_define([lt_dict_add_subkey], 97 | [m4_define([$1($2:$3)], [$4])]) 98 | 99 | 100 | # lt_dict_fetch(DICT, KEY, [SUBKEY]) 101 | # ---------------------------------- 102 | m4_define([lt_dict_fetch], 103 | [m4_ifval([$3], 104 | m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), 105 | m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) 106 | 107 | 108 | # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) 109 | # ----------------------------------------------------------------- 110 | m4_define([lt_if_dict_fetch], 111 | [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], 112 | [$5], 113 | [$6])]) 114 | 115 | 116 | # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) 117 | # -------------------------------------------------------------- 118 | m4_define([lt_dict_filter], 119 | [m4_if([$5], [], [], 120 | [lt_join(m4_quote(m4_default([$4], [[, ]])), 121 | lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), 122 | [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl 123 | ]) 124 | -------------------------------------------------------------------------------- /build-aux/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004 Free Software Foundation, Inc. 
4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 9 | 10 | # @configure_input@ 11 | 12 | # serial 3337 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.4.2]) 16 | m4_define([LT_PACKAGE_REVISION], [1.3337]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.4.2' 20 | macro_revision='1.3337' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /build-aux/lt~obsolete.m4: -------------------------------------------------------------------------------- 1 | # lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- 2 | # 3 | # Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. 4 | # Written by Scott James Remnant, 2004. 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 9 | 10 | # serial 5 lt~obsolete.m4 11 | 12 | # These exist entirely to fool aclocal when bootstrapping libtool. 13 | # 14 | # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) 15 | # which have later been changed to m4_define as they aren't part of the 16 | # exported API, or moved to Autoconf or Automake where they belong. 17 | # 18 | # The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN 19 | # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us 20 | # using a macro with the same name in our local m4/libtool.m4 it'll 21 | # pull the old libtool.m4 in (it doesn't see our shiny new m4_define 22 | # and doesn't know about Autoconf macros at all.) 
23 | # 24 | # So we provide this file, which has a silly filename so it's always 25 | # included after everything else. This provides aclocal with the 26 | # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything 27 | # because those macros already exist, or will be overwritten later. 28 | # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 29 | # 30 | # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. 31 | # Yes, that means every name once taken will need to remain here until 32 | # we give up compatibility with versions before 1.7, at which point 33 | # we need to keep only those names which we still refer to. 34 | 35 | # This is to help aclocal find these macros, as it can't see m4_define. 36 | AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) 37 | 38 | m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) 39 | m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) 40 | m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) 41 | m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) 42 | m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) 43 | m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) 44 | m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) 45 | m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) 46 | m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) 47 | m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) 48 | m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) 49 | m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) 50 | m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) 51 | m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) 52 | m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) 53 | m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) 54 
| m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) 55 | m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) 56 | m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) 57 | m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) 58 | m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) 59 | m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) 60 | m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) 61 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) 62 | m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) 63 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) 64 | m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) 65 | m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) 66 | m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) 67 | m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) 68 | m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) 69 | m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) 70 | m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) 71 | m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) 72 | m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) 73 | m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) 74 | m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) 75 | m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) 76 | m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) 77 | m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) 78 | m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) 79 | m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) 80 | 
m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) 81 | m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) 82 | m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) 83 | m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) 84 | m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) 85 | m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) 86 | m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) 87 | m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) 88 | m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) 89 | m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) 90 | m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) 91 | m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) 92 | m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) 93 | m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) 94 | m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) 95 | m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) 96 | m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) 97 | m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) 98 | m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) 99 | -------------------------------------------------------------------------------- /build-aux/missing: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # Common wrapper for a few potentially missing GNU programs. 3 | 4 | scriptversion=2012-06-26.16; # UTC 5 | 6 | # Copyright (C) 1996-2013 Free Software Foundation, Inc. 7 | # Originally written by Fran,cois Pinard , 1996. 
8 | 9 | # This program is free software; you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation; either version 2, or (at your option) 12 | # any later version. 13 | 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 21 | 22 | # As a special exception to the GNU General Public License, if you 23 | # distribute this file as part of a program that contains a 24 | # configuration script generated by Autoconf, you may include it under 25 | # the same distribution terms that you use for the rest of that program. 26 | 27 | if test $# -eq 0; then 28 | echo 1>&2 "Try '$0 --help' for more information" 29 | exit 1 30 | fi 31 | 32 | case $1 in 33 | 34 | --is-lightweight) 35 | # Used by our autoconf macros to check whether the available missing 36 | # script is modern enough. 37 | exit 0 38 | ;; 39 | 40 | --run) 41 | # Back-compat with the calling convention used by older automake. 42 | shift 43 | ;; 44 | 45 | -h|--h|--he|--hel|--help) 46 | echo "\ 47 | $0 [OPTION]... PROGRAM [ARGUMENT]... 48 | 49 | Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due 50 | to PROGRAM being missing or too old. 51 | 52 | Options: 53 | -h, --help display this help and exit 54 | -v, --version output version information and exit 55 | 56 | Supported PROGRAM values: 57 | aclocal autoconf autoheader autom4te automake makeinfo 58 | bison yacc flex lex help2man 59 | 60 | Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and 61 | 'g' are ignored when checking the name. 62 | 63 | Send bug reports to ." 64 | exit $?
65 | ;; 66 | 67 | -v|--v|--ve|--ver|--vers|--versi|--versio|--version) 68 | echo "missing $scriptversion (GNU Automake)" 69 | exit $? 70 | ;; 71 | 72 | -*) 73 | echo 1>&2 "$0: unknown '$1' option" 74 | echo 1>&2 "Try '$0 --help' for more information" 75 | exit 1 76 | ;; 77 | 78 | esac 79 | 80 | # Run the given program, remember its exit status. 81 | "$@"; st=$? 82 | 83 | # If it succeeded, we are done. 84 | test $st -eq 0 && exit 0 85 | 86 | # Also exit now if it failed (or wasn't found), and '--version' was 87 | # passed; such an option is passed most likely to detect whether the 88 | # program is present and works. 89 | case $2 in --version|--help) exit $st;; esac 90 | 91 | # Exit code 63 means version mismatch. This often happens when the user 92 | # tries to use an ancient version of a tool on a file that requires a 93 | # minimum version. 94 | if test $st -eq 63; then 95 | msg="probably too old" 96 | elif test $st -eq 127; then 97 | # Program was missing. 98 | msg="missing on your system" 99 | else 100 | # Program was found and executed, but failed. Give up.
101 | exit $st 102 | fi 103 | 104 | perl_URL=http://www.perl.org/ 105 | flex_URL=http://flex.sourceforge.net/ 106 | gnu_software_URL=http://www.gnu.org/software 107 | 108 | program_details () 109 | { 110 | case $1 in 111 | aclocal|automake) 112 | echo "The '$1' program is part of the GNU Automake package:" 113 | echo "<$gnu_software_URL/automake>" 114 | echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" 115 | echo "<$gnu_software_URL/autoconf>" 116 | echo "<$gnu_software_URL/m4/>" 117 | echo "<$perl_URL>" 118 | ;; 119 | autoconf|autom4te|autoheader) 120 | echo "The '$1' program is part of the GNU Autoconf package:" 121 | echo "<$gnu_software_URL/autoconf/>" 122 | echo "It also requires GNU m4 and Perl in order to run:" 123 | echo "<$gnu_software_URL/m4/>" 124 | echo "<$perl_URL>" 125 | ;; 126 | esac 127 | } 128 | 129 | give_advice () 130 | { 131 | # Normalize the program name: strip one leading 'gnu-', 'gnu' or 'g'. 132 | normalized_program=`echo "$1" | sed ' 133 | s/^gnu-//; t 134 | s/^gnu//; t 135 | s/^g//; t'` 136 | 137 | printf '%s\n' "'$1' is $msg." 138 | 139 | configure_deps="'configure.ac' or m4 files included by 'configure.ac'" 140 | case $normalized_program in 141 | autoconf*) 142 | echo "You should only need it if you modified 'configure.ac'," 143 | echo "or m4 files included by it." 144 | program_details 'autoconf' 145 | ;; 146 | autoheader*) 147 | echo "You should only need it if you modified 'acconfig.h' or" 148 | echo "$configure_deps." 149 | program_details 'autoheader' 150 | ;; 151 | automake*) 152 | echo "You should only need it if you modified 'Makefile.am' or" 153 | echo "$configure_deps." 154 | program_details 'automake' 155 | ;; 156 | aclocal*) 157 | echo "You should only need it if you modified 'acinclude.m4' or" 158 | echo "$configure_deps." 159 | program_details 'aclocal' 160 | ;; 161 | autom4te*) 162 | echo "You might have modified some maintainer files that require" 163 | echo "the 'autom4te' program to be rebuilt."
164 | program_details 'autom4te' 165 | ;; 166 | bison*|yacc*) 167 | echo "You should only need it if you modified a '.y' file." 168 | echo "You may want to install the GNU Bison package:" 169 | echo "<$gnu_software_URL/bison/>" 170 | ;; 171 | lex*|flex*) 172 | echo "You should only need it if you modified a '.l' file." 173 | echo "You may want to install the Fast Lexical Analyzer package:" 174 | echo "<$flex_URL>" 175 | ;; 176 | help2man*) 177 | echo "You should only need it if you modified a dependency" \ 178 | "of a man page." 179 | echo "You may want to install the GNU Help2man package:" 180 | echo "<$gnu_software_URL/help2man/>" 181 | ;; 182 | makeinfo*) 183 | echo "You should only need it if you modified a '.texi' file, or" 184 | echo "any other file indirectly affecting the aspect of the manual." 185 | echo "You might want to install the Texinfo package:" 186 | echo "<$gnu_software_URL/texinfo/>" 187 | echo "The spurious makeinfo call might also be the consequence of" 188 | echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" 189 | echo "want to install GNU make:" 190 | echo "<$gnu_software_URL/make/>" 191 | ;; 192 | *) 193 | echo "You might have modified some files without having the proper" 194 | echo "tools for further handling them. Check the 'README' file, it" 195 | echo "often tells you about the needed prerequisites for installing" 196 | echo "this package. You may also peek at any GNU archive site, in" 197 | echo "case some other package contains this missing '$1' program." 198 | ;; 199 | esac 200 | } 201 | 202 | give_advice "$1" | sed -e '1s/^/WARNING: /' \ 203 | -e '2,$s/^/ /' >&2 204 | 205 | # Propagate the correct exit status (expected to be 127 for a program 206 | # not found, 63 for a program that failed due to version mismatch).
207 | exit $st 208 | 209 | # Local variables: 210 | # eval: (add-hook 'write-file-hooks 'time-stamp) 211 | # time-stamp-start: "scriptversion=" 212 | # time-stamp-format: "%:y-%02m-%02d.%02H" 213 | # time-stamp-time-zone: "UTC" 214 | # time-stamp-end: "; # UTC" 215 | # End: 216 | -------------------------------------------------------------------------------- /build-aux/pkg.m4: -------------------------------------------------------------------------------- 1 | dnl pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- 2 | dnl serial 11 (pkg-config-0.29.1) 3 | dnl 4 | dnl Copyright © 2004 Scott James Remnant . 5 | dnl Copyright © 2012-2015 Dan Nicholson 6 | dnl 7 | dnl This program is free software; you can redistribute it and/or modify 8 | dnl it under the terms of the GNU General Public License as published by 9 | dnl the Free Software Foundation; either version 2 of the License, or 10 | dnl (at your option) any later version. 11 | dnl 12 | dnl This program is distributed in the hope that it will be useful, but 13 | dnl WITHOUT ANY WARRANTY; without even the implied warranty of 14 | dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | dnl General Public License for more details. 16 | dnl 17 | dnl You should have received a copy of the GNU General Public License 18 | dnl along with this program; if not, write to the Free Software 19 | dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 20 | dnl 02111-1307, USA. 21 | dnl 22 | dnl As a special exception to the GNU General Public License, if you 23 | dnl distribute this file as part of a program that contains a 24 | dnl configuration script generated by Autoconf, you may include it under 25 | dnl the same distribution terms that you use for the rest of that 26 | dnl program. 27 | 28 | dnl PKG_PREREQ(MIN-VERSION) 29 | dnl ----------------------- 30 | dnl Since: 0.29 31 | dnl 32 | dnl Verify that the version of the pkg-config macros are at least 33 | dnl MIN-VERSION. 
Unlike PKG_PROG_PKG_CONFIG, which checks the user's 34 | dnl installed version of pkg-config, this checks the developer's version 35 | dnl of pkg.m4 when generating configure. 36 | dnl 37 | dnl To ensure that this macro is defined, also add: 38 | dnl m4_ifndef([PKG_PREREQ], 39 | dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) 40 | dnl 41 | dnl See the "Since" comment for each macro you use to see what version 42 | dnl of the macros you require. 43 | m4_defun([PKG_PREREQ], 44 | [m4_define([PKG_MACROS_VERSION], [0.29.1]) 45 | m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, 46 | [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) 47 | ])dnl PKG_PREREQ 48 | 49 | dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) 50 | dnl ---------------------------------- 51 | dnl Since: 0.16 52 | dnl 53 | dnl Search for the pkg-config tool and set the PKG_CONFIG variable to 54 | dnl first found in the path. Checks that the version of pkg-config found 55 | dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is 56 | dnl used since that's the first version where most current features of 57 | dnl pkg-config existed. 
58 | AC_DEFUN([PKG_PROG_PKG_CONFIG], 59 | [m4_pattern_forbid([^_?PKG_[A-Z_]+$]) 60 | m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) 61 | m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) 62 | AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) 63 | AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) 64 | AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) 65 | 66 | if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then 67 | AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) 68 | fi 69 | if test -n "$PKG_CONFIG"; then 70 | _pkg_min_version=m4_default([$1], [0.9.0]) 71 | AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) 72 | if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then 73 | AC_MSG_RESULT([yes]) 74 | else 75 | AC_MSG_RESULT([no]) 76 | PKG_CONFIG="" 77 | fi 78 | fi[]dnl 79 | ])dnl PKG_PROG_PKG_CONFIG 80 | 81 | dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) 82 | dnl ------------------------------------------------------------------- 83 | dnl Since: 0.18 84 | dnl 85 | dnl Check to see whether a particular set of modules exists. Similar to 86 | dnl PKG_CHECK_MODULES(), but does not set variables or print errors. 
87 | dnl 88 | dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) 89 | dnl only at the first occurrence in configure.ac, so if the first place 90 | dnl it's called might be skipped (such as if it is within an "if"), you 91 | dnl have to call PKG_CHECK_EXISTS manually. 92 | AC_DEFUN([PKG_CHECK_EXISTS], 93 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl 94 | if test -n "$PKG_CONFIG" && \ 95 | AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then 96 | m4_default([$2], [:]) 97 | m4_ifvaln([$3], [else 98 | $3])dnl 99 | fi]) 100 | 101 | dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) 102 | dnl --------------------------------------------- 103 | dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting 104 | dnl pkg_failed based on the result. 105 | m4_define([_PKG_CONFIG], 106 | [if test -n "$$1"; then 107 | pkg_cv_[]$1="$$1" 108 | elif test -n "$PKG_CONFIG"; then 109 | PKG_CHECK_EXISTS([$3], 110 | [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` 111 | test "x$?" != "x0" && pkg_failed=yes ], 112 | [pkg_failed=yes]) 113 | else 114 | pkg_failed=untried 115 | fi[]dnl 116 | ])dnl _PKG_CONFIG 117 | 118 | dnl _PKG_SHORT_ERRORS_SUPPORTED 119 | dnl --------------------------- 120 | dnl Internal check to see if pkg-config supports short errors.
121 | AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], 122 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG]) 123 | if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then 124 | _pkg_short_errors_supported=yes 125 | else 126 | _pkg_short_errors_supported=no 127 | fi[]dnl 128 | ])dnl _PKG_SHORT_ERRORS_SUPPORTED 129 | 130 | 131 | dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], 132 | dnl [ACTION-IF-NOT-FOUND]) 133 | dnl -------------------------------------------------------------- 134 | dnl Since: 0.4.0 135 | dnl 136 | dnl Note that if there is a possibility the first call to 137 | dnl PKG_CHECK_MODULES might not happen, you should be sure to include an 138 | dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac 139 | AC_DEFUN([PKG_CHECK_MODULES], 140 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl 141 | AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl 142 | AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl 143 | 144 | pkg_failed=no 145 | AC_MSG_CHECKING([for $1]) 146 | 147 | _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) 148 | _PKG_CONFIG([$1][_LIBS], [libs], [$2]) 149 | 150 | m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS 151 | and $1[]_LIBS to avoid the need to call pkg-config. 
152 | See the pkg-config man page for more details.]) 153 | 154 | if test $pkg_failed = yes; then 155 | AC_MSG_RESULT([no]) 156 | _PKG_SHORT_ERRORS_SUPPORTED 157 | if test $_pkg_short_errors_supported = yes; then 158 | $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` 159 | else 160 | $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` 161 | fi 162 | # Put the nasty error message in config.log where it belongs 163 | echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD 164 | 165 | m4_default([$4], [AC_MSG_ERROR( 166 | [Package requirements ($2) were not met: 167 | 168 | $$1_PKG_ERRORS 169 | 170 | Consider adjusting the PKG_CONFIG_PATH environment variable if you 171 | installed software in a non-standard prefix. 172 | 173 | _PKG_TEXT])[]dnl 174 | ]) 175 | elif test $pkg_failed = untried; then 176 | AC_MSG_RESULT([no]) 177 | m4_default([$4], [AC_MSG_FAILURE( 178 | [The pkg-config script could not be found or is too old. Make sure it 179 | is in your PATH or set the PKG_CONFIG environment variable to the full 180 | path to pkg-config. 181 | 182 | _PKG_TEXT 183 | 184 | To get pkg-config, see .])[]dnl 185 | ]) 186 | else 187 | $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS 188 | $1[]_LIBS=$pkg_cv_[]$1[]_LIBS 189 | AC_MSG_RESULT([yes]) 190 | $3 191 | fi[]dnl 192 | ])dnl PKG_CHECK_MODULES 193 | 194 | 195 | dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], 196 | dnl [ACTION-IF-NOT-FOUND]) 197 | dnl --------------------------------------------------------------------- 198 | dnl Since: 0.29 199 | dnl 200 | dnl Checks for existence of MODULES and gathers its build flags with 201 | dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags 202 | dnl and VARIABLE-PREFIX_LIBS from --libs. 
203 | dnl 204 | dnl Note that if there is a possibility the first call to 205 | dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to 206 | dnl include an explicit call to PKG_PROG_PKG_CONFIG in your 207 | dnl configure.ac. PKG_CONFIG is temporarily suffixed with --static for the nested PKG_CHECK_MODULES call and restored from _save_PKG_CONFIG afterwards. 208 | AC_DEFUN([PKG_CHECK_MODULES_STATIC], 209 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl 210 | _save_PKG_CONFIG=$PKG_CONFIG 211 | PKG_CONFIG="$PKG_CONFIG --static" 212 | PKG_CHECK_MODULES($@) 213 | PKG_CONFIG=$_save_PKG_CONFIG[]dnl 214 | ])dnl PKG_CHECK_MODULES_STATIC 215 | 216 | 217 | dnl PKG_INSTALLDIR([DIRECTORY]) 218 | dnl ------------------------- 219 | dnl Since: 0.27 220 | dnl 221 | dnl Substitutes the variable pkgconfigdir as the location where a module 222 | dnl should install pkg-config .pc files. By default the directory is 223 | dnl $libdir/pkgconfig, but the default can be changed by passing 224 | dnl DIRECTORY. The user can override through the --with-pkgconfigdir 225 | dnl parameter. 226 | AC_DEFUN([PKG_INSTALLDIR], 227 | [m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) 228 | m4_pushdef([pkg_description], 229 | [pkg-config installation directory @<:@]pkg_default[@:>@]) 230 | AC_ARG_WITH([pkgconfigdir], 231 | [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, 232 | [with_pkgconfigdir=]pkg_default) 233 | AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) 234 | m4_popdef([pkg_default]) 235 | m4_popdef([pkg_description]) 236 | ])dnl PKG_INSTALLDIR 237 | 238 | 239 | dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) 240 | dnl -------------------------------- 241 | dnl Since: 0.27 242 | dnl 243 | dnl Substitutes the variable noarch_pkgconfigdir as the location where a 244 | dnl module should install arch-independent pkg-config .pc files. By 245 | dnl default the directory is $datadir/pkgconfig, but the default can be 246 | dnl changed by passing DIRECTORY. The user can override through the 247 | dnl --with-noarch-pkgconfigdir parameter.
248 | AC_DEFUN([PKG_NOARCH_INSTALLDIR], 249 | [m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) 250 | m4_pushdef([pkg_description], 251 | [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) 252 | AC_ARG_WITH([noarch-pkgconfigdir], 253 | [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, 254 | [with_noarch_pkgconfigdir=]pkg_default) 255 | AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) 256 | m4_popdef([pkg_default]) 257 | m4_popdef([pkg_description]) 258 | ])dnl PKG_NOARCH_INSTALLDIR 259 | 260 | 261 | dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, 262 | dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) 263 | dnl ------------------------------------------- 264 | dnl Since: 0.28 265 | dnl 266 | dnl Retrieves the value of the pkg-config variable for the given module and copies it from pkg_cv_VARIABLE into VARIABLE; runs ACTION-IF-NOT-FOUND when the result is empty and ACTION-IF-FOUND otherwise. 267 | AC_DEFUN([PKG_CHECK_VAR], 268 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl 269 | AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl 270 | 271 | _PKG_CONFIG([$1], [variable="][$3]["], [$2]) 272 | AS_VAR_COPY([$1], [pkg_cv_][$1]) 273 | 274 | AS_VAR_IF([$1], [""], [$5], [$4])dnl 275 | ])dnl PKG_CHECK_VAR 276 | -------------------------------------------------------------------------------- /build-aux/test-driver: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # test-driver - basic testsuite driver script. 3 | 4 | scriptversion=2012-06-27.10; # UTC 5 | 6 | # Copyright (C) 2011-2013 Free Software Foundation, Inc. 7 | # 8 | # This program is free software; you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation; either version 2, or (at your option) 11 | # any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 20 | 21 | # As a special exception to the GNU General Public License, if you 22 | # distribute this file as part of a program that contains a 23 | # configuration script generated by Autoconf, you may include it under 24 | # the same distribution terms that you use for the rest of that program. 25 | 26 | # This file is maintained in Automake, please report 27 | # bugs to <bug-automake@gnu.org> or send patches to 28 | # <automake-patches@gnu.org>. 29 | 30 | # Make unconditional expansion of undefined variables an error. This 31 | # helps a lot in preventing typo-related bugs. 32 | set -u 33 | 34 | usage_error () 35 | { 36 | echo "$0: $*" >&2 37 | print_usage >&2 38 | exit 2 39 | } 40 | 41 | print_usage () 42 | { 43 | cat <$log_file 2>&1 96 | estatus=$? 97 | if test $enable_hard_errors = no && test $estatus -eq 99; then 98 | estatus=1 99 | fi 100 | 101 | case $estatus:$expect_failure in 102 | 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; 103 | 0:*) col=$grn res=PASS recheck=no gcopy=no;; 104 | 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; 105 | 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; 106 | *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; 107 | *:*) col=$red res=FAIL recheck=yes gcopy=yes;; 108 | esac 109 | 110 | # Report outcome to console. 111 | echo "${col}${res}${std}: $test_name" 112 | 113 | # Register the test result, and other relevant metadata.
114 | echo ":test-result: $res" > $trs_file 115 | echo ":global-test-result: $res" >> $trs_file 116 | echo ":recheck: $recheck" >> $trs_file 117 | echo ":copy-in-global-log: $gcopy" >> $trs_file 118 | 119 | # Local Variables: 120 | # mode: shell-script 121 | # sh-indentation: 2 122 | # eval: (add-hook 'write-file-hooks 'time-stamp) 123 | # time-stamp-start: "scriptversion=" 124 | # time-stamp-format: "%:y-%02m-%02d.%02H" 125 | # time-stamp-time-zone: "UTC" 126 | # time-stamp-end: "; # UTC" 127 | # End: 128 | -------------------------------------------------------------------------------- /config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define if --enable-debug option is found. */ 4 | #undef DEBUG 5 | 6 | /* Define to 1 if you have the <dlfcn.h> header file. */ 7 | #undef HAVE_DLFCN_H 8 | 9 | /* Define to 1 if you have the `dup2' function. */ 10 | #undef HAVE_DUP2 11 | 12 | /* Define to 1 if you have the <fcntl.h> header file. */ 13 | #undef HAVE_FCNTL_H 14 | 15 | /* Define to 1 if you have the `fork' function. */ 16 | #undef HAVE_FORK 17 | 18 | /* Define to 1 if you have the `gettimeofday' function. */ 19 | #undef HAVE_GETTIMEOFDAY 20 | 21 | /* Define to 1 if you have the <inttypes.h> header file. */ 22 | #undef HAVE_INTTYPES_H 23 | 24 | /* Define to 1 if you have the `cuda' library (-lcuda). */ 25 | #undef HAVE_LIBCUDA 26 | 27 | /* Define to 1 if you have the `cudart' library (-lcudart). */ 28 | #undef HAVE_LIBCUDART 29 | 30 | /* Define to 1 if you have the `dl' library (-ldl). */ 31 | #undef HAVE_LIBDL 32 | 33 | /* Define to 1 if you have the `pthread' library (-lpthread). */ 34 | #undef HAVE_LIBPTHREAD 35 | 36 | /* Define to 1 if your system has a GNU libc compatible `malloc' function, and 37 | to 0 otherwise. */ 38 | #undef HAVE_MALLOC 39 | 40 | /* Define to 1 if you have the <memory.h> header file.
*/ 41 | #undef HAVE_MEMORY_H 42 | 43 | /* Define to 1 if you have the `mempcpy' function. */ 44 | #undef HAVE_MEMPCPY 45 | 46 | /* Define to 1 if you have the `mkfifo' function. */ 47 | #undef HAVE_MKFIFO 48 | 49 | /* Define to 1 if you have the <stdint.h> header file. */ 50 | #undef HAVE_STDINT_H 51 | 52 | /* Define to 1 if you have the <stdlib.h> header file. */ 53 | #undef HAVE_STDLIB_H 54 | 55 | /* Define to 1 if you have the <strings.h> header file. */ 56 | #undef HAVE_STRINGS_H 57 | 58 | /* Define to 1 if you have the <string.h> header file. */ 59 | #undef HAVE_STRING_H 60 | 61 | /* Define to 1 if you have the `strtol' function. */ 62 | #undef HAVE_STRTOL 63 | 64 | /* Define to 1 if you have the <sys/stat.h> header file. */ 65 | #undef HAVE_SYS_STAT_H 66 | 67 | /* Define to 1 if you have the <sys/time.h> header file. */ 68 | #undef HAVE_SYS_TIME_H 69 | 70 | /* Define to 1 if you have the <sys/types.h> header file. */ 71 | #undef HAVE_SYS_TYPES_H 72 | 73 | /* Define to 1 if you have the <unistd.h> header file. */ 74 | #undef HAVE_UNISTD_H 75 | 76 | /* Define to 1 if you have the `vfork' function. */ 77 | #undef HAVE_VFORK 78 | 79 | /* Define to 1 if you have the <vfork.h> header file. */ 80 | #undef HAVE_VFORK_H 81 | 82 | /* Define to 1 if `fork' works. */ 83 | #undef HAVE_WORKING_FORK 84 | 85 | /* Define to 1 if `vfork' works. */ 86 | #undef HAVE_WORKING_VFORK 87 | 88 | /* Define to the sub-directory in which libtool stores uninstalled libraries. 89 | */ 90 | #undef LT_OBJDIR 91 | 92 | /* Define to 1 if your C compiler doesn't accept -c and -o together. */ 93 | #undef NO_MINUS_C_MINUS_O 94 | 95 | /* Name of package */ 96 | #undef PACKAGE 97 | 98 | /* Define to the address where bug reports for this package should be sent. */ 99 | #undef PACKAGE_BUGREPORT 100 | 101 | /* Define to the full name of this package. */ 102 | #undef PACKAGE_NAME 103 | 104 | /* Define to the full name and version of this package. */ 105 | #undef PACKAGE_STRING 106 | 107 | /* Define to the one symbol short name of this package.
*/ 108 | #undef PACKAGE_TARNAME 109 | 110 | /* Define to the home page for this package. */ 111 | #undef PACKAGE_URL 112 | 113 | /* Define to the version of this package. */ 114 | #undef PACKAGE_VERSION 115 | 116 | /* Define to 1 if you have the ANSI C header files. */ 117 | #undef STDC_HEADERS 118 | 119 | /* Version number of package */ 120 | #undef VERSION 121 | 122 | /* Define to `__inline__' or `__inline' if that's what the C compiler 123 | calls it, or to nothing if 'inline' is not supported under any name. */ 124 | #ifndef __cplusplus 125 | #undef inline 126 | #endif 127 | 128 | /* Define to rpl_malloc if the replacement function should be used. */ 129 | #undef malloc 130 | 131 | /* Define to `int' if <sys/types.h> does not define. */ 132 | #undef pid_t 133 | 134 | /* Define to `unsigned int' if <sys/types.h> does not define. */ 135 | #undef size_t 136 | 137 | /* Define to `int' if <sys/types.h> does not define. */ 138 | #undef ssize_t 139 | 140 | /* Define as `fork' if `vfork' does not work. */ 141 | #undef vfork 142 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ([2.69]) 2 | 3 | AC_INIT([mrcuda], [7.0.0], [markthub.p.aa@m.titech.ac.jp]) 4 | AC_CONFIG_AUX_DIR([build-aux]) 5 | AM_INIT_AUTOMAKE 6 | AC_CONFIG_MACRO_DIR([build-aux]) 7 | AC_CONFIG_HEADERS([config.h]) 8 | AC_CONFIG_FILES([Makefile src/Makefile tests/Makefile scripts/Makefile]) 9 | AC_ENABLE_SHARED(yes) 10 | AC_ENABLE_STATIC(no) 11 | 12 | AC_ARG_WITH([rcuda], [ 13 | AS_HELP_STRING([--with-rcuda=[RCUDA_PATH]], 14 | [optionally specify the installation path of rCUDA.] 15 | ) 16 | ]) 17 | 18 | AC_ARG_WITH([nvcc], [ 19 | AS_HELP_STRING([--with-nvcc=[nvcc]], 20 | [optionally specify nvcc you want to use.]
21 | )], [ 22 | AC_SUBST(NVCC, "${with_nvcc}") 23 | ] 24 | ) 25 | 26 | AC_ARG_WITH([libcudart], [ 27 | AS_HELP_STRING([--with-libcudart=[libcudart]], 28 | [optionally specify CUDA libcudart you want to use.] 29 | )], [ 30 | AC_SUBST(NVIDIA_LIBCUDART, "${with_libcudart}") 31 | ] 32 | ) 33 | 34 | AC_ARG_ENABLE([debug], 35 | AS_HELP_STRING([--enable-debug], [Enable debug output])) 36 | AS_IF([test "x$enable_debug" = "xyes"], [ 37 | AC_DEFINE(DEBUG, 1, [Define if --enable-debug option is found.]) 38 | ]) 39 | 40 | # Checks for programs. String comparisons below use POSIX `=' because configure runs under /bin/sh, where `==' is not guaranteed. 41 | AM_PROG_AR 42 | AC_PROG_CC 43 | AM_PROG_CC_C_O 44 | AC_PROG_INSTALL 45 | AC_PROG_LN_S 46 | AM_PATH_PYTHON([2.7]) 47 | 48 | if test x"${NVCC}" = x""; then 49 | AC_PATH_PROG(NVCC, nvcc, no) 50 | if test x"${NVCC}" = x"no"; then 51 | AC_MSG_ERROR([Cannot find nvcc.]) 52 | fi 53 | fi 54 | 55 | if test x"${NVIDIA_LIBCUDART}" = x""; then 56 | AC_PATH_PROG(NVIDIA_LIBCUDART, [libcudart.so], no, [$LD_LIBRARY_PATH$PATH_SEPARATOR$LIBRARY_PATH]) 57 | if test x"${NVIDIA_LIBCUDART}" = x"no"; then 58 | AC_MSG_ERROR([Cannot find CUDA's libcudart.so.]) 59 | fi 60 | fi 61 | 62 | AC_PATH_PROG(RCUDA_RCUDACOMMIB, rCUDAcommIB.so, no, [$with_rcuda/lib$PATH_SEPARATOR$PATH]) 63 | if test x"${RCUDA_RCUDACOMMIB}" = x"no"; then 64 | AC_MSG_ERROR([Cannot find rCUDA. Make sure rCUDA is installed on your system.]) 65 | fi 66 | 67 | AC_PATH_PROG(RCUDA_RCUDACOMMTCP, rCUDAcommTCP.so, no, [$with_rcuda/lib$PATH_SEPARATOR$PATH]) 68 | if test x"${RCUDA_RCUDACOMMTCP}" = x"no"; then 69 | AC_MSG_ERROR([Cannot find rCUDA. Make sure rCUDA is installed on your system.]) 70 | fi 71 | 72 | AC_PATH_PROG(RCUDA_LIBCUDART, libcudart.so, no, [$with_rcuda/lib$PATH_SEPARATOR$PATH]) 73 | if test x"${RCUDA_LIBCUDART}" = x"no"; then 74 | AC_MSG_ERROR([Cannot find rCUDA.
Make sure rCUDA is installed on your system.]) 75 | fi 76 | 77 | # Checks for modules 78 | PKG_CHECK_MODULES([CHECK], [check >= 0.9.4]) 79 | PKG_CHECK_MODULES([DEPS], [glib-2.0 >= 2.24.1]) 80 | LT_INIT 81 | 82 | # Checks for libraries. 83 | # Link-test `-lcuda' via cuCtxCreate: 84 | AC_CHECK_LIB([cuda], [cuCtxCreate]) 85 | # Link-test `-lcudart' via cudaMemcpy: 86 | AC_CHECK_LIB([cudart], [cudaMemcpy]) 87 | # Link-test `-ldl' via dlsym: 88 | AC_CHECK_LIB([dl], [dlsym]) 89 | # Link-test `-lpthread' via pthread_mutex_init: 90 | AC_CHECK_LIB([pthread], [pthread_mutex_init]) 91 | 92 | # Checks for header files. 93 | AC_CHECK_HEADERS([fcntl.h stdlib.h string.h sys/time.h unistd.h]) 94 | 95 | # Checks for typedefs, structures, and compiler characteristics. 96 | AC_C_INLINE 97 | AC_TYPE_PID_T 98 | AC_TYPE_SIZE_T 99 | AC_TYPE_SSIZE_T 100 | 101 | # Checks for library functions. 102 | AC_FUNC_FORK 103 | AC_FUNC_MALLOC 104 | AC_CHECK_FUNCS([dup2 gettimeofday mempcpy mkfifo strtol]) 105 | 106 | AC_OUTPUT 107 | -------------------------------------------------------------------------------- /notes/func-list.txt: -------------------------------------------------------------------------------- 1 | extern __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void); 2 | extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func); 3 | extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol(const void *symbol, const void *src, size_t count, size_t offset __dv(0), enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)); 4 | extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind); 5 | extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, unsigned int flags); 6 | extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, size_t count); 7 | extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr); 8 | extern __host__
cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, size_t size, size_t offset); 9 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMalloc(void **devPtr, size_t size); 10 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFree(void *devPtr); 11 | extern __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), cudaStream_t stream __dv(0)); 12 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetLastError(void); 13 | extern __host__ cudaError_t CUDARTAPI cudaBindTexture(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)); 14 | extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(int x, int y, int z, int w, enum cudaChannelFormatKind f); 15 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device); 16 | extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream); 17 | extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, size_t *total); 18 | extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device); 19 | extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags( unsigned int flags ); 20 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDevice(int *device); 21 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count); 22 | 23 | /* Maybe we don't need these, hopefully */ 24 | CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev); 25 | CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev); 26 | CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev); 27 | CUresult CUDAAPI cuDeviceGetCount(int *count); 28 | CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal); 29 | -------------------------------------------------------------------------------- 
/results/memcpybw-mhelper.out: -------------------------------------------------------------------------------- 1 | prog lib count size_per_call time 2 | cudamemcpy mrcuda 0 1024 23.707000 3 | cudamemcpy mrcuda 0 2048 23.586000 4 | cudamemcpy mrcuda 0 4096 24.085000 5 | cudamemcpy mrcuda 0 8192 27.142000 6 | cudamemcpy mrcuda 0 16384 32.360000 7 | cudamemcpy mrcuda 0 32768 43.379000 8 | cudamemcpy mrcuda 0 65536 65.852000 9 | cudamemcpy mrcuda 0 131072 107.484000 10 | cudamemcpy mrcuda 0 262144 190.487000 11 | cudamemcpy mrcuda 0 524288 360.121000 12 | cudamemcpy mrcuda 0 1048576 740.892000 13 | cudamemcpy mrcuda 0 2097152 1502.484000 14 | cudamemcpy mrcuda 0 4194304 3162.210000 15 | cudamemcpy mrcuda 0 8388608 6589.956000 16 | cudamemcpy mrcuda 0 16777216 14099.772000 17 | cudamemcpy mrcuda 0 33554432 28670.813000 18 | cudamemcpy mrcuda 0 67108864 57351.585000 19 | cudamemcpy mrcuda 0 134217728 114402.483000 20 | cudamemcpy mrcuda 0 268435456 228174.487000 21 | cudamemcpy mrcuda 0 536870912 456561.788000 22 | cudamemcpy mrcuda 1 1024 25.535000 23 | cudamemcpy mrcuda 1 2048 24.536000 24 | cudamemcpy mrcuda 1 4096 24.919000 25 | cudamemcpy mrcuda 1 8192 28.170000 26 | cudamemcpy mrcuda 1 16384 33.451000 27 | cudamemcpy mrcuda 1 32768 44.960000 28 | cudamemcpy mrcuda 1 65536 67.520000 29 | cudamemcpy mrcuda 1 131072 109.729000 30 | cudamemcpy mrcuda 1 262144 192.256000 31 | cudamemcpy mrcuda 1 524288 363.553000 32 | cudamemcpy mrcuda 1 1048576 774.701000 33 | cudamemcpy mrcuda 1 2097152 1615.745000 34 | cudamemcpy mrcuda 1 4194304 3289.496000 35 | cudamemcpy mrcuda 1 8388608 6818.549000 36 | cudamemcpy mrcuda 1 16777216 14308.595000 37 | cudamemcpy mrcuda 1 33554432 28948.875000 38 | cudamemcpy mrcuda 1 67108864 57716.634000 39 | cudamemcpy mrcuda 1 134217728 114734.186000 40 | cudamemcpy mrcuda 1 268435456 228719.460000 41 | cudamemcpy mrcuda 1 536870912 457770.822000 42 | cudamemcpy mrcuda 2 1024 24.210000 43 | cudamemcpy mrcuda 2 2048 23.541000 44 | cudamemcpy 
mrcuda 2 4096 23.846000 45 | cudamemcpy mrcuda 2 8192 27.095000 46 | cudamemcpy mrcuda 2 16384 32.419000 47 | cudamemcpy mrcuda 2 32768 43.485000 48 | cudamemcpy mrcuda 2 65536 65.377000 49 | cudamemcpy mrcuda 2 131072 107.146000 50 | cudamemcpy mrcuda 2 262144 190.121000 51 | cudamemcpy mrcuda 2 524288 360.972000 52 | cudamemcpy mrcuda 2 1048576 769.155000 53 | cudamemcpy mrcuda 2 2097152 1610.051000 54 | cudamemcpy mrcuda 2 4194304 3281.864000 55 | cudamemcpy mrcuda 2 8388608 6782.289000 56 | cudamemcpy mrcuda 2 16777216 14296.611000 57 | cudamemcpy mrcuda 2 33554432 28939.121000 58 | cudamemcpy mrcuda 2 67108864 57711.919000 59 | cudamemcpy mrcuda 2 134217728 115116.285000 60 | cudamemcpy mrcuda 2 268435456 229418.731000 61 | cudamemcpy mrcuda 2 536870912 458720.893000 62 | cudamemcpy mrcuda 3 1024 24.432000 63 | cudamemcpy mrcuda 3 2048 23.433000 64 | cudamemcpy mrcuda 3 4096 23.725000 65 | cudamemcpy mrcuda 3 8192 26.918000 66 | cudamemcpy mrcuda 3 16384 32.127000 67 | cudamemcpy mrcuda 3 32768 43.121000 68 | cudamemcpy mrcuda 3 65536 65.396000 69 | cudamemcpy mrcuda 3 131072 107.548000 70 | cudamemcpy mrcuda 3 262144 190.862000 71 | cudamemcpy mrcuda 3 524288 363.774000 72 | cudamemcpy mrcuda 3 1048576 733.667000 73 | cudamemcpy mrcuda 3 2097152 1561.821000 74 | cudamemcpy mrcuda 3 4194304 3264.504000 75 | cudamemcpy mrcuda 3 8388608 6770.502000 76 | cudamemcpy mrcuda 3 16777216 14221.611000 77 | cudamemcpy mrcuda 3 33554432 28949.865000 78 | cudamemcpy mrcuda 3 67108864 57812.058000 79 | cudamemcpy mrcuda 3 134217728 115184.516000 80 | cudamemcpy mrcuda 3 268435456 229561.263000 81 | cudamemcpy mrcuda 3 536870912 458857.855000 82 | cudamemcpy mrcuda 4 1024 23.803000 83 | cudamemcpy mrcuda 4 2048 23.921000 84 | cudamemcpy mrcuda 4 4096 24.169000 85 | cudamemcpy mrcuda 4 8192 27.698000 86 | cudamemcpy mrcuda 4 16384 32.877000 87 | cudamemcpy mrcuda 4 32768 43.996000 88 | cudamemcpy mrcuda 4 65536 66.135000 89 | cudamemcpy mrcuda 4 131072 107.902000 90 | 
cudamemcpy mrcuda 4 262144 190.761000 91 | cudamemcpy mrcuda 4 524288 357.221000 92 | cudamemcpy mrcuda 4 1048576 734.587000 93 | cudamemcpy mrcuda 4 2097152 1501.452000 94 | cudamemcpy mrcuda 4 4194304 3157.658000 95 | cudamemcpy mrcuda 4 8388608 6724.800000 96 | cudamemcpy mrcuda 4 16777216 14084.432000 97 | cudamemcpy mrcuda 4 33554432 28745.811000 98 | cudamemcpy mrcuda 4 67108864 57456.041000 99 | cudamemcpy mrcuda 4 134217728 114529.454000 100 | cudamemcpy mrcuda 4 268435456 228555.257000 101 | cudamemcpy mrcuda 4 536870912 456908.974000 102 | cudamemcpy mrcuda 5 1024 24.740000 103 | cudamemcpy mrcuda 5 2048 24.142000 104 | cudamemcpy mrcuda 5 4096 24.591000 105 | cudamemcpy mrcuda 5 8192 27.863000 106 | cudamemcpy mrcuda 5 16384 33.229000 107 | cudamemcpy mrcuda 5 32768 44.054000 108 | cudamemcpy mrcuda 5 65536 66.328000 109 | cudamemcpy mrcuda 5 131072 108.365000 110 | cudamemcpy mrcuda 5 262144 190.897000 111 | cudamemcpy mrcuda 5 524288 362.404000 112 | cudamemcpy mrcuda 5 1048576 768.185000 113 | cudamemcpy mrcuda 5 2097152 1609.308000 114 | cudamemcpy mrcuda 5 4194304 3284.896000 115 | cudamemcpy mrcuda 5 8388608 6900.502000 116 | cudamemcpy mrcuda 5 16777216 14266.649000 117 | cudamemcpy mrcuda 5 33554432 28960.732000 118 | cudamemcpy mrcuda 5 67108864 57796.792000 119 | cudamemcpy mrcuda 5 134217728 115037.879000 120 | cudamemcpy mrcuda 5 268435456 229401.095000 121 | cudamemcpy mrcuda 5 536870912 458578.661000 122 | cudamemcpy mrcuda 6 1024 23.720000 123 | cudamemcpy mrcuda 6 2048 23.777000 124 | cudamemcpy mrcuda 6 4096 24.075000 125 | cudamemcpy mrcuda 6 8192 27.386000 126 | cudamemcpy mrcuda 6 16384 32.648000 127 | cudamemcpy mrcuda 6 32768 43.408000 128 | cudamemcpy mrcuda 6 65536 65.562000 129 | cudamemcpy mrcuda 6 131072 107.481000 130 | cudamemcpy mrcuda 6 262144 190.448000 131 | cudamemcpy mrcuda 6 524288 361.721000 132 | cudamemcpy mrcuda 6 1048576 775.303000 133 | cudamemcpy mrcuda 6 2097152 1602.592000 134 | cudamemcpy mrcuda 6 4194304 
3262.345000 135 | cudamemcpy mrcuda 6 8388608 6864.071000 136 | cudamemcpy mrcuda 6 16777216 14261.377000 137 | cudamemcpy mrcuda 6 33554432 28879.013000 138 | cudamemcpy mrcuda 6 67108864 57695.091000 139 | cudamemcpy mrcuda 6 134217728 114821.480000 140 | cudamemcpy mrcuda 6 268435456 228831.121000 141 | cudamemcpy mrcuda 6 536870912 457399.061000 142 | cudamemcpy mrcuda 7 1024 24.467000 143 | cudamemcpy mrcuda 7 2048 23.916000 144 | cudamemcpy mrcuda 7 4096 24.277000 145 | cudamemcpy mrcuda 7 8192 27.448000 146 | cudamemcpy mrcuda 7 16384 32.568000 147 | cudamemcpy mrcuda 7 32768 43.604000 148 | cudamemcpy mrcuda 7 65536 65.713000 149 | cudamemcpy mrcuda 7 131072 108.453000 150 | cudamemcpy mrcuda 7 262144 191.636000 151 | cudamemcpy mrcuda 7 524288 361.887000 152 | cudamemcpy mrcuda 7 1048576 774.704000 153 | cudamemcpy mrcuda 7 2097152 1610.828000 154 | cudamemcpy mrcuda 7 4194304 3275.267000 155 | cudamemcpy mrcuda 7 8388608 6902.061000 156 | cudamemcpy mrcuda 7 16777216 14316.970000 157 | cudamemcpy mrcuda 7 33554432 29001.885000 158 | cudamemcpy mrcuda 7 67108864 57918.016000 159 | cudamemcpy mrcuda 7 134217728 115225.240000 160 | cudamemcpy mrcuda 7 268435456 229559.663000 161 | cudamemcpy mrcuda 7 536870912 458412.680000 162 | cudamemcpy mrcuda 8 1024 24.201000 163 | cudamemcpy mrcuda 8 2048 23.573000 164 | cudamemcpy mrcuda 8 4096 24.008000 165 | cudamemcpy mrcuda 8 8192 27.317000 166 | cudamemcpy mrcuda 8 16384 32.523000 167 | cudamemcpy mrcuda 8 32768 43.500000 168 | cudamemcpy mrcuda 8 65536 65.584000 169 | cudamemcpy mrcuda 8 131072 107.949000 170 | cudamemcpy mrcuda 8 262144 190.804000 171 | cudamemcpy mrcuda 8 524288 364.756000 172 | cudamemcpy mrcuda 8 1048576 724.120000 173 | cudamemcpy mrcuda 8 2097152 1550.997000 174 | cudamemcpy mrcuda 8 4194304 3218.520000 175 | cudamemcpy mrcuda 8 8388608 6666.086000 176 | cudamemcpy mrcuda 8 16777216 14107.568000 177 | cudamemcpy mrcuda 8 33554432 28795.421000 178 | cudamemcpy mrcuda 8 67108864 57638.160000 
179 | cudamemcpy mrcuda 8 134217728 114926.156000 180 | cudamemcpy mrcuda 8 268435456 229095.012000 181 | cudamemcpy mrcuda 8 536870912 457751.274000 182 | cudamemcpy mrcuda 9 1024 23.594000 183 | cudamemcpy mrcuda 9 2048 24.050000 184 | cudamemcpy mrcuda 9 4096 24.403000 185 | cudamemcpy mrcuda 9 8192 27.678000 186 | cudamemcpy mrcuda 9 16384 32.980000 187 | cudamemcpy mrcuda 9 32768 43.657000 188 | cudamemcpy mrcuda 9 65536 65.738000 189 | cudamemcpy mrcuda 9 131072 107.550000 190 | cudamemcpy mrcuda 9 262144 190.718000 191 | cudamemcpy mrcuda 9 524288 363.023000 192 | cudamemcpy mrcuda 9 1048576 729.539000 193 | cudamemcpy mrcuda 9 2097152 1505.266000 194 | cudamemcpy mrcuda 9 4194304 3158.483000 195 | cudamemcpy mrcuda 9 8388608 6669.139000 196 | cudamemcpy mrcuda 9 16777216 14043.886000 197 | cudamemcpy mrcuda 9 33554432 28737.694000 198 | cudamemcpy mrcuda 9 67108864 57545.285000 199 | cudamemcpy mrcuda 9 134217728 114763.498000 200 | cudamemcpy mrcuda 9 268435456 229028.840000 201 | cudamemcpy mrcuda 9 536870912 457445.702000 202 | cudamemcpy native 0 1024 4.898000 203 | cudamemcpy native 0 2048 5.050000 204 | cudamemcpy native 0 4096 5.473000 205 | cudamemcpy native 0 8192 6.790000 206 | cudamemcpy native 0 16384 9.148000 207 | cudamemcpy native 0 32768 13.953000 208 | cudamemcpy native 0 65536 23.594000 209 | cudamemcpy native 0 131072 36.605000 210 | cudamemcpy native 0 262144 66.875000 211 | cudamemcpy native 0 524288 127.410000 212 | cudamemcpy native 0 1048576 249.071000 213 | cudamemcpy native 0 2097152 423.865000 214 | cudamemcpy native 0 4194304 773.866000 215 | cudamemcpy native 0 8388608 1464.670000 216 | cudamemcpy native 0 16777216 2860.749000 217 | cudamemcpy native 0 33554432 5636.800000 218 | cudamemcpy native 0 67108864 11195.433000 219 | cudamemcpy native 0 134217728 22313.544000 220 | cudamemcpy native 0 268435456 44545.160000 221 | cudamemcpy native 0 536870912 88965.398000 222 | cudamemcpy native 1 1024 4.922000 223 | cudamemcpy native 1 
2048 5.040000 224 | cudamemcpy native 1 4096 5.501000 225 | cudamemcpy native 1 8192 6.835000 226 | cudamemcpy native 1 16384 9.161000 227 | cudamemcpy native 1 32768 13.996000 228 | cudamemcpy native 1 65536 23.676000 229 | cudamemcpy native 1 131072 36.652000 230 | cudamemcpy native 1 262144 66.976000 231 | cudamemcpy native 1 524288 127.428000 232 | cudamemcpy native 1 1048576 248.992000 233 | cudamemcpy native 1 2097152 423.326000 234 | cudamemcpy native 1 4194304 772.537000 235 | cudamemcpy native 1 8388608 1464.817000 236 | cudamemcpy native 1 16777216 2855.708000 237 | cudamemcpy native 1 33554432 5633.196000 238 | cudamemcpy native 1 67108864 11194.047000 239 | cudamemcpy native 1 134217728 22306.524000 240 | cudamemcpy native 1 268435456 44531.106000 241 | cudamemcpy native 1 536870912 88942.431000 242 | cudamemcpy native 2 1024 4.927000 243 | cudamemcpy native 2 2048 5.030000 244 | cudamemcpy native 2 4096 5.484000 245 | cudamemcpy native 2 8192 6.796000 246 | cudamemcpy native 2 16384 9.124000 247 | cudamemcpy native 2 32768 13.966000 248 | cudamemcpy native 2 65536 23.586000 249 | cudamemcpy native 2 131072 36.658000 250 | cudamemcpy native 2 262144 66.871000 251 | cudamemcpy native 2 524288 127.386000 252 | cudamemcpy native 2 1048576 276.266000 253 | cudamemcpy native 2 2097152 449.664000 254 | cudamemcpy native 2 4194304 800.574000 255 | cudamemcpy native 2 8388608 1490.734000 256 | cudamemcpy native 2 16777216 2890.811000 257 | cudamemcpy native 2 33554432 5670.958000 258 | cudamemcpy native 2 67108864 11224.960000 259 | cudamemcpy native 2 134217728 22320.847000 260 | cudamemcpy native 2 268435456 44527.648000 261 | cudamemcpy native 2 536870912 88956.920000 262 | cudamemcpy native 3 1024 4.914000 263 | cudamemcpy native 3 2048 5.064000 264 | cudamemcpy native 3 4096 5.478000 265 | cudamemcpy native 3 8192 6.788000 266 | cudamemcpy native 3 16384 9.145000 267 | cudamemcpy native 3 32768 13.995000 268 | cudamemcpy native 3 65536 23.598000 269 | 
cudamemcpy native 3 131072 36.605000 270 | cudamemcpy native 3 262144 66.904000 271 | cudamemcpy native 3 524288 127.529000 272 | cudamemcpy native 3 1048576 249.258000 273 | cudamemcpy native 3 2097152 423.666000 274 | cudamemcpy native 3 4194304 772.672000 275 | cudamemcpy native 3 8388608 1464.225000 276 | cudamemcpy native 3 16777216 2858.230000 277 | cudamemcpy native 3 33554432 5633.703000 278 | cudamemcpy native 3 67108864 11185.736000 279 | cudamemcpy native 3 134217728 22286.092000 280 | cudamemcpy native 3 268435456 44494.399000 281 | cudamemcpy native 3 536870912 88906.943000 282 | cudamemcpy native 4 1024 4.929000 283 | cudamemcpy native 4 2048 5.058000 284 | cudamemcpy native 4 4096 5.505000 285 | cudamemcpy native 4 8192 6.804000 286 | cudamemcpy native 4 16384 9.168000 287 | cudamemcpy native 4 32768 13.978000 288 | cudamemcpy native 4 65536 23.640000 289 | cudamemcpy native 4 131072 36.665000 290 | cudamemcpy native 4 262144 66.956000 291 | cudamemcpy native 4 524288 127.496000 292 | cudamemcpy native 4 1048576 249.023000 293 | cudamemcpy native 4 2097152 423.554000 294 | cudamemcpy native 4 4194304 772.551000 295 | cudamemcpy native 4 8388608 1463.784000 296 | cudamemcpy native 4 16777216 2858.779000 297 | cudamemcpy native 4 33554432 5634.908000 298 | cudamemcpy native 4 67108864 11197.449000 299 | cudamemcpy native 4 134217728 22302.144000 300 | cudamemcpy native 4 268435456 44528.614000 301 | cudamemcpy native 4 536870912 88931.180000 302 | cudamemcpy native 5 1024 4.916000 303 | cudamemcpy native 5 2048 5.056000 304 | cudamemcpy native 5 4096 5.495000 305 | cudamemcpy native 5 8192 6.817000 306 | cudamemcpy native 5 16384 9.193000 307 | cudamemcpy native 5 32768 14.084000 308 | cudamemcpy native 5 65536 23.629000 309 | cudamemcpy native 5 131072 36.595000 310 | cudamemcpy native 5 262144 66.844000 311 | cudamemcpy native 5 524288 127.363000 312 | cudamemcpy native 5 1048576 248.870000 313 | cudamemcpy native 5 2097152 423.485000 314 | 
cudamemcpy native 5 4194304 772.666000 315 | cudamemcpy native 5 8388608 1464.365000 316 | cudamemcpy native 5 16777216 2859.676000 317 | cudamemcpy native 5 33554432 5636.493000 318 | cudamemcpy native 5 67108864 11201.132000 319 | cudamemcpy native 5 134217728 22306.450000 320 | cudamemcpy native 5 268435456 44522.188000 321 | cudamemcpy native 5 536870912 88964.001000 322 | cudamemcpy native 6 1024 4.880000 323 | cudamemcpy native 6 2048 5.054000 324 | cudamemcpy native 6 4096 5.494000 325 | cudamemcpy native 6 8192 6.780000 326 | cudamemcpy native 6 16384 9.165000 327 | cudamemcpy native 6 32768 13.966000 328 | cudamemcpy native 6 65536 23.590000 329 | cudamemcpy native 6 131072 36.656000 330 | cudamemcpy native 6 262144 67.015000 331 | cudamemcpy native 6 524288 127.506000 332 | cudamemcpy native 6 1048576 249.662000 333 | cudamemcpy native 6 2097152 424.632000 334 | cudamemcpy native 6 4194304 773.746000 335 | cudamemcpy native 6 8388608 1468.640000 336 | cudamemcpy native 6 16777216 2864.502000 337 | cudamemcpy native 6 33554432 5649.572000 338 | cudamemcpy native 6 67108864 11222.734000 339 | cudamemcpy native 6 134217728 22363.116000 340 | cudamemcpy native 6 268435456 44651.063000 341 | cudamemcpy native 6 536870912 89210.271000 342 | cudamemcpy native 7 1024 4.906000 343 | cudamemcpy native 7 2048 5.061000 344 | cudamemcpy native 7 4096 5.507000 345 | cudamemcpy native 7 8192 6.801000 346 | cudamemcpy native 7 16384 9.163000 347 | cudamemcpy native 7 32768 14.023000 348 | cudamemcpy native 7 65536 23.634000 349 | cudamemcpy native 7 131072 36.718000 350 | cudamemcpy native 7 262144 67.052000 351 | cudamemcpy native 7 524288 128.877000 352 | cudamemcpy native 7 1048576 249.558000 353 | cudamemcpy native 7 2097152 424.701000 354 | cudamemcpy native 7 4194304 773.827000 355 | cudamemcpy native 7 8388608 1468.707000 356 | cudamemcpy native 7 16777216 2867.657000 357 | cudamemcpy native 7 33554432 5652.540000 358 | cudamemcpy native 7 67108864 11232.904000 
359 | cudamemcpy native 7 134217728 22372.873000 360 | cudamemcpy native 7 268435456 44661.230000 361 | cudamemcpy native 7 536870912 89251.043000 362 | cudamemcpy native 8 1024 4.858000 363 | cudamemcpy native 8 2048 5.027000 364 | cudamemcpy native 8 4096 5.465000 365 | cudamemcpy native 8 8192 6.807000 366 | cudamemcpy native 8 16384 9.141000 367 | cudamemcpy native 8 32768 13.981000 368 | cudamemcpy native 8 65536 23.640000 369 | cudamemcpy native 8 131072 36.645000 370 | cudamemcpy native 8 262144 67.007000 371 | cudamemcpy native 8 524288 127.678000 372 | cudamemcpy native 8 1048576 249.723000 373 | cudamemcpy native 8 2097152 425.068000 374 | cudamemcpy native 8 4194304 775.541000 375 | cudamemcpy native 8 8388608 1469.898000 376 | cudamemcpy native 8 16777216 2869.008000 377 | cudamemcpy native 8 33554432 5658.383000 378 | cudamemcpy native 8 67108864 11239.155000 379 | cudamemcpy native 8 134217728 22381.782000 380 | cudamemcpy native 8 268435456 44677.469000 381 | cudamemcpy native 8 536870912 89271.732000 382 | cudamemcpy native 9 1024 4.917000 383 | cudamemcpy native 9 2048 5.055000 384 | cudamemcpy native 9 4096 5.491000 385 | cudamemcpy native 9 8192 6.795000 386 | cudamemcpy native 9 16384 9.172000 387 | cudamemcpy native 9 32768 13.993000 388 | cudamemcpy native 9 65536 23.643000 389 | cudamemcpy native 9 131072 36.683000 390 | cudamemcpy native 9 262144 66.824000 391 | cudamemcpy native 9 524288 127.519000 392 | cudamemcpy native 9 1048576 276.583000 393 | cudamemcpy native 9 2097152 450.542000 394 | cudamemcpy native 9 4194304 799.578000 395 | cudamemcpy native 9 8388608 1491.421000 396 | cudamemcpy native 9 16777216 2892.257000 397 | cudamemcpy native 9 33554432 5671.093000 398 | cudamemcpy native 9 67108864 11226.631000 399 | cudamemcpy native 9 134217728 22329.188000 400 | cudamemcpy native 9 268435456 44524.088000 401 | cudamemcpy native 9 536870912 88925.813000 402 | 
-------------------------------------------------------------------------------- /results/nullker-mhelper.out: -------------------------------------------------------------------------------- 1 | prog lib count num_calls time 2 | nullker mrcuda 0 1024 13.909000 3 | nullker mrcuda 0 2048 27.694000 4 | nullker mrcuda 0 4096 55.480000 5 | nullker mrcuda 0 8192 110.551000 6 | nullker mrcuda 0 16384 220.698000 7 | nullker mrcuda 0 32768 376.599000 8 | nullker mrcuda 0 65536 745.303000 9 | nullker mrcuda 0 131072 1487.305000 10 | nullker mrcuda 0 262144 2981.731000 11 | nullker mrcuda 0 524288 5951.194000 12 | nullker mrcuda 0 1048576 11905.031000 13 | nullker mrcuda 0 2097152 23794.678000 14 | nullker mrcuda 0 4194304 47580.513000 15 | nullker mrcuda 0 8388608 95185.737000 16 | nullker mrcuda 0 16777216 190206.546000 17 | nullker mrcuda 1 1024 13.948000 18 | nullker mrcuda 1 2048 27.847000 19 | nullker mrcuda 1 4096 55.602000 20 | nullker mrcuda 1 8192 110.650000 21 | nullker mrcuda 1 16384 221.341000 22 | nullker mrcuda 1 32768 383.100000 23 | nullker mrcuda 1 65536 746.129000 24 | nullker mrcuda 1 131072 1496.601000 25 | nullker mrcuda 1 262144 2995.116000 26 | nullker mrcuda 1 524288 5985.191000 27 | nullker mrcuda 1 1048576 11979.065000 28 | nullker mrcuda 1 2097152 23947.264000 29 | nullker mrcuda 1 4194304 47903.340000 30 | nullker mrcuda 1 8388608 95821.928000 31 | nullker mrcuda 1 16777216 191631.296000 32 | nullker mrcuda 2 1024 11.762000 33 | nullker mrcuda 2 2048 23.572000 34 | nullker mrcuda 2 4096 46.782000 35 | nullker mrcuda 2 8192 92.995000 36 | nullker mrcuda 2 16384 186.496000 37 | nullker mrcuda 2 32768 373.267000 38 | nullker mrcuda 2 65536 745.067000 39 | nullker mrcuda 2 131072 1491.059000 40 | nullker mrcuda 2 262144 2975.860000 41 | nullker mrcuda 2 524288 5957.743000 42 | nullker mrcuda 2 1048576 11907.501000 43 | nullker mrcuda 2 2097152 23853.835000 44 | nullker mrcuda 2 4194304 47667.329000 45 | nullker mrcuda 2 8388608 95264.386000 46 | 
nullker mrcuda 2 16777216 190606.974000 47 | nullker mrcuda 3 1024 13.931000 48 | nullker mrcuda 3 2048 27.930000 49 | nullker mrcuda 3 4096 55.193000 50 | nullker mrcuda 3 8192 109.578000 51 | nullker mrcuda 3 16384 220.805000 52 | nullker mrcuda 3 32768 376.383000 53 | nullker mrcuda 3 65536 744.637000 54 | nullker mrcuda 3 131072 1473.593000 55 | nullker mrcuda 3 262144 2953.483000 56 | nullker mrcuda 3 524288 5922.429000 57 | nullker mrcuda 3 1048576 11851.660000 58 | nullker mrcuda 3 2097152 23698.863000 59 | nullker mrcuda 3 4194304 47307.894000 60 | nullker mrcuda 3 8388608 94766.222000 61 | nullker mrcuda 3 16777216 189753.937000 62 | nullker mrcuda 4 1024 12.148000 63 | nullker mrcuda 4 2048 24.513000 64 | nullker mrcuda 4 4096 48.559000 65 | nullker mrcuda 4 8192 96.675000 66 | nullker mrcuda 4 16384 194.782000 67 | nullker mrcuda 4 32768 387.978000 68 | nullker mrcuda 4 65536 779.653000 69 | nullker mrcuda 4 131072 1559.794000 70 | nullker mrcuda 4 262144 3126.936000 71 | nullker mrcuda 4 524288 6238.395000 72 | nullker mrcuda 4 1048576 12470.579000 73 | nullker mrcuda 4 2097152 24921.762000 74 | nullker mrcuda 4 4194304 49788.988000 75 | nullker mrcuda 4 8388608 99457.578000 76 | nullker mrcuda 4 16777216 199119.432000 77 | nullker mrcuda 5 1024 14.360000 78 | nullker mrcuda 5 2048 28.614000 79 | nullker mrcuda 5 4096 57.200000 80 | nullker mrcuda 5 8192 113.692000 81 | nullker mrcuda 5 16384 227.503000 82 | nullker mrcuda 5 32768 388.961000 83 | nullker mrcuda 5 65536 767.622000 84 | nullker mrcuda 5 131072 1537.665000 85 | nullker mrcuda 5 262144 3072.827000 86 | nullker mrcuda 5 524288 6149.966000 87 | nullker mrcuda 5 1048576 12304.688000 88 | nullker mrcuda 5 2097152 24605.251000 89 | nullker mrcuda 5 4194304 49200.814000 90 | nullker mrcuda 5 8388608 98456.404000 91 | nullker mrcuda 5 16777216 196701.645000 92 | nullker mrcuda 6 1024 14.162000 93 | nullker mrcuda 6 2048 28.289000 94 | nullker mrcuda 6 4096 56.480000 95 | nullker mrcuda 6 8192 
112.021000 96 | nullker mrcuda 6 16384 223.856000 97 | nullker mrcuda 6 32768 391.024000 98 | nullker mrcuda 6 65536 755.072000 99 | nullker mrcuda 6 131072 1515.169000 100 | nullker mrcuda 6 262144 3039.992000 101 | nullker mrcuda 6 524288 6077.132000 102 | nullker mrcuda 6 1048576 12119.706000 103 | nullker mrcuda 6 2097152 24327.747000 104 | nullker mrcuda 6 4194304 48537.433000 105 | nullker mrcuda 6 8388608 97030.690000 106 | nullker mrcuda 6 16777216 194138.853000 107 | nullker mrcuda 7 1024 13.810000 108 | nullker mrcuda 7 2048 27.852000 109 | nullker mrcuda 7 4096 55.083000 110 | nullker mrcuda 7 8192 109.188000 111 | nullker mrcuda 7 16384 218.314000 112 | nullker mrcuda 7 32768 376.721000 113 | nullker mrcuda 7 65536 735.194000 114 | nullker mrcuda 7 131072 1481.617000 115 | nullker mrcuda 7 262144 2954.298000 116 | nullker mrcuda 7 524288 5911.131000 117 | nullker mrcuda 7 1048576 11806.652000 118 | nullker mrcuda 7 2097152 23656.850000 119 | nullker mrcuda 7 4194304 47241.286000 120 | nullker mrcuda 7 8388608 94611.828000 121 | nullker mrcuda 7 16777216 189050.351000 122 | nullker mrcuda 8 1024 14.095000 123 | nullker mrcuda 8 2048 28.081000 124 | nullker mrcuda 8 4096 55.970000 125 | nullker mrcuda 8 8192 110.973000 126 | nullker mrcuda 8 16384 222.415000 127 | nullker mrcuda 8 32768 390.757000 128 | nullker mrcuda 8 65536 751.369000 129 | nullker mrcuda 8 131072 1505.198000 130 | nullker mrcuda 8 262144 3009.886000 131 | nullker mrcuda 8 524288 6017.065000 132 | nullker mrcuda 8 1048576 12057.644000 133 | nullker mrcuda 8 2097152 24091.687000 134 | nullker mrcuda 8 4194304 48175.926000 135 | nullker mrcuda 8 8388608 96237.943000 136 | nullker mrcuda 8 16777216 192701.657000 137 | nullker mrcuda 9 1024 14.060000 138 | nullker mrcuda 9 2048 28.105000 139 | nullker mrcuda 9 4096 56.126000 140 | nullker mrcuda 9 8192 111.257000 141 | nullker mrcuda 9 16384 222.588000 142 | nullker mrcuda 9 32768 388.535000 143 | nullker mrcuda 9 65536 749.658000 144 | 
nullker mrcuda 9 131072 1509.148000 145 | nullker mrcuda 9 262144 3006.658000 146 | nullker mrcuda 9 524288 6021.239000 147 | nullker mrcuda 9 1048576 12045.826000 148 | nullker mrcuda 9 2097152 24086.604000 149 | nullker mrcuda 9 4194304 48086.814000 150 | nullker mrcuda 9 8388608 96193.359000 151 | nullker mrcuda 9 16777216 192125.235000 152 | nullker native 0 1024 1.693000 153 | nullker native 0 2048 3.328000 154 | nullker native 0 4096 6.596000 155 | nullker native 0 8192 12.316000 156 | nullker native 0 16384 24.535000 157 | nullker native 0 32768 49.045000 158 | nullker native 0 65536 98.111000 159 | nullker native 0 131072 196.670000 160 | nullker native 0 262144 392.933000 161 | nullker native 0 524288 787.145000 162 | nullker native 0 1048576 1578.461000 163 | nullker native 0 2097152 3150.883000 164 | nullker native 0 4194304 6305.144000 165 | nullker native 0 8388608 12608.160000 166 | nullker native 0 16777216 25167.428000 167 | nullker native 1 1024 1.930000 168 | nullker native 1 2048 3.870000 169 | nullker native 1 4096 7.645000 170 | nullker native 1 8192 14.576000 171 | nullker native 1 16384 29.061000 172 | nullker native 1 32768 59.444000 173 | nullker native 1 65536 116.105000 174 | nullker native 1 131072 232.417000 175 | nullker native 1 262144 395.501000 176 | nullker native 1 524288 782.965000 177 | nullker native 1 1048576 1572.530000 178 | nullker native 1 2097152 3140.093000 179 | nullker native 1 4194304 6273.480000 180 | nullker native 1 8388608 12540.560000 181 | nullker native 1 16777216 25076.293000 182 | nullker native 2 1024 1.931000 183 | nullker native 2 2048 3.850000 184 | nullker native 2 4096 7.639000 185 | nullker native 2 8192 14.556000 186 | nullker native 2 16384 29.103000 187 | nullker native 2 32768 58.037000 188 | nullker native 2 65536 116.313000 189 | nullker native 2 131072 229.299000 190 | nullker native 2 262144 392.810000 191 | nullker native 2 524288 784.319000 192 | nullker native 2 1048576 1571.662000 193 | 
nullker native 2 2097152 3142.692000 194 | nullker native 2 4194304 6281.323000 195 | nullker native 2 8388608 12563.783000 196 | nullker native 2 16777216 25143.079000 197 | nullker native 3 1024 1.925000 198 | nullker native 3 2048 3.851000 199 | nullker native 3 4096 7.658000 200 | nullker native 3 8192 14.506000 201 | nullker native 3 16384 29.193000 202 | nullker native 3 32768 58.076000 203 | nullker native 3 65536 116.383000 204 | nullker native 3 131072 230.686000 205 | nullker native 3 262144 391.538000 206 | nullker native 3 524288 781.478000 207 | nullker native 3 1048576 1567.546000 208 | nullker native 3 2097152 3139.788000 209 | nullker native 3 4194304 6269.944000 210 | nullker native 3 8388608 12546.773000 211 | nullker native 3 16777216 25069.748000 212 | nullker native 4 1024 1.924000 213 | nullker native 4 2048 3.862000 214 | nullker native 4 4096 7.633000 215 | nullker native 4 8192 14.547000 216 | nullker native 4 16384 29.111000 217 | nullker native 4 32768 58.003000 218 | nullker native 4 65536 116.376000 219 | nullker native 4 131072 232.829000 220 | nullker native 4 262144 398.024000 221 | nullker native 4 524288 784.508000 222 | nullker native 4 1048576 1573.480000 223 | nullker native 4 2097152 3139.734000 224 | nullker native 4 4194304 6275.518000 225 | nullker native 4 8388608 12546.614000 226 | nullker native 4 16777216 25070.691000 227 | nullker native 5 1024 1.670000 228 | nullker native 5 2048 3.293000 229 | nullker native 5 4096 6.543000 230 | nullker native 5 8192 12.294000 231 | nullker native 5 16384 24.577000 232 | nullker native 5 32768 48.953000 233 | nullker native 5 65536 97.914000 234 | nullker native 5 131072 195.743000 235 | nullker native 5 262144 392.773000 236 | nullker native 5 524288 783.770000 237 | nullker native 5 1048576 1574.795000 238 | nullker native 5 2097152 3143.471000 239 | nullker native 5 4194304 6282.858000 240 | nullker native 5 8388608 12580.392000 241 | nullker native 5 16777216 25153.583000 242 | 
nullker native 6 1024 1.916000 243 | nullker native 6 2048 3.872000 244 | nullker native 6 4096 7.634000 245 | nullker native 6 8192 14.544000 246 | nullker native 6 16384 29.210000 247 | nullker native 6 32768 58.203000 248 | nullker native 6 65536 116.366000 249 | nullker native 6 131072 232.485000 250 | nullker native 6 262144 393.469000 251 | nullker native 6 524288 783.498000 252 | nullker native 6 1048576 1571.326000 253 | nullker native 6 2097152 3140.647000 254 | nullker native 6 4194304 6264.444000 255 | nullker native 6 8388608 12527.807000 256 | nullker native 6 16777216 25067.070000 257 | nullker native 7 1024 1.926000 258 | nullker native 7 2048 3.839000 259 | nullker native 7 4096 7.660000 260 | nullker native 7 8192 14.561000 261 | nullker native 7 16384 29.069000 262 | nullker native 7 32768 58.140000 263 | nullker native 7 65536 116.144000 264 | nullker native 7 131072 228.217000 265 | nullker native 7 262144 392.149000 266 | nullker native 7 524288 783.238000 267 | nullker native 7 1048576 1572.047000 268 | nullker native 7 2097152 3140.495000 269 | nullker native 7 4194304 6267.781000 270 | nullker native 7 8388608 12546.230000 271 | nullker native 7 16777216 25100.963000 272 | nullker native 8 1024 1.688000 273 | nullker native 8 2048 3.300000 274 | nullker native 8 4096 6.539000 275 | nullker native 8 8192 12.336000 276 | nullker native 8 16384 24.511000 277 | nullker native 8 32768 48.958000 278 | nullker native 8 65536 97.993000 279 | nullker native 8 131072 195.946000 280 | nullker native 8 262144 392.690000 281 | nullker native 8 524288 784.694000 282 | nullker native 8 1048576 1573.828000 283 | nullker native 8 2097152 3146.828000 284 | nullker native 8 4194304 6291.588000 285 | nullker native 8 8388608 12585.252000 286 | nullker native 8 16777216 25207.118000 287 | nullker native 9 1024 1.916000 288 | nullker native 9 2048 3.869000 289 | nullker native 9 4096 7.611000 290 | nullker native 9 8192 14.613000 291 | nullker native 9 16384 
29.131000 292 | nullker native 9 32768 58.367000 293 | nullker native 9 65536 116.631000 294 | nullker native 9 131072 233.111000 295 | nullker native 9 262144 395.504000 296 | nullker native 9 524288 785.364000 297 | nullker native 9 1048576 1573.954000 298 | nullker native 9 2097152 3150.368000 299 | nullker native 9 4194304 6289.309000 300 | nullker native 9 8388608 12580.401000 301 | nullker native 9 16777216 25135.924000 302 | -------------------------------------------------------------------------------- /scripts/Makefile.am: -------------------------------------------------------------------------------- 1 | bin_PROGRAMS = mrcudaexec 2 | mrcudaexec_SOURCES = mrcudaexec.py.template 3 | 4 | mrcudaexec$(EXEEXT): mrcudaexec.py.template 5 | cp $< mrcudaexec$(EXEEXT) 6 | ${SED} -i -- 's/{{ RCUDA_LIBCUDART }}/$(shell echo "${RCUDA_LIBCUDART}" | ${SED} -e 's/\//\\\//g')/g' mrcudaexec$(EXEEXT) 7 | ${SED} -i -- 's/{{ NVIDIA_LIBCUDART }}/$(shell echo "${NVIDIA_LIBCUDART}" | ${SED} -e 's/\//\\\//g')/g' mrcudaexec$(EXEEXT) 8 | ${SED} -i -- 's/{{ MRCUDA_LIBPATH }}/$(shell echo "${libdir}" | ${SED} -e 's/\//\\\//g')/g' mrcudaexec$(EXEEXT) 9 | 10 | -------------------------------------------------------------------------------- /scripts/lib64/libcudart.so: -------------------------------------------------------------------------------- 1 | ../../build/src/.libs/libcudart.so -------------------------------------------------------------------------------- /scripts/lib64/libcudart.so.6.5: -------------------------------------------------------------------------------- 1 | libcudart.so -------------------------------------------------------------------------------- /scripts/lib64/rCUDAcommIB.so: -------------------------------------------------------------------------------- 1 | /home/pak/Projects/rCUDAv15.07-CUDA7.0/lib/rCUDAcommIB.so -------------------------------------------------------------------------------- /scripts/lib64/rCUDAcommTCP.so: 
#!/usr/bin/python
# NOTE(dump): the flattened line this script starts on opens with the body of
# the previous entry, /scripts/lib64/rCUDAcommTCP.so, whose single line reads:
#   /home/pak/Projects/rCUDAv15.07-CUDA7.0/lib/rCUDAcommTCP.so
#
# /scripts/mrcudaexec.py.template
"""
Launcher template: run PROGRAM with the environment variables that select the
rCUDA servers, the two libcudart implementations and the switching threshold.

The '{{ ... }}' placeholders are replaced with real paths when the template is
turned into the installed `mrcudaexec` script.
"""

import sys
import os
import subprocess
from optparse import OptionParser

framework_directory = os.path.dirname(os.path.abspath(__file__))
framework_lib_directory = '{{ MRCUDA_LIBPATH }}'

# Non-numeric values accepted by --switch-threshold.
# NOTE(review): how the library interprets these two keywords is decided by the
# reader of MRCUDA_SWITCH_THRESHOLD_<i>, which is not visible from this script.
SWITCH_THRESHOLD_KEYWORDS = ('RCUDA', 'NVIDIA')


def parse_args():
    """
    Parse the command line (sys.argv).

    Returns:
        (options, args): the optparse values object and the PROGRAM argv list.
        options.switch_threshold is an int, or one of SWITCH_THRESHOLD_KEYWORDS.

    Exits with status 2 (through parser.error) when the options are
    inconsistent or PROGRAM is missing.
    """
    parser = OptionParser(usage = '%prog -s SERVER [options] -- PROGRAM')
    parser.add_option('-t', '--network-type', dest = 'network_type',
        choices = ['IB', 'TCP',],
        default = 'TCP',
        help = 'type of network for rCUDA (IB, TCP) [default = TCP].'
    )
    parser.add_option('-n', '--number-of-devices', type = 'int',
        default = 1,
        dest = 'number_of_devices',
        help = 'number of GPU devices to be used [default = 1].'
    )
    parser.add_option('-s', '--server',
        dest = 'server_address',
        help = 'rCUDA server address.',
    )
    parser.add_option('-p', '--port', type = 'int',
        default = 8308,
        dest = 'port',
        help = 'Port number [default = 8308].',
    )
    parser.add_option('-f', '--server-file',
        dest = 'server_file',
        help = 'rCUDA server file.'
    )
    parser.add_option('--rcuda-libcudart',
        dest = 'rcuda_libcudart',
        default = '{{ RCUDA_LIBCUDART }}',
        help = 'rCUDA\'s libcudart.so path [default = \'{{ RCUDA_LIBCUDART }}\']'
    )
    parser.add_option('--nvidia-libcudart',
        dest = 'nvidia_libcudart',
        default = '{{ NVIDIA_LIBCUDART }}',
        help = 'NVIDIA\'s libcudart.so path [default = \'{{ NVIDIA_LIBCUDART }}\']'
    )
    # Deliberately NOT type='int': optparse would reject "RCUDA"/"NVIDIA"
    # before the validation below ever saw them.
    parser.add_option('--switch-threshold',
        dest = 'switch_threshold',
        default = 0,
        help = 'Switching threshold value (positive integer, "RCUDA", or "NVIDIA") [default = 0]'
    )
    parser.add_option('--sock-path',
        dest = 'sock_path',
        default = '/tmp/mrcuda.sock',
        help = 'Switching socket path [default = /tmp/mrcuda.sock]'
    )
    parser.add_option('--mhelper-path',
        dest = 'mhelper_path',
        default = os.path.join(framework_directory, 'mhelper'),
        help = 'mhelper\'s path [default = %s]' % (os.path.join(framework_directory, 'mhelper'),)
    )

    options, args = parser.parse_args()

    if not options.server_address and not options.server_file:
        parser.error('either -s or -f option is required.')
    elif options.server_address and options.server_file:
        parser.error('-s and -f options cannot be used at the same time.')

    if options.switch_threshold not in SWITCH_THRESHOLD_KEYWORDS:
        try:
            # Numeric thresholds stay ints, as they were with type='int'.
            options.switch_threshold = int(options.switch_threshold)
        except ValueError:
            parser.error('Only a positive integer, "RCUDA", or "NVIDIA" are allowed as a value of --switch-threshold.')

    if len(args) == 0:
        parser.error('Please specify PROGRAM to execute.')
    return options, args


def _read_server_list(path):
    """Return the non-blank, stripped lines of the server file at `path`."""
    with open(path, 'r') as server_file:
        stripped = [line.strip() for line in server_file]
    servers = [line for line in stripped if len(line) > 0]
    if len(servers) == 0:
        raise Exception('Server file does not contain any server information.')
    return servers


def _build_environment(options):
    """Return a copy of os.environ extended with the variables PROGRAM needs."""
    environment = os.environ.copy()

    # The framework library directory goes first so it wins the lookup.
    ld_lib_path = framework_lib_directory
    if 'LD_LIBRARY_PATH' in environment:
        ld_lib_path = ld_lib_path + ':' + environment.get('LD_LIBRARY_PATH')

    environment['LD_LIBRARY_PATH'] = ld_lib_path
    environment['RCUDAPROTO'] = options.network_type
    environment['RCUDA_DEVICE_COUNT'] = str(options.number_of_devices)
    environment['MRCUDA_NVIDIA_LIB_PATH'] = options.nvidia_libcudart
    environment['MRCUDA_RCUDA_LIB_PATH'] = options.rcuda_libcudart
    environment['MRCUDA_SOCK_PATH'] = options.sock_path
    environment['MHELPER_PATH'] = options.mhelper_path

    if options.server_address:
        # One server: device i maps to remote device i on that server.
        for i in range(options.number_of_devices):
            environment['RCUDA_DEVICE_' + str(i)] = options.server_address + '@' + str(options.port) + ':' + str(i)
            environment['MRCUDA_SWITCH_THRESHOLD_' + str(i)] = str(options.switch_threshold)
    else:
        # Server file: one "DEVICE|THRESHOLD" entry per line, reused
        # round-robin when there are fewer entries than devices.
        servers = _read_server_list(options.server_file)
        for i in range(options.number_of_devices):
            server_info = servers[i % len(servers)].split('|')
            if len(server_info) != 2:
                raise Exception('Server file is not well-formed.')
            environment['RCUDA_DEVICE_' + str(i)] = server_info[0].strip()
            environment['MRCUDA_SWITCH_THRESHOLD_' + str(i)] = server_info[1].strip()

    return environment


def main(options, args):
    """
    Run PROGRAM (`args`) under the prepared environment and wait for it.

    Returns:
        The child's return code as reported by subprocess (negative when the
        child was terminated by a signal).
    """
    return subprocess.call(args, env = _build_environment(options))


if __name__ == '__main__':
    options, args = parse_args()
    status = main(options, args)
    # Propagate the child's status; report "killed by signal N" as 128 + N.
    sys.exit(status if status >= 0 else 128 - status)

# ---------------------------------------------------------------------------
# NOTE(dump): the flattened line that ends this script also holds the header
# and first lines of the next entry, /scripts/plotters/overhead.py. They are
# kept here as comments so this script stays valid Python:
#
#   import matplotlib.pyplot as plt
#   import matplotlib.lines as mlines
#   import matplotlib.markers as mmarkers
#   import matplotlib.font_manager
#   from matplotlib import rcParams
#
#   rcParams['mathtext.fontset']      (statement continues on the next dump line)
# NOTE(dump): this flattened line opens with `= 'custom'`, the tail of the
# statement `rcParams['mathtext.fontset'] = 'custom'`. Its head, together with
# the matplotlib imports that bind `plt`, `mlines`, `matplotlib` and
# `rcParams`, sits at the end of the previous dump line.

import numpy as np

import csv
import argparse
import math

COLOR = ['b', 'g', 'r', 'c', 'm', 'y',]

# Constants behind the "Predicted" curves of the memsync plots.
MEMSYNC_MODEL = {
    'bw_coef': 0.04721 * (10 ** 6), # 1 / s
    'bw_max': 4778.505 * (10 ** 6), # B / s
    'memsync_coef': 5.686 * (10 ** -11), # s / B
    'memsync_const': 0, # s
}

# Constants behind the "Predicted" curves of the mhelper plots.
MHELPER_MODEL = {
    'coefd': 6.87138 * (10 ** -10), # s
    'coefc': 9.98263 * (10 ** -6), # s
    'const': 0.00293373, # s
    'num_calls': 1000, # calls per measurement assumed by the memcpybw curve
}

# Constants behind the "Predicted" curves of the record/replay plot.
RECORD_REPLAY_MODEL = {
    'record_coef': 2.825 * (10 ** -7), # s
    'record_const': 0.3437 * (10 ** -3), # s
    'replay_coef': 1.031 * (10 ** -6), # s
    'replay_const': 1.2437, # s
}

def parseargs():
    """
    Manage the program arguments.
    """
    parser = argparse.ArgumentParser(
        description = 'mrCUDA overhead benchmark result plotter'
    )
    parser.add_argument('type',
        choices = ('memsync', 'memsync-bw', 'mhelper-nullker', 'mhelper-memcpybw', 'record-replay',),
        help = 'Overhead type'
    )
    parser.add_argument('resultfile', type = argparse.FileType('r'),
        help = 'Result file (csv)'
    )
    return parser.parse_args()

def _log2_if_power_of_two(value):
    """
    Return k when value == 2 ** k, otherwise None.

    Pure integer arithmetic: math.log(2 ** 29, 2) evaluates to
    29.000000000000004, so comparing float logarithms misclassifies some
    exact powers of two.
    """
    if value > 0 and value & (value - 1) == 0:
        return value.bit_length() - 1
    return None

def _predicted_bandwidth(size_per_region):
    """Model bandwidth (B / s): linear in the region size, capped at bw_max."""
    return min(MEMSYNC_MODEL['bw_max'], MEMSYNC_MODEL['bw_coef'] * size_per_region)

def _draw_legend(legend_list, loc, font_size):
    """Draw a bold legend from a list of (handle, label) pairs."""
    # Unpack instead of indexing zip(): zip() is not subscriptable on Python 3.
    handles, labels = zip(*legend_list)
    plt.legend(handles, labels,
        loc = loc,
        prop = matplotlib.font_manager.FontProperties(size = font_size, weight = 'bold')
    )

def _finish_log_plot(xlabel, ylabel, label_size, tick_size):
    """Apply the log-log axes, labels and tick styling, then show the figure."""
    plt.xscale('log', basex = 2)
    plt.yscale('log', basey = 10)
    # NOTE(review): a lower limit of 0 cannot be shown on a log axis; kept
    # because the original script makes exactly these calls.
    plt.xlim(xmin = 0)
    plt.ylim(ymin = 0)

    plt.xlabel(xlabel, size = label_size, weight = 'bold')
    plt.ylabel(ylabel, size = label_size, weight = 'bold')

    plt.xticks(size = tick_size, weight = 'bold')
    plt.yticks(size = tick_size, weight = 'bold')

    plt.show()

def read_memsync_input(input_file):
    """
    Read a space-delimited memsync result file.

    Rows whose num_regions is an odd power of two are dropped to thin out the
    plot. Each kept row gets numeric fields plus:
        size_per_region: total_size / num_regions (B)
        bw: total_size / nvidia_time * 10 ** -3 (MB / s)
    """
    # All time is in ms.
    # All sizes are in B.
    reader = csv.DictReader(input_file, delimiter = ' ')
    result = list()
    for row in reader:
        row['total_size'] = int(row['total_size'])
        row['num_regions'] = int(row['num_regions'])
        # Filter out some results to reduce size
        exponent = _log2_if_power_of_two(row['num_regions'])
        if exponent is not None and exponent % 2 == 1:
            continue
        for field in ('memsync_time', 'rcuda_time', 'local_time', 'nvidia_time', 'other_time',):
            row[field] = float(row[field])
        row['size_per_region'] = float(row['total_size']) / float(row['num_regions'])
        row['bw'] = row['total_size'] / row['nvidia_time'] * (10 ** -3) # MB / s
        result.append(row)
    return result

def plot_memsync(input_data):
    """
    Scatter the measured local_time against size_per_region, one colour per
    num_regions, and overlay the model prediction for each group.
    """
    measured = dict()   # num_regions -> ([size_per_region, ...], [seconds, ...])
    predicted = dict()  # num_regions -> {size_per_region: seconds}
    for data in input_data:
        num_regions = data['num_regions']
        size = data['size_per_region']

        sizes, times = measured.setdefault(num_regions, (list(), list(),))
        sizes.append(size)
        times.append(data['local_time'] / 1000)

        curve = predicted.setdefault(num_regions, dict())
        if size not in curve:
            curve[size] = num_regions * (
                MEMSYNC_MODEL['memsync_coef'] * size
                + MEMSYNC_MODEL['memsync_const']
                + size / _predicted_bandwidth(size)
            )

    legend_list = list()
    groups = sorted(measured.items(), key = lambda item: item[0])
    for i, (num_regions, (sizes, times)) in enumerate(groups):
        plt.scatter(sizes, times,
            c = COLOR[i % len(COLOR)],
            marker = 'o' if i < len(COLOR) else '+',
            s = 40
        )
        x, y = zip(*sorted(predicted[num_regions].items(), key = lambda item: item[0]))
        p = plt.plot(x, y, COLOR[i % len(COLOR)], linewidth = 4)
        # bit_length() - 1 is floor(log2(n)) computed exactly.
        legend_list.append((p[0], r'$\mathbf{2^{%d}}$ regions' % (num_regions.bit_length() - 1,),))

    p = mlines.Line2D([], [], color = 'black', linewidth = 4)
    legend_list.append((p, 'Predicted',))
    p = mlines.Line2D([], [], color = 'black', marker = 'o', markersize = 16, linestyle = 'None')
    legend_list.append((p, 'Measured',))

    legend_list.reverse()

    _draw_legend(legend_list, 'upper left', 30)
    _finish_log_plot(r'$\mathbf{data\_size_i}$ (B)', 'Time (s)', 40, 35)

def plot_memsync_bw(input_data):
    """Scatter the measured bandwidth per region size and overlay the model."""
    sizes, bandwidths = zip(*[(row['size_per_region'], row['bw'],) for row in input_data])

    legend_list = list()
    p = plt.scatter(
        sizes,
        bandwidths,
        c = COLOR[0],
        marker = 'o',
        s = 40
    )
    legend_list.append((p, 'Measured',))

    x = sorted(set(sizes))
    y = [_predicted_bandwidth(size) * (10 ** -6) for size in x] # MB / s
    plt.plot(x, y, COLOR[0], linewidth = 4)
    p = mlines.Line2D([], [], color = COLOR[0], linewidth = 4)
    legend_list.append((p, 'Predicted',))

    _draw_legend(legend_list, 'upper left', 30)
    _finish_log_plot('Size per region (B)', 'Bandwidth (MB / s)', 30, 25)

def read_mhelper_input(input_file):
    """
    Read a space-delimited mhelper result file.

    `count` and `time` are converted to numbers, together with `num_calls`
    when that column exists and `size_per_call` otherwise.
    """
    # All time is in ms.
    # All sizes are in B.
    reader = csv.DictReader(input_file, delimiter = ' ')
    result = list()
    for row in reader:
        row['count'] = int(row['count'])
        row['time'] = float(row['time'])
        if 'num_calls' in row:
            row['num_calls'] = int(row['num_calls'])
        else:
            row['size_per_call'] = int(row['size_per_call'])
        result.append(row)
    return result

def _overhead_points(input_data, key):
    """
    Return (x_values, y_values): for every mrcuda sample, its `key` value and
    its time minus the average native time for the same `key`, in seconds.
    """
    native_data = dict()
    mrcuda_data = dict()
    for data in input_data:
        data_dict = native_data if data['lib'] == 'native' else mrcuda_data
        data_dict.setdefault(data[key], list()).append(data['time'])

    x_values = list()
    y_values = list()
    for value in sorted(native_data):
        avg_time = np.average(native_data[value])
        # A key measured only natively has nothing to compare; skip it.
        for time in mrcuda_data.get(value, ()):
            x_values.append(value)
            y_values.append((time - avg_time) * (10 ** -3)) # seconds
    return x_values, y_values

def _plot_overhead(x_values, y_values, predict, xlabel, legend_size, label_size, tick_size):
    """Scatter the measured overheads and draw predict(x) over the distinct x values."""
    legend_list = list()

    p = plt.scatter(
        x_values,
        y_values,
        c = COLOR[0],
        marker = 'o',
        s = 40
    )
    legend_list.append((p, 'Measured',))

    distinct_x = sorted(set(x_values))
    plt.plot(distinct_x, [predict(x) for x in distinct_x], COLOR[0], linewidth = 4)
    p = mlines.Line2D([], [], color = COLOR[0], linewidth = 4)
    legend_list.append((p, 'Predicted',))

    _draw_legend(legend_list, 'upper left', legend_size)
    _finish_log_plot(xlabel, 'Time (s)', label_size, tick_size)

def plot_mhelper_nullker(input_data):
    """Plot the mrcuda-minus-native time of the null-kernel benchmark per number of calls."""
    x_values, y_values = _overhead_points(input_data, 'num_calls')
    _plot_overhead(
        x_values,
        y_values,
        lambda x: MHELPER_MODEL['coefc'] * x + MHELPER_MODEL['const'],
        'Number of calls',
        30, 30, 25
    )

def plot_mhelper_memcpybw(input_data):
    """Plot the mrcuda-minus-native time of the memcpy benchmark per size per call."""
    x_values, y_values = _overhead_points(input_data, 'size_per_call')
    num_calls = MHELPER_MODEL['num_calls']
    _plot_overhead(
        x_values,
        y_values,
        lambda x: MHELPER_MODEL['coefd'] * x * num_calls + MHELPER_MODEL['coefc'] * num_calls + MHELPER_MODEL['const'],
        'Size per calls (B)',
        40, 40, 35
    )

def read_record_replay_input(input_file):
    """
    Read a comma-delimited record/replay result file.

    Rows with an empty 'mrcuda_switch num_replay' are skipped. Each kept row
    gets numeric fields plus 'mrcuda_replay time', the switch time minus the
    memory synchronisation time.
    """
    # All time is in s.
    reader = csv.DictReader(input_file, delimiter = ',')
    result = list()
    for row in reader:
        if not row['mrcuda_switch num_replay']:
            continue
        row['mrcuda_record time'] = float(row['mrcuda_record time'])
        row['mrcuda_switch time'] = float(row['mrcuda_switch time'])
        row['mrcuda_sync_mem time'] = float(row['mrcuda_sync_mem time'])
        row['mrcuda_replay time'] = row['mrcuda_switch time'] - row['mrcuda_sync_mem time']
        row['mrcuda_switch num_replay'] = int(row['mrcuda_switch num_replay'])
        result.append(row)
    return result

def _embolden(labels):
    """Give tick labels the font size and weight used throughout this plot."""
    for label in labels:
        label.set_fontsize(25)
        label.set_fontweight('bold')

def plot_record_replay(input_data):
    """
    Plot record time (left axis) and replay time (right axis) against the
    number of replayed calls, each with its predicted line.
    """
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    legend_list = list()

    x_values = [row['mrcuda_switch num_replay'] for row in input_data]

    p = ax1.scatter(
        x_values,
        [row['mrcuda_record time'] for row in input_data],
        c = COLOR[0],
        marker = 'o',
        s = 40
    )
    legend_list.append((p, 'Record Overhead (Measured)',))

    p = ax2.scatter(
        x_values,
        [row['mrcuda_replay time'] for row in input_data],
        c = COLOR[1],
        marker = 'o',
        s = 40
    )
    legend_list.append((p, 'Replay Overhead (Measured)',))

    x_values = sorted(set(x_values))

    ax1.plot(
        x_values,
        [RECORD_REPLAY_MODEL['record_coef'] * x + RECORD_REPLAY_MODEL['record_const'] for x in x_values],
        COLOR[0],
        linewidth = 4
    )
    p = mlines.Line2D([], [], color = COLOR[0], linewidth = 4)
    legend_list.append((p, 'Record Overhead (Predicted)',))

    ax2.plot(
        x_values,
        [RECORD_REPLAY_MODEL['replay_coef'] * x + RECORD_REPLAY_MODEL['replay_const'] for x in x_values],
        COLOR[1],
        linewidth = 4
    )
    p = mlines.Line2D([], [], color = COLOR[1], linewidth = 4)
    legend_list.append((p, 'Replay Overhead (Predicted)',))

    _draw_legend(legend_list, 'lower right', 30)

    #plt.xscale('log', basex = 2)
    #plt.yscale('log', basey = 10)
    ax1.set_xlim(xmin = 0)
    ax2.set_xlim(xmin = 0)
    ax1.set_ylim(ymin = 0)
    ax2.set_ylim(ymin = 0)

    ax1.set_xlabel('num_record (x10,000)', size = 30, weight = 'bold')
    ax1.set_ylabel('Record Time (ms)', size = 30, weight = 'bold')
    ax2.set_ylabel('Replay Time (s)', size = 30, weight = 'bold')

    # Floor division keeps the Python 2 result of int / int on Python 3 too.
    ax1.set_xticklabels(['%d' % (int(label) // 10000,) for label in ax1.get_xticks().tolist()])
    _embolden(ax1.get_xticklabels())

    # The left axis is plotted in seconds but labelled in milliseconds.
    ax1.set_yticklabels(['%d' % (float(label) * 1000,) for label in ax1.get_yticks().tolist()])
    _embolden(ax1.get_yticklabels())
    _embolden(ax2.get_yticklabels())

    plt.show()

def main():
    """
    Main function.
    """
    args = parseargs()

    # Overhead type -> (reader, plotter).
    handlers = {
        'memsync': (read_memsync_input, plot_memsync,),
        'memsync-bw': (read_memsync_input, plot_memsync_bw,),
        'mhelper-nullker': (read_mhelper_input, plot_mhelper_nullker,),
        'mhelper-memcpybw': (read_mhelper_input, plot_mhelper_memcpybw,),
        'record-replay': (read_record_replay_input, plot_record_replay,),
    }
    read_input, plot = handlers[args.type]
    plot(read_input(args.resultfile))

if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# NOTE(dump): the flattened line that ends this script also holds the next two
# entries of the dump. They are kept here as comments so this script stays
# valid Python.
#
# /src/Makefile.am:
#   bin_PROGRAMS = mhelper
#   mhelper_SOURCES = mhelper.c intercomm_mem.c
#   mhelper_LDFLAGS = -lcuda -lcudart $(DEPS_LIBS)
#   mhelper_CPPFLAGS = -I/usr/local/cuda/include $(DEPS_CFLAGS)
#
#   lib_LTLIBRARIES = libcudart.la
#   libcudart_la_SOURCES = comm.c interface.c mrcuda.c record.c intercomm.c intercomm_mem.c intercomm_interface.c
#   libcudart_la_LDFLAGS = -avoid-version -shared -ldl $(DEPS_LIBS)
#   libcudart_la_CPPFLAGS = -I/usr/local/cuda/include $(DEPS_CFLAGS)
#
#   lib_LIBRARIES = libcomm.a
#   libcomm_a_SOURCES = comm.c comm.h common.h
#   libcomm_a_CPPFLAGS = -lpthread -pthread $(DEPS_CFLAGS)
#
#   install-exec-hook:
#       ${LN_S} ${RCUDA_RCUDACOMMIB} ${libdir}
#       ${LN_S} ${RCUDA_RCUDACOMMTCP} ${libdir}
#       ${LN_S} ${libdir}/libcudart.so ${libdir}/libcudart.so.7.0
#
# /src/comm.c (first lines; the header names are already missing in the dump):
#   #include
#   #include
#   #include
#   #include
#include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "comm.h" 11 | 12 | #define LISTEN_BACKLOG 1 13 | 14 | typedef struct __MRCUDAComm 15 | { 16 | pthread_t listeningThread; 17 | 18 | char *path; 19 | void (*callback)(void); 20 | 21 | int fd; 22 | } __MRCUDAComm; 23 | 24 | static __MRCUDAComm __mrcudaCommObj; 25 | 26 | 27 | /** 28 | * Terminate the socket. 29 | */ 30 | static void __mrcuda_comm_fini() 31 | { 32 | DPRINTF("ENTER __mrcuda_comm_fini.\n"); 33 | close(__mrcudaCommObj.fd); 34 | unlink(__mrcudaCommObj.path); 35 | free(__mrcudaCommObj.path); 36 | DPRINTF("EXIT __mrcuda_comm_fini.\n"); 37 | } 38 | 39 | /** 40 | * This function creates a FIFO file specified by the path. 41 | * If it fails to do so for any reasons, it returns the error number; otherwise, return 0 42 | * @param path path of the FIFO file to be created. 43 | * @return 0 if success, otherwise the error number. 44 | */ 45 | static int __mrcuda_comm_init(char *path) 46 | { 47 | DPRINTF("ENTER __mrcuda_comm_init.\n"); 48 | 49 | DPRINTF("__mrcuda_comm_init allocates __mrcudaCommObj.path\n"); 50 | if((__mrcudaCommObj.path = (char *)malloc(strlen(path) + 1)) == NULL) 51 | goto __mrcuda_comm_init_err_1; 52 | 53 | DPRINTF("__mrcuda_comm_init strcpy path.\n"); 54 | strcpy(__mrcudaCommObj.path, path); 55 | 56 | DPRINTF("__mrcuda_comm_init mkfifo.\n"); 57 | if(mkfifo(__mrcudaCommObj.path, 0666) == -1) 58 | goto __mrcuda_comm_init_err_2; 59 | 60 | DPRINTF("EXIT SUCCESS __mrcuda_comm_init.\n"); 61 | return 0; 62 | 63 | __mrcuda_comm_init_err_2: 64 | free(__mrcudaCommObj.path); 65 | __mrcuda_comm_init_err_1: 66 | DPRINTF("EXIT FAILURE __mrcuda_comm_init.\n"); 67 | return -1; 68 | } 69 | 70 | 71 | /** 72 | * This is the main loop for repeatedly listening to a signal. 73 | * If it receives a correct signal, it terminates the socket and calls the callback. 74 | * This function should be called from a different thread since it blocks the execution. 
75 | */ 76 | static void *__mrcuda_comm_listening_main_loop(void *arg) 77 | { 78 | DPRINTF("ENTER __mrcuda_comm_listening_main_loop.\n"); 79 | 80 | #define BUF_SIZE 1 81 | 82 | char buf[BUF_SIZE]; 83 | ssize_t readSize; 84 | 85 | DPRINTF("__mrcuda_comm_init open file.\n"); 86 | if((__mrcudaCommObj.fd = open(__mrcudaCommObj.path, O_RDONLY)) == -1) 87 | goto __mrcuda_comm_listening_main_loop_err_1; 88 | 89 | while(1) 90 | { 91 | DPRINTF("__mrcuda_comm_listening_main_loop is waiting.\n"); 92 | if((readSize = read(__mrcudaCommObj.fd, buf, BUF_SIZE)) == -1) 93 | goto __mrcuda_comm_listening_main_loop_err_1; 94 | DPRINTF("__mrcuda_comm_listening_main_loop received a signal.\n"); 95 | if(strncmp(buf, "1", BUF_SIZE) == 0) 96 | { 97 | DPRINTF("__mrcuda_comm_listening_main_loop calls the callback.\n"); 98 | __mrcudaCommObj.callback(); 99 | break; 100 | } 101 | } 102 | 103 | __mrcuda_comm_listening_main_loop_err_1: 104 | __mrcuda_comm_fini(); 105 | 106 | DPRINTF("EXIT __mrcuda_comm_listening_main_loop.\n"); 107 | 108 | #undef BUF_SIZE 109 | } 110 | 111 | /** 112 | * This function starts listening to a signal that tells the system to switch to native CUDA. 113 | * After it receives the signal, this function calls the callback and terminates the socket. 114 | * This function executes the listening process in a different thread; thus, it returns almost immediately. 115 | * Note: if the signal is not well form, this function will simply skips that signal and not calls the callback. 116 | * @param path path for creating a new UNIX socket for listening to the signal. 117 | * @param callback the function that will be called after received a signal. 118 | * @return 0 if success, the error number otherwise. 
119 | */ 120 | int mrcuda_comm_listen_for_signal(char *path, void (*callback)(void)) 121 | { 122 | DPRINTF("ENTER mrcuda_comm_listen_for_signal.\n"); 123 | int ret = 0; 124 | if((ret = __mrcuda_comm_init(path)) != 0) 125 | return ret; 126 | __mrcudaCommObj.callback = callback; 127 | 128 | DPRINTF("mrcuda_comm_listen_for_signal creates a thread.\n"); 129 | if((ret = pthread_create(&(__mrcudaCommObj.listeningThread), NULL, &__mrcuda_comm_listening_main_loop, NULL)) != 0) 130 | __mrcuda_comm_fini(); 131 | 132 | 133 | DPRINTF("EXIT mrcuda_comm_listen_for_signal.\n"); 134 | return ret; 135 | } 136 | 137 | -------------------------------------------------------------------------------- /src/comm.h: -------------------------------------------------------------------------------- 1 | #ifndef __MRCUDA_COMM__HEADER__ 2 | #define __MRCUDA_COMM__HEADER__ 3 | 4 | #include "common.h" 5 | 6 | /** 7 | * This function starts listening to a signal that tells the system to switch to native CUDA. 8 | * After it receives the signal, this function calls the callback and terminates the socket. 9 | * This function executes the listening process in a different thread; thus, it returns almost immediately. 10 | * Note: if the signal is not well form, this function will simply skips that signal and not calls the callback. 11 | * @param path path for creating a new UNIX socket for listening to the signal. 12 | * @param callback the function that will be called after received a signal. 13 | * @return 0 if success, the error number otherwise. 
14 | */ 15 | int mrcuda_comm_listen_for_signal(char *path, void (*callback)(void)); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef __MRCUDA_COMMON__HEADER__ 2 | #define __MRCUDA_COMMON__HEADER__ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #if DEBUG 11 | #define DPRINTF(fmt, ...) \ 12 | do {fprintf(stderr, "FILE: " __FILE__ ", LINE: %d, " fmt, __LINE__, ##__VA_ARGS__);} while(0) 13 | #else 14 | #define DPRINTF(fmt, ...) \ 15 | do {;;} while(0) 16 | #endif 17 | 18 | #define REPORT_ERROR_AND_EXIT(...) \ 19 | do { \ 20 | perror("FATAL ERROR"); \ 21 | fprintf(stderr, __VA_ARGS__); \ 22 | exit(EXIT_FAILURE); \ 23 | } while(0) 24 | 25 | #define STARTTIMMER() \ 26 | struct timeval t1, t2; \ 27 | gettimeofday(&t1, NULL); 28 | 29 | #define ENDTIMMER(acctime) \ 30 | gettimeofday(&t2, NULL); \ 31 | acctime += (t2.tv_sec - t1.tv_sec) * 1000.0 + (t2.tv_usec - t1.tv_usec) / 1000.0; 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /src/intercomm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "intercomm.h" 8 | #include "datatypes.h" 9 | 10 | /** 11 | * Create a helper process and assign the mrcudaGPU to it. 12 | * @param mrcudaGPU the GPU information to assign to the created process. 13 | * @param helperProgPath the path to the helper application. 14 | * @param gpuID the ID of the GPU the helper application will use. 15 | * @return a ptr to the created process on success; NULL otherwise. 
16 | */ 17 | MHelperProcess_t *mhelper_create(MRCUDAGPU_t *mrcudaGPU, const char *helperProgPath, int gpuID) 18 | { 19 | int rPipePair[2], wPipePair[2]; 20 | MHelperProcess_t *mhelperProcess; 21 | pid_t pid; 22 | char gpuIDStr[15]; 23 | 24 | if (pipe(rPipePair) != 0) 25 | goto __mhelper_create_err_0; 26 | if (pipe(wPipePair) != 0) 27 | goto __mhelper_create_err_1; 28 | if ((mhelperProcess = malloc(sizeof(MHelperProcess_t))) == NULL) 29 | goto __mhelper_create_err_2; 30 | pid = fork(); 31 | if (pid == 0) { // child process 32 | close(wPipePair[1]); 33 | close(rPipePair[0]); 34 | dup2(wPipePair[0], fileno(stdin)); 35 | dup2(rPipePair[1], fileno(stdout)); 36 | sprintf(gpuIDStr, "%d", gpuID); 37 | execl(helperProgPath, helperProgPath, gpuIDStr, "\0"); 38 | perror("Helper Program Exec"); 39 | _exit(EXIT_FAILURE); 40 | } 41 | else if (pid < 0) // error; cannot fork 42 | goto __mhelper_create_err_3; 43 | else { // parent process 44 | close(wPipePair[0]); 45 | close(rPipePair[1]); 46 | mhelperProcess->readPipe = rPipePair[0]; 47 | mhelperProcess->writePipe = wPipePair[1]; 48 | mhelperProcess->pid = pid; 49 | if (mhelper_int_init(&(mhelperProcess->handle), mhelperProcess) != 0) 50 | goto __mhelper_create_err_3; 51 | mrcudaGPU->mhelperProcess = mhelperProcess; 52 | return mhelperProcess; 53 | } 54 | 55 | __mhelper_create_err_3: 56 | free(mhelperProcess); 57 | __mhelper_create_err_2: 58 | close(wPipePair[0]); 59 | close(wPipePair[1]); 60 | __mhelper_create_err_1: 61 | close(rPipePair[0]); 62 | close(rPipePair[1]); 63 | __mhelper_create_err_0: 64 | return NULL; 65 | } 66 | 67 | /** 68 | * Destroy the helper process. 69 | * @param process the process to be destroyed. 70 | * @return 0 on success; another number otherwise. 71 | */ 72 | int mhelper_destroy(MHelperProcess_t *process) 73 | { 74 | int ret = kill(process->pid, SIGQUIT); 75 | if (ret == 0) 76 | free(process); 77 | return ret; 78 | } 79 | 80 | /** 81 | * Ask the process to execute the command. 
82 | * @param process the process that will execute the specified command. 83 | * @param command the command to be executed on the process. 84 | * @return the result of the execution. 85 | */ 86 | MHelperResult_t mhelper_call(MHelperProcess_t *process, MHelperCommand_t command) 87 | { 88 | ssize_t n; 89 | size_t remainingSize = sizeof(MHelperCommand_t); 90 | char *buf = (char *)&command; 91 | MHelperResult_t result; 92 | 93 | while (remainingSize > 0) { 94 | n = write(process->writePipe, buf, remainingSize); 95 | if (n < 0) 96 | goto __mhelper_call_err_0; 97 | remainingSize -= n; 98 | buf += n; 99 | } 100 | 101 | remainingSize = sizeof(MHelperResult_t); 102 | buf = (char *)&result; 103 | while (remainingSize > 0) { 104 | n = read(process->readPipe, buf, remainingSize); 105 | if (n < 0) 106 | goto __mhelper_call_err_0; 107 | remainingSize -= n; 108 | buf += n; 109 | } 110 | if (result.id != command.id || result.type != command.type) 111 | goto __mhelper_call_err_0; 112 | return result; 113 | 114 | __mhelper_call_err_0: 115 | result.id = command.id; 116 | result.type = command.type; 117 | result.internalError = -1; 118 | result.cudaError = cudaSuccess; 119 | return result; 120 | } 121 | 122 | /** 123 | * Generate a unique ID for a command to be used with the specified mrcudaGPU. 124 | * @param mrcudaGPU a ptr to a MRCUDAGPU_t 125 | * @return a unique ID 126 | */ 127 | int mhelper_generate_command_id(MRCUDAGPU_t *mrcudaGPU) 128 | { 129 | return (rand() << 4) | mrcudaGPU->virtualNumber; 130 | } 131 | 132 | -------------------------------------------------------------------------------- /src/intercomm.h: -------------------------------------------------------------------------------- 1 | #ifndef __MRCUDA_INTERCOMM__HEADER__ 2 | #define __MRCUDA_INTERCOMM__HEADER__ 3 | 4 | #include "datatypes.h" 5 | #include "intercomm_mem.h" 6 | 7 | /** 8 | * Create a helper process and assign the mrcudaGPU to it. 
9 | * @param mrcudaGPU the GPU information to assign to the created process. 10 | * @param helperProgPath the path to the helper application. 11 | * @param gpuID the ID of the GPU the helper application will use. 12 | * @return a ptr to the created process on success; NULL otherwise. 13 | */ 14 | MHelperProcess_t *mhelper_create(MRCUDAGPU_t *mrcudaGPU, const char *helperProgPath, int gpuID); 15 | 16 | /** 17 | * Destroy the helper process. 18 | * @param process the process to be destroyed. 19 | * @return 0 on success; another number otherwise. 20 | */ 21 | int mhelper_destroy(MHelperProcess_t *process); 22 | 23 | /** 24 | * Ask the process to execute the command. 25 | * @param process the process that will execute the specified command. 26 | * @param command the command to be executed on the process. 27 | * @return the result of the execution. 28 | */ 29 | MHelperResult_t mhelper_call(MHelperProcess_t *process, MHelperCommand_t command); 30 | 31 | /** 32 | * Generate a unique ID for a command to be used with the specified mrcudaGPU. 33 | * @param mrcudaGPU a ptr to a MRCUDAGPU_t 34 | * @return a unique ID 35 | */ 36 | int mhelper_generate_command_id(MRCUDAGPU_t *mrcudaGPU); 37 | 38 | #endif /* __MRCUDA_INTERCOMM__HEADER__ */ 39 | 40 | -------------------------------------------------------------------------------- /src/intercomm_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef __MRUCDA_INTERCOMM_INTERFACE__HEADER__ 2 | #define __MRCUDA_INTERCOMM_INTERFACE__HEADER__ 3 | 4 | #include 5 | 6 | #include "datatypes.h" 7 | 8 | /** 9 | * Initialize a handler with a helper process. 10 | * @param handler output of initialized handler. 11 | * @param process a ptr to a helper process. 12 | * @return 0 on success; -1 otherwise. 13 | */ 14 | int mhelper_int_init(MRCUDASym_t **handler, MHelperProcess_t *process); 15 | 16 | 17 | /* Interfaces */ 18 | 19 | /** 20 | * Create a context on the helper process. 
21 | * @param mrcudaGPU a ptr to a MRCUDAGPU_t a context will be created on. 22 | * @return 0 on success; -1 otherwise. 23 | */ 24 | int mhelper_int_cuCtxCreate_internal(MRCUDAGPU_t *mrcudaGPU); 25 | 26 | void **mhelper_int_cudaRegisterFatBinary(void *fatCubin); 27 | void **mhelper_int_cudaRegisterFatBinary_internal(MRCUDAGPU_t *mrcudaGPU, void *fatCubin); 28 | 29 | void mhelper_int_cudaUnregisterFatBinary(void **fatCubinHandle); 30 | void mhelper_int_cudaUnregisterFatBinary_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle); 31 | 32 | void mhelper_int_cudaRegisterVar(void **fatCubinHandle, char *hostVar, char *deviceAddress, const char *deviceName, int ext, int size, int constant, int global); 33 | void mhelper_int_cudaRegisterVar_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle, char *hostVar, char *deviceAddress, const char *deviceName, int ext, int size, int constant, int global); 34 | 35 | void mhelper_int_cudaRegisterTexture(void **fatCubinHandle, const struct textureReference *hostVar, const void **deviceAddress, const char *deviceName, int dim, int norm, int ext); 36 | void mhelper_int_cudaRegisterTexture_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle, const struct textureReference *hostVar, const void **deviceAddress, const char *deviceName, int dim, int norm, int ext); 37 | 38 | void mhelper_int_cudaRegisterFunction(void **fatCubinHandle, const char *hostFun, char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize); 39 | void mhelper_int_cudaRegisterFunction_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle, const char *hostFun, char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize); 40 | 41 | cudaError_t mhelper_int_cudaLaunch(const void *func); 42 | cudaError_t mhelper_int_cudaLaunch_internal(MRCUDAGPU_t *mrcudaGPU, const void *func); 43 | 44 | cudaError_t mhelper_int_cudaHostAlloc(void **pHost, size_t 
size, unsigned int flags); 45 | 46 | cudaError_t mhelper_int_cudaDeviceReset(void); 47 | cudaError_t mhelper_int_cudaDeviceReset_internal(MRCUDAGPU_t *mrcudaGPU); 48 | 49 | cudaError_t mhelper_int_cudaDeviceSynchronize(void); 50 | cudaError_t mhelper_int_cudaDeviceSynchronize_internal(MRCUDAGPU_t *mrcudaGPU); 51 | 52 | cudaError_t mhelper_int_cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device); 53 | cudaError_t mhelper_int_cudaGetDeviceProperties_internal(MRCUDAGPU_t *mrcudaGPU, struct cudaDeviceProp *prop, int device); 54 | 55 | cudaError_t mhelper_int_cudaMalloc(void **devPtr, size_t size); 56 | cudaError_t mhelper_int_cudaMalloc_internal(MRCUDAGPU_t *mrcudaGPU, void **devPtr, size_t size); 57 | 58 | cudaError_t mhelper_int_cudaFreeHost(void *ptr); 59 | 60 | cudaError_t mhelper_int_cudaFree(void *devPtr); 61 | cudaError_t mhelper_int_cudaFree_internal(MRCUDAGPU_t *mrcudaGPU, void *devPtr); 62 | 63 | cudaError_t mhelper_int_cudaMemcpyToSymbolAsync(const void *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream); 64 | cudaError_t mhelper_int_cudaMemcpyToSymbolAsync_internal(MRCUDAGPU_t *mrcudaGPU, const void *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream); 65 | 66 | cudaError_t mhelper_int_cudaMemcpyFromSymbolAsync(void *dst, const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream); 67 | cudaError_t mhelper_int_cudaMemcpyFromSymbolAsync_internal(MRCUDAGPU_t *mrcudaGPU, void *dst, const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream); 68 | 69 | cudaError_t mhelper_int_cudaSetupArgument(const void *arg, size_t size, size_t offset); 70 | cudaError_t mhelper_int_cudaSetupArgument_internal(MRCUDAGPU_t *mrcudaGPU, const void *arg, size_t size, size_t offset); 71 | 72 | cudaError_t mhelper_int_cudaStreamSynchronize(cudaStream_t stream); 73 | cudaError_t 
mhelper_int_cudaStreamSynchronize_internal(MRCUDAGPU_t *mrcudaGPU, cudaStream_t stream); 74 | 75 | cudaError_t mhelper_int_cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream); 76 | cudaError_t mhelper_int_cudaConfigureCall_internal(MRCUDAGPU_t *mrcudaGPU, dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream); 77 | 78 | cudaError_t mhelper_int_cudaGetLastError(void); 79 | cudaError_t mhelper_int_cudaGetLastError_internal(MRCUDAGPU_t *mrcudaGPU); 80 | 81 | cudaError_t mhelper_int_cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind); 82 | cudaError_t mhelper_int_cudaMemcpy_internal(MRCUDAGPU_t *mrcudaGPU, void *dst, const void *src, size_t count, enum cudaMemcpyKind kind); 83 | 84 | cudaError_t mhelper_int_cudaSetDevice(int device); 85 | cudaError_t mhelper_int_cudaSetDevice_internal(MRCUDAGPU_t *mrcudaGPU, int device); 86 | 87 | cudaError_t mhelper_int_cudaStreamCreate(cudaStream_t *pStream); 88 | cudaError_t mhelper_int_cudaStreamCreate_internal(MRCUDAGPU_t *mrcudaGPU, cudaStream_t *pStream); 89 | 90 | #endif /* __MRCUDA_INTERCOMM_INTERFACE__HEADER__ */ 91 | 92 | -------------------------------------------------------------------------------- /src/intercomm_mem.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "common.h" 8 | #include "datatypes.h" 9 | #include "intercomm_mem.h" 10 | 11 | #define DEV_RANDOM "/dev/urandom" 12 | 13 | static int initRand = 0; 14 | 15 | /** 16 | * Generate a key to be associated with a shared memory region. 17 | * @return a key. 
18 | */ 19 | static key_t generate_key() 20 | { 21 | FILE *f; 22 | unsigned int seed; 23 | size_t remainingSize = sizeof(unsigned int); 24 | size_t readSize = 0; 25 | 26 | if (!initRand) { 27 | f = fopen(DEV_RANDOM, "r"); 28 | while (remainingSize > 0) { 29 | if ((readSize = fread(&seed, remainingSize, 1, f)) == 0) 30 | REPORT_ERROR_AND_EXIT("Cannot read from " DEV_RANDOM ".\n"); 31 | remainingSize -= readSize; 32 | } 33 | fclose(f); 34 | srand(seed); 35 | initRand = !initRand; 36 | } 37 | return (key_t)rand(); 38 | } 39 | 40 | /** 41 | * Malloc memory on shared-memory region. 42 | * @param size the size of memory to be allocated. 43 | * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise. 44 | */ 45 | MRCUDASharedMemLocalInfo_t *mhelper_mem_malloc(size_t size) 46 | { 47 | MRCUDASharedMemLocalInfo_t *sharedMemInfo = calloc(1, sizeof(MRCUDASharedMemLocalInfo_t)); 48 | if (sharedMemInfo == NULL) 49 | goto __mhelper_mem_malloc_err_0; 50 | sharedMemInfo->sharedMem.key = generate_key(); 51 | if ((sharedMemInfo->shmid = shmget(sharedMemInfo->sharedMem.key, size, IPC_CREAT | IPC_EXCL | 0600)) <= 0) 52 | goto __mhelper_mem_malloc_err_1; 53 | if ((sharedMemInfo->startAddr = shmat(sharedMemInfo->shmid, NULL, 0)) == NULL) 54 | goto __mhelper_mem_malloc_err_2; 55 | sharedMemInfo->sharedMem.size = size; 56 | return sharedMemInfo; 57 | 58 | __mhelper_mem_malloc_err_2: 59 | shmctl(sharedMemInfo->shmid, IPC_RMID, NULL); 60 | __mhelper_mem_malloc_err_1: 61 | free(sharedMemInfo); 62 | __mhelper_mem_malloc_err_0: 63 | return NULL; 64 | } 65 | 66 | /** 67 | * Detach and destroy the shared region specified by the sharedMemInfo. 68 | * @param sharedMemInfo the information of the shared region. 69 | * @return 0 on success; other number otherwise. 
70 | */ 71 | int mhelper_mem_free(MRCUDASharedMemLocalInfo_t *sharedMemInfo) 72 | { 73 | int ret = shmctl(sharedMemInfo->shmid, IPC_RMID, NULL); 74 | if (ret == 0) 75 | free(sharedMemInfo); 76 | return ret; 77 | } 78 | 79 | /** 80 | * Get the memory region associated with the specified sharedMem. 81 | * @param sharedMem the minimum information of the shared region. 82 | * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise. 83 | */ 84 | MRCUDASharedMemLocalInfo_t *mhelper_mem_get(MRCUDASharedMem_t sharedMem) 85 | { 86 | MRCUDASharedMemLocalInfo_t *sharedMemInfo = calloc(1, sizeof(MRCUDASharedMemLocalInfo_t)); 87 | if (sharedMemInfo == NULL) 88 | goto __mhelper_mem_get_err_0; 89 | if ((sharedMemInfo->shmid = shmget(sharedMem.key, sharedMem.size, 0666)) <= 0) 90 | goto __mhelper_mem_get_err_1; 91 | if ((sharedMemInfo->startAddr = shmat(sharedMemInfo->shmid, NULL, 0)) == NULL) 92 | goto __mhelper_mem_get_err_1; 93 | sharedMemInfo->sharedMem = sharedMem; 94 | return sharedMemInfo; 95 | 96 | __mhelper_mem_get_err_1: 97 | free(sharedMemInfo); 98 | __mhelper_mem_get_err_0: 99 | return NULL; 100 | } 101 | 102 | /** 103 | * Detach the shared region specified by the sharedMemInfo. 104 | * @param sharedMemInfo the information of the shared region. 105 | * @return 0 on success; another number otherwise. 106 | */ 107 | int mhelper_mem_detach(MRCUDASharedMemLocalInfo_t *sharedMemInfo) 108 | { 109 | return shmdt(sharedMemInfo->startAddr); 110 | } 111 | 112 | -------------------------------------------------------------------------------- /src/intercomm_mem.h: -------------------------------------------------------------------------------- 1 | #ifndef __MRCUDA_INTERCOMM_MEM__HEADER__ 2 | #define __MRCUDA_INTERCOMM_MEM__HEADER__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "datatypes.h" 9 | 10 | /** 11 | * Malloc memory on shared-memory region. 12 | * @param size the size of memory to be allocated. 
13 | * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise. 14 | */ 15 | MRCUDASharedMemLocalInfo_t *mhelper_mem_malloc(size_t size); 16 | 17 | /** 18 | * Detach and destroy the shared region specified by the sharedMemInfo. 19 | * @param sharedMemInfo the information of the shared region. 20 | * @return 0 on success; other number otherwise. 21 | */ 22 | int mhelper_mem_free(MRCUDASharedMemLocalInfo_t *sharedMemInfo); 23 | 24 | /** 25 | * Get the memory region associated with the specified sharedMem. 26 | * @param sharedMem the minimum information of the shared region. 27 | * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise. 28 | */ 29 | MRCUDASharedMemLocalInfo_t *mhelper_mem_get(MRCUDASharedMem_t sharedMem); 30 | 31 | /** 32 | * Detach the shared region specified by the sharedMemInfo. 33 | * @param sharedMemInfo the information of the shared region. 34 | * @return 0 on success; another number otherwise. 35 | */ 36 | int mhelper_mem_detach(MRCUDASharedMemLocalInfo_t *sharedMemInfo); 37 | 38 | #endif /* __MRCUDA_INTERCOMM_MEM__HEADER__ */ 39 | 40 | -------------------------------------------------------------------------------- /src/mrcuda.h: -------------------------------------------------------------------------------- 1 | #ifndef __MRCUDA__HEADER__ 2 | #define __MRCUDA__HEADER__ 3 | 4 | #include 5 | #include 6 | #include "common.h" 7 | #include "datatypes.h" 8 | 9 | extern MRCUDASym_t *mrcudaSymNvidia; 10 | extern MRCUDASym_t *mrcudaSymRCUDA; 11 | 12 | extern int mrcudaNumGPUs; 13 | extern MRCUDAGPU_t *mrcudaGPUList; 14 | 15 | extern GHashTable *mrcudaGPUThreadMap; 16 | 17 | extern MRCUDAState_e mrcudaState; 18 | 19 | /** 20 | * Get the GPU assigned to the calling thread. 21 | * @return a pointer to the assigned GPU. 22 | */ 23 | MRCUDAGPU_t *mrcuda_get_current_gpu(); 24 | 25 | /** 26 | * Set the GPU assigned to the calling thread. 27 | * @param device virtual device ID. 
28 | */ 29 | void mrcuda_set_current_gpu(int device); 30 | 31 | 32 | /** 33 | * Initialize mrCUDA. 34 | * Print error and terminate the program if an error occurs. 35 | */ 36 | void mrcuda_init(); 37 | 38 | /** 39 | * Finalize mrCUDA. 40 | */ 41 | int mrcuda_fini(); 42 | 43 | /** 44 | * Switch the specified mrcudaGPU from rCUDA to native. 45 | * @param mrcudaGPU a ptr to the mrcudaGPU to be switched. 46 | * @param toGPUNumber the native GPU number to be moved to. 47 | */ 48 | void mrcuda_switch(MRCUDAGPU_t *mrcudaGPU, int toGPUNumber); 49 | 50 | /** 51 | * Create a barrier such that subsequent calls are blocked until the barrier is released. 52 | * @param mrcudaGPU a ptr to the GPU a barrier will be created on. 53 | */ 54 | void mrcuda_function_call_lock(MRCUDAGPU_t *mrcudaGPU); 55 | 56 | /** 57 | * Release the barrier; thus, allow subsequent calls to be processed normally. 58 | * @param mrcudaGPU a ptr to the GPU the barrier will be released. 59 | */ 60 | void mrcuda_function_call_release(MRCUDAGPU_t *mrcudaGPU); 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/record.h: -------------------------------------------------------------------------------- 1 | #ifndef __MRCUDA_RECORD__HEADER__ 2 | #define __MRCUDA_RECORD__HEADER__ 3 | 4 | #include 5 | #include 6 | 7 | #include "common.h" 8 | #include "datatypes.h" 9 | 10 | extern double recordAccTime; 11 | extern double memsyncAccTime; 12 | extern double memsyncrCUDAAccTime; 13 | extern double memsyncNvidiaAccTime; 14 | extern int memsyncNumCalls; 15 | extern double memsyncSize; 16 | 17 | extern MRecordGPU_t *mrecordGPUList; 18 | 19 | /** 20 | * Initialize the record/replay module. 21 | * Exit and report error if found. 22 | */ 23 | void mrcuda_record_init(); 24 | 25 | /** 26 | * Finalize the record/replay module. 27 | */ 28 | void mrcuda_record_fini(); 29 | 30 | /** 31 | * Record a cudaRegisterFatBinary call. 
 */
void mrcuda_record_cudaRegisterFatBinary(MRCUDAGPU_t *mrcudaGPU, void* fatCubin, void **fatCubinHandle);

/**
 * Record a cudaRegisterFunction call.
 * NOTE(review): the arguments after mrcudaGPU appear to mirror those of the
 * intercepted registration call -- confirm against record.c.
 */
void mrcuda_record_cudaRegisterFunction(
    MRCUDAGPU_t *mrcudaGPU,
    void **fatCubinHandle,
    const char *hostFun,
    char *deviceFun,
    const char *deviceName,
    int thread_limit,
    uint3 *tid,
    uint3 *bid,
    dim3 *bDim,
    dim3 *gDim,
    int *wSize
);

/**
 * Record a cudaRegisterVar call.
 */
void mrcuda_record_cudaRegisterVar(
    MRCUDAGPU_t *mrcudaGPU,
    void **fatCubinHandle,
    char *hostVar,
    char *deviceAddress,
    const char *deviceName,
    int ext,
    int size,
    int constant,
    int global
);

/**
 * Record a cudaRegisterTexture call.
 */
void mrcuda_record_cudaRegisterTexture(
    MRCUDAGPU_t *mrcudaGPU,
    void **fatCubinHandle,
    const struct textureReference *hostVar,
    const void **deviceAddress,
    const char *deviceName,
    int dim,
    int norm,
    int ext
);

/**
 * Record a cudaUnregisterFatBinary call.
 */
void mrcuda_record_cudaUnregisterFatBinary(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle);

/**
 * Record a cudaMalloc call.
 */
void mrcuda_record_cudaMalloc(MRCUDAGPU_t *mrcudaGPU, void **devPtr, size_t size);

/**
 * Record a cudaFree call.
 */
void mrcuda_record_cudaFree(MRCUDAGPU_t *mrcudaGPU, void *devPtr);

/**
 * Record a cudaBindTexture call.
 */
void mrcuda_record_cudaBindTexture(
    MRCUDAGPU_t *mrcudaGPU,
    size_t *offset,
    const struct textureReference *texref,
    const void *devPtr,
    const struct cudaChannelFormatDesc *desc,
    size_t size
);

/**
 * Record a cudaStreamCreate call.
 */
void mrcuda_record_cudaStreamCreate(MRCUDAGPU_t *mrcudaGPU, cudaStream_t *pStream);

/**
 * Record a cudaHostAlloc call.
 * The dual function of this call is mrcuda_replay_cudaFreeHost.
 */
void mrcuda_record_cudaHostAlloc(MRCUDAGPU_t *mrcudaGPU, void **pHost, size_t size, unsigned int flags);

/**
 * Record a cudaSetDeviceFlags call.
 */
void mrcuda_record_cudaSetDeviceFlags(MRCUDAGPU_t *mrcudaGPU, unsigned int flags);


/*
 * Replay functions.
 * Except for mrcuda_replay_cudaFreeHost, each takes the GPU descriptor and a
 * single MRecord_t.
 * NOTE(review): presumably the record is the one stored by the matching
 * mrcuda_record_* function -- confirm against record.c.
 */

/**
 * Replay a cudaRegisterFatBinary call.
 */
void mrcuda_replay_cudaRegisterFatBinary(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaRegisterFunction call.
 */
void mrcuda_replay_cudaRegisterFunction(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaRegisterVar call.
 */
void mrcuda_replay_cudaRegisterVar(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaRegisterTexture call.
 */
void mrcuda_replay_cudaRegisterTexture(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaUnregisterFatBinary call.
 */
void mrcuda_replay_cudaUnregisterFatBinary(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaMalloc call.
 */
void mrcuda_replay_cudaMalloc(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaFree call.
 */
void mrcuda_replay_cudaFree(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaBindTexture call.
 */
void mrcuda_replay_cudaBindTexture(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaStreamCreate call.
 */
void mrcuda_replay_cudaStreamCreate(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Replay a cudaFreeHost call.
 * This function looks for the library that was used to allocate ptr.
 * The dual function of this call is mrcuda_record_cudaHostAlloc.
 * Unlike the other replay functions, it takes the host pointer itself rather
 * than an MRecord_t and returns a MRCUDASym_t pointer.
 * NOTE(review): presumably the returned symbol table belongs to the library
 * that owns ptr, so the caller can free through it -- confirm against record.c.
 */
MRCUDASym_t *mrcuda_replay_cudaFreeHost(MRCUDAGPU_t *mrcudaGPU, void *ptr);

/**
 * Replay a cudaSetDeviceFlags call.
 */
void mrcuda_replay_cudaSetDeviceFlags(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);

/**
 * Download the content of active memory regions to the native device.
 * Exit and report an error if one is found.
 * @param mrcudaGPU a ptr to a MRCUDAGPU_t that the memory sync will be performed on.
 */
void mrcuda_sync_mem(MRCUDAGPU_t *mrcudaGPU);

/**
 * Simulate CUDA streams on the native CUDA so that the number of streams equals that of rCUDA.
 * @param mrcudaGPU a ptr to a MRCUDAGPU_t that the stream simulation will be performed on.
 */
void mrcuda_simulate_stream(MRCUDAGPU_t *mrcudaGPU);

/**
 * Simulate cuCtxCreate on the specified gpuID.
 * If mrcudaGPU->status == MRCUDA_GPU_STATUS_HELPER, ask the helper to handle the command.
 * @param mrcudaGPU a ptr to a MRCUDAGPU_t.
 * @param gpuID the ID of the GPU a context will be created on.
 * @return 0 on success; -1 otherwise.
201 | */ 202 | int mrcuda_simulate_cuCtxCreate(MRCUDAGPU_t *mrcudaGPU, int gpuID); 203 | 204 | #endif 205 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | TESTS = check_comm 2 | check_PROGRAMS = check_comm 3 | check_comm_SOURCES = check_comm.c $(top_builddir)/src/comm.h 4 | check_comm_CFLAGS = @CHECK_CFLAGS@ -pthread 5 | check_comm_LDADD = $(top_builddir)/src/libcomm.a @CHECK_LIBS@ 6 | check_comm_LDFLAGS = -pthread -lpthread 7 | 8 | -------------------------------------------------------------------------------- /tests/check_comm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "../src/comm.h" 11 | 12 | static int __getSignalFlag = 0; 13 | 14 | void process_signal(void) 15 | { 16 | __getSignalFlag = 1; 17 | } 18 | 19 | START_TEST(test_mrcuda_comm_listen_for_signal) 20 | { 21 | char *path = "/tmp/mrcuda.pipe"; 22 | int fd; 23 | int ret; 24 | 25 | unlink(path); 26 | ret = mrcuda_comm_listen_for_signal(path, &process_signal); 27 | ck_assert(ret == 0); 28 | 29 | fd = open(path, O_WRONLY); 30 | write(fd, "1", sizeof("1")); 31 | close(fd); 32 | 33 | while(!__getSignalFlag) 34 | sleep(1); 35 | } 36 | END_TEST 37 | 38 | Suite *comm_suit(void) 39 | { 40 | Suite *s; 41 | TCase *tc_core; 42 | 43 | s = suite_create("Comm"); 44 | 45 | tc_core = tcase_create("Core"); 46 | 47 | tcase_add_test(tc_core, test_mrcuda_comm_listen_for_signal); 48 | suite_add_tcase(s, tc_core); 49 | 50 | return s; 51 | } 52 | 53 | int main(void) 54 | { 55 | int number_failed; 56 | Suite *s; 57 | SRunner *sr; 58 | 59 | s = comm_suit(); 60 | sr = srunner_create(s); 61 | 62 | srunner_run_all(sr, CK_NORMAL); 63 | number_failed = srunner_ntests_failed(sr); 64 | srunner_free(sr); 65 | 66 | return (number_failed == 0) ? 
/*
 * Check that recording a cudaRegisterFatBinary call leaves a record at the
 * tail of the record list whose function name, replay function and stored
 * fatCubin pointer match the call.
 *
 * NOTE(review): this test looks stale. It calls
 * mrcuda_record_cudaRegisterFatBinary() with a single argument, whereas the
 * prototype in src/record.h takes (mrcudaGPU, fatCubin, fatCubinHandle), and
 * tests/Makefile.am only lists check_comm, so this file is presumably never
 * built. Confirm, then update it to the current API or remove it.
 */
START_TEST(test_mrcuda_record_cudaRegisterFatBinary)
{
    /* Dummy payload; only its address is compared below. */
    void *fatCubin = malloc(sizeof(void *) * 10);
    mrcuda_record_cudaRegisterFatBinary(fatCubin);
    ck_assert(mrcudaRecordHeadPtr != NULL);
    ck_assert(mrcudaRecordTailPtr != NULL);
    ck_assert(strcmp(mrcudaRecordTailPtr->functionName, "cudaRegisterFatBinary") == 0);
    ck_assert(mrcudaRecordTailPtr->replayFunction == &mrcuda_replay_cudaRegisterFatBinary);
    ck_assert(mrcudaRecordTailPtr->data.cudaRegisterFatBinary.fatCubin == fatCubin);
    free(fatCubin);
}
END_TEST
#!/bin/bash
#
# memcpybw benchmark driver.
#
# For every transfer size 2^0 .. 2^19 bytes, run ./memcpybw ten times under
# mrCUDAExec (IB transport, server rc015, switch threshold 1) with a single
# section (num = 1), pinned to CPU 0 by taskset. A label line is printed
# before each run and there is a one-second pause after it.
#
# An earlier variant, currently disabled, additionally swept num over
# 2^0 .. 2^14 and memsize over 2^20 .. 2^29.

for (( j = 0; j < 20; j++ )); do
    memsize=$(( 1 << j ))
    for (( i = 0; i < 10; i++ )); do
        echo "mrcuda $memsize 1"
        taskset 1 ~/src/mrCUDA/scripts/mrCUDAExec -t IB -s rc015 --switch-threshold=1 -- ./memcpybw "$memsize" 1
        sleep 1
    done
done
/*
 * Evaluate a CUDA runtime call exactly once and terminate the program with
 * EXIT_FAILURE (after printing "ERROR" to stderr) unless it returned
 * cudaSuccess.
 *
 * The body is wrapped in do { ... } while (0) so that `CUDA_SAFE_CALL(x);`
 * is a single statement. With a bare { } block the trailing semicolon becomes
 * an extra empty statement, which breaks an unbraced `if (...) ...; else ...`.
 */
#define CUDA_SAFE_CALL(func) \
    do { \
        if ((func) != cudaSuccess) { \
            fprintf(stderr, "ERROR\n"); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
/*
 * Host driver for the hello kernel.
 *
 * Prints "Hello ", lets the GPU add b[i] to a[i] for the N characters of a
 * (which turns "Hello " into "World!"), copies the result back and prints it.
 *
 * Every CUDA call is checked: the first failure stops the sequence, the
 * device buffers are released and the error text is reported on stderr.
 * A failure during kernel execution is reported by the device-to-host
 * cudaMemcpy that follows the launch.
 *
 * @return EXIT_SUCCESS when the whole round trip worked, EXIT_FAILURE otherwise.
 */
int main()
{
    char a[N] = "Hello \0\0\0\0\0\0";
    int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

    /* NULL-initialised so the cudaFree calls below are safe on any path. */
    char *ad = NULL;
    int *bd = NULL;
    const int csize = N*sizeof(char);
    const int isize = N*sizeof(int);
    dim3 dimBlock( blocksize, 1 );
    dim3 dimGrid( 1, 1 );
    cudaError_t err;

    printf("%s", a);

    err = cudaMalloc( (void**)&ad, csize );
    if (err == cudaSuccess)
        err = cudaMalloc( (void**)&bd, isize );
    if (err == cudaSuccess)
        err = cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice );
    if (err == cudaSuccess)
        err = cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice );
    if (err == cudaSuccess) {
        hello<<<dimGrid, dimBlock>>>(ad, bd);
        /* Catches launch-time failures such as an invalid configuration. */
        err = cudaGetLastError();
    }
    if (err == cudaSuccess)
        err = cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost );

    cudaFree( ad );
    cudaFree( bd );

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s.\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }

    printf("%s\n", a);
    return EXIT_SUCCESS;
}
/*
 * Compare two row-major matrices of l rows and n columns element by element.
 *
 * Elements are compared with exact floating-point inequality.
 *
 * @return 0 when all l * n elements are equal, -1 as soon as one differs.
 */
int compare_matrix(float *A, float *B, int l, int n)
{
    for (int row = 0; row < l; row++) {
        for (int col = 0; col < n; col++) {
            const int idx = row * n + col;

            if (A[idx] != B[idx])
                return -1;
        }
    }
    return 0;
}
/*
 * Run the same matrix-multiplication check on device 0 and then on device 1.
 *
 * For each device: allocate and randomly fill A (L x M) and B (M x N) on the
 * host, copy them to the device, time one matmul kernel launch, copy C back,
 * recompute C on the CPU, print both results and report "OK" or "ERRRRR"
 * depending on compare_matrix().
 *
 * Exits with EXIT_FAILURE when fewer than two devices are available or when
 * check_error() reports a CUDA error after a launch; returns 0 otherwise.
 *
 * NOTE(review): the kernel launch configuration between <<< and >>> is
 * missing in this copy of the file; restore it from the original source.
 * NOTE(review): the return values of cudaSetDevice, cudaMemcpy and
 * cudaDeviceSynchronize are not checked, and the host buffers obtained
 * through alloc_matrix (Ah1, Bh1, Ch1, Ah2, Bh2, Ch2) are never freed.
 */
int main(int argc, char *argv[])
{
    float *Ad1, *Bd1, *Cd1;
    float *Ah1, *Bh1, *Ch1;
    float *Ad2, *Bd2, *Cd2;
    float *Ah2, *Bh2, *Ch2;
    struct timeval t1, t2;
    float *C_cpu;

    int num_device = 0;

    if (cudaGetDeviceCount(&num_device) != cudaSuccess || num_device < 2) {
        fprintf(stderr, "This program needs at least 2 devices.\n");
        exit(EXIT_FAILURE);
    }

    /* ---- first pass: device 0 ---- */
    cudaSetDevice(0);

    // prepare matrix A
    alloc_matrix(&Ah1, &Ad1, L, M);
    init_matrix(Ah1, L, M);
    cudaMemcpy(Ad1, Ah1, sizeof(float) * L * M,
            cudaMemcpyHostToDevice);
    // do it again for matrix B
    alloc_matrix(&Bh1, &Bd1, M, N);
    init_matrix(Bh1, M, N);
    cudaMemcpy(Bd1, Bh1, sizeof(float) * M * N,
            cudaMemcpyHostToDevice);
    // allocate space for matrix C
    alloc_matrix(&Ch1, &Cd1, L, N);

    // make sure the transfers are finished before the timer starts
    cudaDeviceSynchronize();
    gettimeofday(&t1, NULL);

    // launch matmul kernel
    matmul<<>>(Ad1, Bd1, Cd1, L, M, N);

    if (check_error("matmul")) {
        exit(EXIT_FAILURE);
    }

    // wait for the kernel so that t2 covers its execution
    cudaDeviceSynchronize();
    gettimeofday(&t2, NULL);
    printf("Elapsed time: %f msec\n", get_elapsed_time(&t1, &t2));

    // obtain the result and compare it with a CPU reference
    cudaMemcpy(Ch1, Cd1, sizeof(float) * L * N, cudaMemcpyDeviceToHost);
    C_cpu = (float *)malloc(sizeof(float) * L * N);
    matmul_cpu(Ah1, Bh1, C_cpu, L, M, N);
    print_matrix(Ch1, L, N);
    printf("\n");
    print_matrix(C_cpu, L, N);
    printf("\n");

    if(compare_matrix(Ch1, C_cpu, L, N) >= 0)
        printf("OK\n");
    else
        printf("ERRRRR\n");

    free(C_cpu);

    /* ---- second pass: device 1, same steps with a fresh set of buffers ---- */
    cudaSetDevice(1);

    // prepare matrix A
    alloc_matrix(&Ah2, &Ad2, L, M);
    init_matrix(Ah2, L, M);
    cudaMemcpy(Ad2, Ah2, sizeof(float) * L * M,
            cudaMemcpyHostToDevice);
    // do it again for matrix B
    alloc_matrix(&Bh2, &Bd2, M, N);
    init_matrix(Bh2, M, N);
    cudaMemcpy(Bd2, Bh2, sizeof(float) * M * N,
            cudaMemcpyHostToDevice);
    // allocate space for matrix C
    alloc_matrix(&Ch2, &Cd2, L, N);

    cudaDeviceSynchronize();
    gettimeofday(&t1, NULL);

    // launch matmul kernel
    matmul<<>>(Ad2, Bd2, Cd2, L, M, N);

    if (check_error("matmul")) {
        exit(EXIT_FAILURE);
    }

    cudaDeviceSynchronize();
    gettimeofday(&t2, NULL);
    printf("Elapsed time: %f msec\n", get_elapsed_time(&t1, &t2));

    // obtain the result and compare it with a CPU reference
    cudaMemcpy(Ch2, Cd2, sizeof(float) * L * N, cudaMemcpyDeviceToHost);
    C_cpu = (float *)malloc(sizeof(float) * L * N);
    matmul_cpu(Ah2, Bh2, C_cpu, L, M, N);
    print_matrix(Ch2, L, N);
    printf("\n");
    print_matrix(C_cpu, L, N);
    printf("\n");

    if(compare_matrix(Ch2, C_cpu, L, N) >= 0)
        printf("OK\n");
    else
        printf("ERRRRR\n");

    free(C_cpu);

    // release the device buffers of both passes
    cudaFree(Ad1);
    cudaFree(Bd1);
    cudaFree(Cd1);

    cudaFree(Ad2);
    cudaFree(Bd2);
    cudaFree(Cd2);

    return 0;
}
/*
 * Time elapsed from *begin to *end, in milliseconds.
 *
 * The whole-second difference is scaled to milliseconds in integer
 * arithmetic; the microsecond difference is divided by 1000.0 so the
 * sub-millisecond part is kept. The result is negative when end precedes
 * begin.
 */
double get_elapsed_time(struct timeval *begin, struct timeval *end)
{
    const double ms_from_sec = (double)((end->tv_sec - begin->tv_sec) * 1000);
    const double ms_from_usec = (end->tv_usec - begin->tv_usec) / 1000.0;

    return ms_from_sec + ms_from_usec;
}
/*
 * Parse text as a strictly positive decimal integer.
 *
 * Rejects empty strings, trailing garbage, zero and negative values. An
 * out-of-range input is clamped by strtol to LONG_MAX and accepted here; the
 * caller's allocation then fails for such a size.
 *
 * @return 0 and stores the value in *value on success; -1 otherwise.
 */
static int parse_positive(const char *text, long *value)
{
    char *end;
    long parsed = strtol(text, &end, 10);

    if (end == text || *end != '\0' || parsed <= 0)
        return -1;
    *value = parsed;
    return 0;
}

/*
 * memcpybw: time host-to-device copies of a buffer split into sections.
 *
 * Usage: prog memsize num
 *   memsize  total number of bytes (positive)
 *   num      number of sections (positive int); each section has
 *            memsize / num bytes
 *
 * After one untimed warm-up allocation and copy, the program times num rounds
 * of cudaMalloc plus cudaMemcpy of one section each, prints the elapsed time
 * in milliseconds, and then launches empty kernels until the shared counter i
 * reaches 2000.
 *
 * Both arguments must be positive: num == 0 would divide by zero when the
 * section size is computed, and a negative memsize would turn into a huge
 * size_t.
 *
 * NOTE(review): the device buffers allocated inside the timed loop are never
 * freed; presumably intentional so that num regions stay allocated -- confirm.
 */
int main(int argc, char *argv[])
{
    int i = 0;
    struct timeval t1, t2;
    dim3 dimBlock( 1, 1 );
    dim3 dimGrid( 1, 1 );
    char *pDev, *pHost;
    long parsed = 0;
    size_t memsize;
    int num;
    size_t secSize;

    if (argc < 3) {
        fprintf(stderr, "prog memsize num\n");
        exit(EXIT_FAILURE);
    }

    if (parse_positive(argv[1], &parsed) != 0) {
        fprintf(stderr, "memsize has to be a positive long int.\n");
        exit(EXIT_FAILURE);
    }
    memsize = (size_t)parsed;

    /* The round trip through int rejects values that do not fit in num. */
    if (parse_positive(argv[2], &parsed) != 0 || (long)(int)parsed != parsed) {
        fprintf(stderr, "num has to be a positive int.\n");
        exit(EXIT_FAILURE);
    }
    num = (int)parsed;

    secSize = memsize / num;

    if ((pHost = (char *)malloc(sizeof(char) * memsize)) == NULL) {
        perror("MALLOC ERROR:");
        exit(EXIT_FAILURE);
    }

    /* Untimed warm-up: one allocation and copy of a single section. */
    CUDA_SAFE_CALL(cudaMalloc(&pDev, sizeof(char) * secSize));
    CUDA_SAFE_CALL(cudaMemcpy(pDev, pHost, sizeof(char) * secSize, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaFree(pDev));

    /* Timed phase: allocate and fill num sections. */
    gettimeofday(&t1, NULL);
    for (i = 0; i < num; i++) {
        CUDA_SAFE_CALL(cudaMalloc(&pDev, sizeof(char) * secSize));
        CUDA_SAFE_CALL(cudaMemcpy(pDev, pHost, sizeof(char) * secSize, cudaMemcpyHostToDevice));
    }
    gettimeofday(&t2, NULL);
    printf("Elapsed Time: %f\n", get_elapsed_time(&t1, &t2));

    /* i continues from num, so this issues 2000 - num empty launches. */
    while (i < 2000) {
        null<<<dimGrid, dimBlock>>>();
        i++;
    }
    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    free(pHost);
    return EXIT_SUCCESS;
}
/*
 * Body of the innermost thread (started by thread_main1).
 *
 * Prints its kernel thread id, the device that is current on entry, then
 * selects device 0, allocates 16 ints there and prints the resulting address
 * and the current device again.
 *
 * device and devMem start from sentinel values (-1 / NULL), and device is
 * reset before each query, so a failing CUDA call shows up as "-1" or a null
 * address in the output instead of an indeterminate value being printed.
 *
 * @param opaque unused.
 * @return always NULL.
 */
void *thread_main2(void *opaque)
{
    int *devMem = NULL;
    int device = -1;
    pid_t pid;

    (void)opaque;

    pid = syscall(SYS_gettid);
    printf("Thread 2: thread %d\n", pid);
    cudaGetDevice(&device);
    printf("Thread 2: Device %d\n", device);
    cudaSetDevice(0);
    cudaMalloc(&devMem, sizeof(int) * 16);
    /* %p expects a void pointer. */
    printf("Thread 2: Addr %p\n", (void *)devMem);
    device = -1;
    cudaGetDevice(&device);
    printf("Thread 2: Device %d\n", device);
    return NULL;
}
/*
 * Entry point of the per-thread device experiment.
 *
 * Prints the main thread's kernel thread id, selects device 0, allocates
 * 32 ints and prints the address, then starts thread_main1 and prints the
 * current device both while that thread runs and after it has been joined.
 *
 * device and devMem start from sentinel values (-1 / NULL), and device is
 * reset before each query, so a failing CUDA call is visible in the output
 * instead of an indeterminate value being printed. A failed pthread_create
 * is reported and ends the program, because joining a thread handle that was
 * never initialised is undefined.
 *
 * @return 0 on success, 1 when the worker thread could not be created.
 */
int main()
{
    int *devMem = NULL;
    int device = -1;
    pthread_t t;
    pid_t pid;
    int rc;

    pid = syscall(SYS_gettid);
    printf("Main: thread %d\n", pid);
    cudaSetDevice(0);
    cudaMalloc(&devMem, sizeof(int) * 32);
    /* %p expects a void pointer. */
    printf("Main: Addr %p\n", (void *)devMem);

    rc = pthread_create(&t, NULL, thread_main1, NULL);
    if (rc != 0) {
        fprintf(stderr, "Main: pthread_create failed (%d)\n", rc);
        return 1;
    }

    cudaGetDevice(&device);
    printf("Main: Device %d\n", device);
    pthread_join(t, NULL);
    device = -1;
    cudaGetDevice(&device);
    printf("Main: Device %d\n", device);
    return 0;
}