├── .gitignore
├── AUTHORS
├── COPYING
├── ChangeLog
├── INSTALL
├── Makefile.am
├── Makefile.in
├── NEWS
├── README
├── aclocal.m4
├── build-aux
    ├── ar-lib
    ├── compile
    ├── config.guess
    ├── config.sub
    ├── depcomp
    ├── install-sh
    ├── libtool.m4
    ├── ltmain.sh
    ├── ltoptions.m4
    ├── ltsugar.m4
    ├── ltversion.m4
    ├── lt~obsolete.m4
    ├── missing
    ├── pkg.m4
    └── test-driver
├── config.h.in
├── configure
├── configure.ac
├── notes
    └── func-list.txt
├── results
    ├── memcpybw-memsync.out
    ├── memcpybw-mhelper.out
    └── nullker-mhelper.out
├── scripts
    ├── Makefile.am
    ├── Makefile.in
    ├── lib64
    │   ├── libcudart.so
    │   ├── libcudart.so.6.5
    │   ├── rCUDAcommIB.so
    │   └── rCUDAcommTCP.so
    ├── mrcudaexec.py.template
    └── plotters
    │   └── overhead.py
├── src
    ├── Makefile.am
    ├── Makefile.in
    ├── comm.c
    ├── comm.h
    ├── common.h
    ├── datatypes.h
    ├── intercomm.c
    ├── intercomm.h
    ├── intercomm_interface.c
    ├── intercomm_interface.h
    ├── intercomm_mem.c
    ├── intercomm_mem.h
    ├── interface.c
    ├── mhelper.c
    ├── mrcuda.c
    ├── mrcuda.h
    ├── record.c
    └── record.h
└── tests
    ├── Makefile.am
    ├── Makefile.in
    ├── check_comm.c
    ├── check_record.c
    └── progs
        ├── benchmark.memcpybw.sh
        ├── benchmark.nullker.cudamemcpy.sh
        ├── cudamemcpy.cu
        ├── gpuaddr.cu
        ├── hello.cu
        ├── hellomul.cu
        ├── matmul_mul.cu
        ├── matmul_par.cu
        ├── memcpybw.cu
        ├── multigpuaddr.cu
        ├── nullker.cu
        └── thread_dev.cu


/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.swp
3 | *.swo
4 | src/libcudart.so.5.0
5 | lib64/*
6 | build/*
7 | autom4te.cache
8 | *~
9 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Pak Markthub <markthub.p.aa@m.titech.ac.jp> the creator of this project.
2 | 


--------------------------------------------------------------------------------
/ChangeLog:
--------------------------------------------------------------------------------
 1 | 2016-09-28  Pak Markthub <markthub.p.aa@m.titech.ac.jp>
 2 | 
 3 | * all git log before the creation of this ChangeLog
 4 | c617dd9 (HEAD -> installation, origin/installation) Create libcudart.so.7.0 symlink when installing mrCUDA
 5 | c4ae1ca Regenerate all make and configuration scripts on Paris, hopefully it will work fine with other systems
 6 | 1cdc3ea Add missing files
 7 | 6b6cf38 Modify related files in the installation process
 8 | c01849a Add missing files necessary for configure and make
 9 | f1508e4 Add the missing config.h.in
10 | 0bcd211 Add the missing aclocal.m4
11 | c45bd5a Make the generation of the linked filenames of rCUDAcomm*.so more generic
12 | cdf5ed4 Create links to rCUDAcommIB.so and rCUDAcommTCP.so when installing mrCUDA
13 | cb954e8 Make now auto-generates correct mrcudaexec
14 | 77893c0 Check for python2.7 in configure
15 | d396f9d Add options for manually specifying NVIDIA's libcudart and nvcc
16 | 0513643 Detect the installation path of rCUDA's libcudart.so, rCUDAcommIB.so, and rCUDAcommTCP.so
17 | 332f4c9 Use absolute path for nvcc after checking
18 | b838985 Add checking for nvcc
19 | cf9a96f Create configure and its supported files
20 | 3913e41 (origin/multi-gpu, origin/master, origin/HEAD, github/master, multi-gpu, master) Change the labels of memsync plotter.
21 | 7e799ad Modify code so that it can run on Paris and matrixMul and vectorAdd can use mrCUDA
22 | aee10f1 Change the font size of some figures
23 | 6bc4b07 Add benchmark scripts and programs for mrCUDA's overhead
24 | 4bda4d0 Add manual profiling
25 | bcc42b6 Change many labels' sizes
26 | b2ff7c0 Add plot_record_replay to the overhead.py
27 | df634e0 Add plot_mhelper_memcpybw to the overhead.py
28 | ab7707e Add plot_mhelper_nullker to overhead.py
29 | 151283b Get multi-GPU migration benchmark's results
30 | 23dba8e Change the legend size in memsync-bw plot
31 | 801d4a4 Implement memsync-bw plot in the overhead.py
32 | 87ed363 Get memcpybw-memsync benchmark result
33 | 4fa2a15 Add a plotter overhead.py
34 | 2beb2e4 Remove cudaMemcpy and cudaMemcpyToSymbol profiling
35 | 1709ea6 Add manual profiling
36 | a7f915a Fix mhelper does not exit when the main program exited
37 | 342930e Fix cudaLaunch error bugs
38 | 64ecbcb Fix cudaMemcpy bugs in mhelper
39 | 544ee52 Fix mhelper does not set device bugs
40 | 8c79e14 Fix mhelper communication bugs
41 | 5473461 Fix deadlock in cudaSetDevice
42 | d7a6ad7 Implement mhelper.c
43 | 2ee9d72 Implement intercomm_interface.c
44 | 9ab3d67 Implement some interfaces in intercomm_interface
45 | f5f261e Fix runtime error when using switching for single GPU case
46 | aa2abf4 Fix runtime errors when using only rCUDA or native
47 | eddf55e Fix compliation errors
48 | 32bb9b7 Refactor code to support multi-gpu migration
49 | b40b53a Implement __cudaRegisterFatBinary in mhelper
50 | 5cab822 Partially implement mhelper
51 | fff4bf4 Implement intercomm.c
52 | b1dff8a Partially implement intercomm module
53 | 7c32128 Implement intercomm_mem
54 | 3263bce Roughly define data structures and functions
55 | 5274e9e Merge branch 'rcuda-5.0'
56 | de3d5b2 (origin/rcuda-5.0) Implement multi-GPU matmul
57 | 6b13153 Fix multiple reports of the total sizes of cudaMemcpy and cudaMemcpyToSymbol
58 | 65c10b3 Add cudaMemcpy and cudaMemcpyToSymbol profiling
59 | facce7e Add mrcuda_record time
60 | fde0ad0 Remove cudaMemcpyToSymbol replay and use sync symbol instead
61 | 50bdf0a Include mrcuda_replay_cudaMemcpyToSymbol to mrcuda_sync_mem profile
62 | 0aa19c9 Implement manual profiling
63 | 325623b Add mrcudaRecordCache
64 | 953ae29 Implement mrCUDAExec
65 | 7db0609 Add MRCUDA_SWITCH_THRESHOLD support for testing purpose
66 | c7ddadf Add cudaSetDeviceFlags support
67 | a1e7164 mrCUDA works with LAMMPS
68 | 2a765b3 Fix sync_memory bugs
69 | c697d6a Unknown test code
70 | 693042c Hard-code mrcuda_switching when num cudaLaunch reach a certain number
71 | cac9952 Fix segmentation-fault bug
72 | 03e9535 Fix compile-error bugs
73 | b9170c3 Connect modules together
74 | 94cfc0f Partially implement record and replay functions
75 | 2fa6ed4 Partially implement recording system
76 | c0fdd22 Implement and test comm.c
77 | 6335192 Use autotools as the make system
78 | 6bb38d7 Partially implement communication module.
79 | 7146ceb Implement function interfaces that are needed for LAMMPS
80 | c9e90a6 Implement mrcuda_init and mrcuda_fini
81 | 1123ca9 Partially define some headers
82 | 2c0e9f7 Change directory structure
83 | 92fdc95 Modify matmul_par.cu to make successful migratio more obvious
84 | 6ca6d5b Finish implementing rCUDA to native migration mini prototype
85 | 40db4b4 Successfully run hellowolrd concurrently on rCUDA and native
86 | d116474 Create an example of libcudart's hook
87 | 


--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
  1 | Installation Instructions
  2 | *************************
  3 | 
  4 | Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
  5 | Inc.
  6 | 
  7 |    Copying and distribution of this file, with or without modification,
  8 | are permitted in any medium without royalty provided the copyright
  9 | notice and this notice are preserved.  This file is offered as-is,
 10 | without warranty of any kind.
 11 | 
 12 | Basic Installation
 13 | ==================
 14 | 
 15 |    Briefly, the shell commands `./configure; make; make install' should
 16 | configure, build, and install this package.  The following
 17 | more-detailed instructions are generic; see the `README' file for
 18 | instructions specific to this package.  Some packages provide this
 19 | `INSTALL' file but do not implement all of the features documented
 20 | below.  The lack of an optional feature in a given package is not
 21 | necessarily a bug.  More recommendations for GNU packages can be found
 22 | in *note Makefile Conventions: (standards)Makefile Conventions.
 23 | 
 24 |    The `configure' shell script attempts to guess correct values for
 25 | various system-dependent variables used during compilation.  It uses
 26 | those values to create a `Makefile' in each directory of the package.
 27 | It may also create one or more `.h' files containing system-dependent
 28 | definitions.  Finally, it creates a shell script `config.status' that
 29 | you can run in the future to recreate the current configuration, and a
 30 | file `config.log' containing compiler output (useful mainly for
 31 | debugging `configure').
 32 | 
 33 |    It can also use an optional file (typically called `config.cache'
 34 | and enabled with `--cache-file=config.cache' or simply `-C') that saves
 35 | the results of its tests to speed up reconfiguring.  Caching is
 36 | disabled by default to prevent problems with accidental use of stale
 37 | cache files.
 38 | 
 39 |    If you need to do unusual things to compile the package, please try
 40 | to figure out how `configure' could check whether to do them, and mail
 41 | diffs or instructions to the address given in the `README' so they can
 42 | be considered for the next release.  If you are using the cache, and at
 43 | some point `config.cache' contains results you don't want to keep, you
 44 | may remove or edit it.
 45 | 
 46 |    The file `configure.ac' (or `configure.in') is used to create
 47 | `configure' by a program called `autoconf'.  You need `configure.ac' if
 48 | you want to change it or regenerate `configure' using a newer version
 49 | of `autoconf'.
 50 | 
 51 |    The simplest way to compile this package is:
 52 | 
 53 |   1. `cd' to the directory containing the package's source code and type
 54 |      `./configure' to configure the package for your system.
 55 | 
 56 |      Running `configure' might take a while.  While running, it prints
 57 |      some messages telling which features it is checking for.
 58 | 
 59 |   2. Type `make' to compile the package.
 60 | 
 61 |   3. Optionally, type `make check' to run any self-tests that come with
 62 |      the package, generally using the just-built uninstalled binaries.
 63 | 
 64 |   4. Type `make install' to install the programs and any data files and
 65 |      documentation.  When installing into a prefix owned by root, it is
 66 |      recommended that the package be configured and built as a regular
 67 |      user, and only the `make install' phase executed with root
 68 |      privileges.
 69 | 
 70 |   5. Optionally, type `make installcheck' to repeat any self-tests, but
 71 |      this time using the binaries in their final installed location.
 72 |      This target does not install anything.  Running this target as a
 73 |      regular user, particularly if the prior `make install' required
 74 |      root privileges, verifies that the installation completed
 75 |      correctly.
 76 | 
 77 |   6. You can remove the program binaries and object files from the
 78 |      source code directory by typing `make clean'.  To also remove the
 79 |      files that `configure' created (so you can compile the package for
 80 |      a different kind of computer), type `make distclean'.  There is
 81 |      also a `make maintainer-clean' target, but that is intended mainly
 82 |      for the package's developers.  If you use it, you may have to get
 83 |      all sorts of other programs in order to regenerate files that came
 84 |      with the distribution.
 85 | 
 86 |   7. Often, you can also type `make uninstall' to remove the installed
 87 |      files again.  In practice, not all packages have tested that
 88 |      uninstallation works correctly, even though it is required by the
 89 |      GNU Coding Standards.
 90 | 
 91 |   8. Some packages, particularly those that use Automake, provide `make
 92 |      distcheck', which can by used by developers to test that all other
 93 |      targets like `make install' and `make uninstall' work correctly.
 94 |      This target is generally not run by end users.
 95 | 
 96 | Compilers and Options
 97 | =====================
 98 | 
 99 |    Some systems require unusual options for compilation or linking that
100 | the `configure' script does not know about.  Run `./configure --help'
101 | for details on some of the pertinent environment variables.
102 | 
103 |    You can give `configure' initial values for configuration parameters
104 | by setting variables in the command line or in the environment.  Here
105 | is an example:
106 | 
107 |      ./configure CC=c99 CFLAGS=-g LIBS=-lposix
108 | 
109 |    *Note Defining Variables::, for more details.
110 | 
111 | Compiling For Multiple Architectures
112 | ====================================
113 | 
114 |    You can compile the package for more than one kind of computer at the
115 | same time, by placing the object files for each architecture in their
116 | own directory.  To do this, you can use GNU `make'.  `cd' to the
117 | directory where you want the object files and executables to go and run
118 | the `configure' script.  `configure' automatically checks for the
119 | source code in the directory that `configure' is in and in `..'.  This
120 | is known as a "VPATH" build.
121 | 
122 |    With a non-GNU `make', it is safer to compile the package for one
123 | architecture at a time in the source code directory.  After you have
124 | installed the package for one architecture, use `make distclean' before
125 | reconfiguring for another architecture.
126 | 
127 |    On MacOS X 10.5 and later systems, you can create libraries and
128 | executables that work on multiple system types--known as "fat" or
129 | "universal" binaries--by specifying multiple `-arch' options to the
130 | compiler but only a single `-arch' option to the preprocessor.  Like
131 | this:
132 | 
133 |      ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
134 |                  CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
135 |                  CPP="gcc -E" CXXCPP="g++ -E"
136 | 
137 |    This is not guaranteed to produce working output in all cases, you
138 | may have to build one architecture at a time and combine the results
139 | using the `lipo' tool if you have problems.
140 | 
141 | Installation Names
142 | ==================
143 | 
144 |    By default, `make install' installs the package's commands under
145 | `/usr/local/bin', include files under `/usr/local/include', etc.  You
146 | can specify an installation prefix other than `/usr/local' by giving
147 | `configure' the option `--prefix=PREFIX', where PREFIX must be an
148 | absolute file name.
149 | 
150 |    You can specify separate installation prefixes for
151 | architecture-specific files and architecture-independent files.  If you
152 | pass the option `--exec-prefix=PREFIX' to `configure', the package uses
153 | PREFIX as the prefix for installing programs and libraries.
154 | Documentation and other data files still use the regular prefix.
155 | 
156 |    In addition, if you use an unusual directory layout you can give
157 | options like `--bindir=DIR' to specify different values for particular
158 | kinds of files.  Run `configure --help' for a list of the directories
159 | you can set and what kinds of files go in them.  In general, the
160 | default for these options is expressed in terms of `${prefix}', so that
161 | specifying just `--prefix' will affect all of the other directory
162 | specifications that were not explicitly provided.
163 | 
164 |    The most portable way to affect installation locations is to pass the
165 | correct locations to `configure'; however, many packages provide one or
166 | both of the following shortcuts of passing variable assignments to the
167 | `make install' command line to change installation locations without
168 | having to reconfigure or recompile.
169 | 
170 |    The first method involves providing an override variable for each
171 | affected directory.  For example, `make install
172 | prefix=/alternate/directory' will choose an alternate location for all
173 | directory configuration variables that were expressed in terms of
174 | `${prefix}'.  Any directories that were specified during `configure',
175 | but not in terms of `${prefix}', must each be overridden at install
176 | time for the entire installation to be relocated.  The approach of
177 | makefile variable overrides for each directory variable is required by
178 | the GNU Coding Standards, and ideally causes no recompilation.
179 | However, some platforms have known limitations with the semantics of
180 | shared libraries that end up requiring recompilation when using this
181 | method, particularly noticeable in packages that use GNU Libtool.
182 | 
183 |    The second method involves providing the `DESTDIR' variable.  For
184 | example, `make install DESTDIR=/alternate/directory' will prepend
185 | `/alternate/directory' before all installation names.  The approach of
186 | `DESTDIR' overrides is not required by the GNU Coding Standards, and
187 | does not work on platforms that have drive letters.  On the other hand,
188 | it does better at avoiding recompilation issues, and works well even
189 | when some directory options were not specified in terms of `${prefix}'
190 | at `configure' time.
191 | 
192 | Optional Features
193 | =================
194 | 
195 |    If the package supports it, you can cause programs to be installed
196 | with an extra prefix or suffix on their names by giving `configure' the
197 | option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
198 | 
199 |    Some packages pay attention to `--enable-FEATURE' options to
200 | `configure', where FEATURE indicates an optional part of the package.
201 | They may also pay attention to `--with-PACKAGE' options, where PACKAGE
202 | is something like `gnu-as' or `x' (for the X Window System).  The
203 | `README' should mention any `--enable-' and `--with-' options that the
204 | package recognizes.
205 | 
206 |    For packages that use the X Window System, `configure' can usually
207 | find the X include and library files automatically, but if it doesn't,
208 | you can use the `configure' options `--x-includes=DIR' and
209 | `--x-libraries=DIR' to specify their locations.
210 | 
211 |    Some packages offer the ability to configure how verbose the
212 | execution of `make' will be.  For these packages, running `./configure
213 | --enable-silent-rules' sets the default to minimal output, which can be
214 | overridden with `make V=1'; while running `./configure
215 | --disable-silent-rules' sets the default to verbose, which can be
216 | overridden with `make V=0'.
217 | 
218 | Particular systems
219 | ==================
220 | 
221 |    On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
222 | CC is not installed, it is recommended to use the following options in
223 | order to use an ANSI C compiler:
224 | 
225 |      ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
226 | 
227 | and if that doesn't work, install pre-built binaries of GCC for HP-UX.
228 | 
229 |    HP-UX `make' updates targets which have the same time stamps as
230 | their prerequisites, which makes it generally unusable when shipped
231 | generated files such as `configure' are involved.  Use GNU `make'
232 | instead.
233 | 
234 |    On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
235 | parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
236 | a workaround.  If GNU CC is not installed, it is therefore recommended
237 | to try
238 | 
239 |      ./configure CC="cc"
240 | 
241 | and if that doesn't work, try
242 | 
243 |      ./configure CC="cc -nodtk"
244 | 
245 |    On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
246 | directory contains several dysfunctional programs; working variants of
247 | these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
248 | in your `PATH', put it _after_ `/usr/bin'.
249 | 
250 |    On Haiku, software installed for all users goes in `/boot/common',
251 | not `/usr/local'.  It is recommended to use the following options:
252 | 
253 |      ./configure --prefix=/boot/common
254 | 
255 | Specifying the System Type
256 | ==========================
257 | 
258 |    There may be some features `configure' cannot figure out
259 | automatically, but needs to determine by the type of machine the package
260 | will run on.  Usually, assuming the package is built to be run on the
261 | _same_ architectures, `configure' can figure that out, but if it prints
262 | a message saying it cannot guess the machine type, give it the
263 | `--build=TYPE' option.  TYPE can either be a short name for the system
264 | type, such as `sun4', or a canonical name which has the form:
265 | 
266 |      CPU-COMPANY-SYSTEM
267 | 
268 | where SYSTEM can have one of these forms:
269 | 
270 |      OS
271 |      KERNEL-OS
272 | 
273 |    See the file `config.sub' for the possible values of each field.  If
274 | `config.sub' isn't included in this package, then this package doesn't
275 | need to know the machine type.
276 | 
277 |    If you are _building_ compiler tools for cross-compiling, you should
278 | use the option `--target=TYPE' to select the type of system they will
279 | produce code for.
280 | 
281 |    If you want to _use_ a cross compiler, that generates code for a
282 | platform different from the build platform, you should specify the
283 | "host" platform (i.e., that on which the generated programs will
284 | eventually be run) with `--host=TYPE'.
285 | 
286 | Sharing Defaults
287 | ================
288 | 
289 |    If you want to set default values for `configure' scripts to share,
290 | you can create a site shell script called `config.site' that gives
291 | default values for variables like `CC', `cache_file', and `prefix'.
292 | `configure' looks for `PREFIX/share/config.site' if it exists, then
293 | `PREFIX/etc/config.site' if it exists.  Or, you can set the
294 | `CONFIG_SITE' environment variable to the location of the site script.
295 | A warning: not all `configure' scripts look for a site script.
296 | 
297 | Defining Variables
298 | ==================
299 | 
300 |    Variables not defined in a site shell script can be set in the
301 | environment passed to `configure'.  However, some packages may run
302 | configure again during the build, and the customized values of these
303 | variables may be lost.  In order to avoid this problem, you should set
304 | them in the `configure' command line, using `VAR=value'.  For example:
305 | 
306 |      ./configure CC=/usr/local2/bin/gcc
307 | 
308 | causes the specified `gcc' to be used as the C compiler (unless it is
309 | overridden in the site shell script).
310 | 
311 | Unfortunately, this technique does not work for `CONFIG_SHELL' due to
312 | an Autoconf limitation.  Until the limitation is lifted, you can use
313 | this workaround:
314 | 
315 |      CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
316 | 
317 | `configure' Invocation
318 | ======================
319 | 
320 |    `configure' recognizes the following options to control how it
321 | operates.
322 | 
323 | `--help'
324 | `-h'
325 |      Print a summary of all of the options to `configure', and exit.
326 | 
327 | `--help=short'
328 | `--help=recursive'
329 |      Print a summary of the options unique to this package's
330 |      `configure', and exit.  The `short' variant lists options used
331 |      only in the top level, while the `recursive' variant lists options
332 |      also present in any nested packages.
333 | 
334 | `--version'
335 | `-V'
336 |      Print the version of Autoconf used to generate the `configure'
337 |      script, and exit.
338 | 
339 | `--cache-file=FILE'
340 |      Enable the cache: use and save the results of the tests in FILE,
341 |      traditionally `config.cache'.  FILE defaults to `/dev/null' to
342 |      disable caching.
343 | 
344 | `--config-cache'
345 | `-C'
346 |      Alias for `--cache-file=config.cache'.
347 | 
348 | `--quiet'
349 | `--silent'
350 | `-q'
351 |      Do not print messages saying which checks are being made.  To
352 |      suppress all normal output, redirect it to `/dev/null' (any error
353 |      messages will still be shown).
354 | 
355 | `--srcdir=DIR'
356 |      Look for the package's source code in directory DIR.  Usually
357 |      `configure' can determine that directory automatically.
358 | 
359 | `--prefix=DIR'
360 |      Use DIR as the installation prefix.  *note Installation Names::
361 |      for more details, including other options available for fine-tuning
362 |      the installation locations.
363 | 
364 | `--no-create'
365 | `-n'
366 |      Run the configure checks, but stop before creating any output
367 |      files.
368 | 
369 | `configure' also accepts some other, not widely useful, options.  Run
370 | `configure --help' for more details.
371 | 


--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
1 | ACLOCAL_AMFLAGS = -I build-aux
2 | SUBDIRS = src . tests scripts
3 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | 2016-09-28  Pak Markthub <markthub.p.aa@m.titech.ac.jp>
2 | * This is the first alpha release version.
3 | * mrCUDA supports multi-GPU remote-to-local GPU migration.
4 | * Only a subset of CUDA Runtime APIs v7.0 are supported, but at least they are enough for LAMMPS to run without problem.
5 | 
6 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | mrCUDA: Migratable rCUDA
 2 | 
 3 | What is it?
 4 | ===========
 5 | 
 6 | mrCUDA is an extension of rCUDA (http://rcuda.net), which aims at enabling
 7 | remote-to-local GPU migration. We develop this project in order to solve the
 8 | performance problems caused by remote GPU communication: overhead from rCUDA,
 9 | and network congestion. By using mrCUDA, a user can migrate execution on a
10 | remote GPU to a local GPU when one becomes available. mrCUDA works seamlessly
11 | with rCUDA and programs that use CUDA Runtime API. There is no need to recompile
12 | the program in order to use mrCUDA. More information regarding mrCUDA can be
13 | found in:
14 | 
15 | *   Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "Serving More GPU Jobs, with
16 |     Low Penalty, using Remote GPU Execution and Migration." IEEE Cluster 2016.
17 | 
18 | *   Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "Finishing GPU Jobs
19 |     running on a Multi-GPU Batch-Queue Node-Sharing System Earlier with Remote
20 |     GPU Execution and Migration." ISC2016.
21 | 
22 | *   Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "Reducing Remote GPU
23 |     Execution's Overhead with mrCUDA." GTC2016.
24 | 
25 | *   Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "Serving More GPU Jobs
26 |     in Multi-GPU Batch-Queue Systems using Remote GPU Execution and Migration
27 |     (Unrefereed Workshop manuscript)." IPSJ SIG Notes 2016-HPC-153.
28 | 
29 | *   Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "mrCUDA: Low-Overhead
30 |     Middleware for Transparently Migrating CUDA Execution from Remote to Local
31 |     GPUs." SC15.
32 | 
33 | *   Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "mrCUDA: Low-Overhead
34 |     Middleware for Transparently Migrating CUDA Execution from Remote to Local
35 |     GPUs." GTC Japan 2015.
36 | 
37 | *   Pak Markthub, Akihiro Nomura, and Satoshi Matsuoka. "mrCUDA: A middleware
38 |     for migrating rCUDA virtual GPUs to native GPUs (Unrefereed Workshop
39 |     manuscript)." IPSJ SIG Notes 2015-HPC-150 (SWoPP2015).
40 | 
41 | Installation
42 | ============
43 | 
44 | Prerequisites
45 | -------------
46 | 
47 | - check
48 | - CUDA7.0
49 | - glibc-2.0 
50 | - Python2.7
51 | - rCUDAv15.07
52 | 
53 | How to install
54 | --------------
55 | 
56 | mkdir build
57 | cd build
58 | ../configure --prefix=~/mrCUDA-bin --with-rcuda=<path-to-rcuda-top-most-directory>
59 | make
60 | make install
61 | 
62 | Note: We recommend you to specify --prefix because mrCUDA creates its own
63 | libcudart.so that might conflict with the installed libcudart.so from NVIDIA on
64 | your system.
65 | 
66 | How to use?
67 | ===========
68 | 
69 | 1. Make sure your program works with rCUDAv15.07.
70 | 2. Start rCUDAd on a node.
71 | 3. Go to mrCUDA's installed directory.
72 | 4. cd bin
73 | 5. ./mrcudaexec -s <rCUDAd-IP-Address> -t <communication-type IB or TCP> \
74 |    --switch-threshold=<number> -- <your-program> <your-program-arguments>
75 | 
76 | Notes: 
77 | 1. By specifying --switch-threshold, mrCUDA will automatically migrate execution
78 |    when it encounters 'cudaLaunch' more than the specified number. This is helpful
79 |    for testing mrCUDA's migration functionality.
80 | 
81 | 2. In future release, mrCUDA will create a UNIX socket that you can send a
82 |    migration command in to start GPU migration.
83 | 
84 | Acknowledgement
85 | ===============
86 | 
87 | This research was supported by JST, CREST (Research Area: Advanced Core
88 | Technologies for Big Data Integration).
89 | 
90 | 


--------------------------------------------------------------------------------
/build-aux/ar-lib:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # Wrapper for Microsoft lib.exe
  3 | 
  4 | me=ar-lib
  5 | scriptversion=2012-03-01.08; # UTC
  6 | 
  7 | # Copyright (C) 2010-2013 Free Software Foundation, Inc.
  8 | # Written by Peter Rosin <peda@lysator.liu.se>.
  9 | #
 10 | # This program is free software; you can redistribute it and/or modify
 11 | # it under the terms of the GNU General Public License as published by
 12 | # the Free Software Foundation; either version 2, or (at your option)
 13 | # any later version.
 14 | #
 15 | # This program is distributed in the hope that it will be useful,
 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 | # GNU General Public License for more details.
 19 | #
 20 | # You should have received a copy of the GNU General Public License
 21 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 22 | 
 23 | # As a special exception to the GNU General Public License, if you
 24 | # distribute this file as part of a program that contains a
 25 | # configuration script generated by Autoconf, you may include it under
 26 | # the same distribution terms that you use for the rest of that program.
 27 | 
 28 | # This file is maintained in Automake, please report
 29 | # bugs to <bug-automake@gnu.org> or send patches to
 30 | # <automake-patches@gnu.org>.
 31 | 
 32 | 
 33 | # func_error message
 34 | func_error ()
 35 | {
 36 |   echo "$me: $1" 1>&2
 37 |   exit 1
 38 | }
 39 | 
 40 | file_conv=
 41 | 
 42 | # func_file_conv build_file
 43 | # Convert a $build file to $host form and store it in $file
 44 | # Currently only supports Windows hosts.
 45 | func_file_conv ()
 46 | {
 47 |   file=$1
 48 |   case $file in
 49 |     / | /[!/]*) # absolute file, and not a UNC file
 50 |       if test -z "$file_conv"; then
 51 | 	# lazily determine how to convert abs files
 52 | 	case `uname -s` in
 53 | 	  MINGW*)
 54 | 	    file_conv=mingw
 55 | 	    ;;
 56 | 	  CYGWIN*)
 57 | 	    file_conv=cygwin
 58 | 	    ;;
 59 | 	  *)
 60 | 	    file_conv=wine
 61 | 	    ;;
 62 | 	esac
 63 |       fi
 64 |       case $file_conv in
 65 | 	mingw)
 66 | 	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
 67 | 	  ;;
 68 | 	cygwin)
 69 | 	  file=`cygpath -m "$file" || echo "$file"`
 70 | 	  ;;
 71 | 	wine)
 72 | 	  file=`winepath -w "$file" || echo "$file"`
 73 | 	  ;;
 74 |       esac
 75 |       ;;
 76 |   esac
 77 | }
 78 | 
 79 | # func_at_file at_file operation archive
 80 | # Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
 81 | # for each of them.
 82 | # When interpreting the content of the @FILE, do NOT use func_file_conv,
 83 | # since the user would need to supply preconverted file names to
 84 | # binutils ar, at least for MinGW.
 85 | func_at_file ()
 86 | {
 87 |   operation=$2
 88 |   archive=$3
 89 |   at_file_contents=`cat "$1"`
 90 |   eval set x "$at_file_contents"
 91 |   shift
 92 | 
 93 |   for member
 94 |   do
 95 |     $AR -NOLOGO $operation:"$member" "$archive" || exit $?
 96 |   done
 97 | }
 98 | 
 99 | case $1 in
100 |   '')
101 |      func_error "no command.  Try '$0 --help' for more information."
102 |      ;;
103 |   -h | --h*)
104 |     cat <<EOF
105 | Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
106 | 
107 | Members may be specified in a file named with @FILE.
108 | EOF
109 |     exit $?
110 |     ;;
111 |   -v | --v*)
112 |     echo "$me, version $scriptversion"
113 |     exit $?
114 |     ;;
115 | esac
116 | 
117 | if test $# -lt 3; then
118 |   func_error "you must specify a program, an action and an archive"
119 | fi
120 | 
121 | AR=$1
122 | shift
123 | while :
124 | do
125 |   if test $# -lt 2; then
126 |     func_error "you must specify a program, an action and an archive"
127 |   fi
128 |   case $1 in
129 |     -lib | -LIB \
130 |     | -ltcg | -LTCG \
131 |     | -machine* | -MACHINE* \
132 |     | -subsystem* | -SUBSYSTEM* \
133 |     | -verbose | -VERBOSE \
134 |     | -wx* | -WX* )
135 |       AR="$AR $1"
136 |       shift
137 |       ;;
138 |     *)
139 |       action=$1
140 |       shift
141 |       break
142 |       ;;
143 |   esac
144 | done
145 | orig_archive=$1
146 | shift
147 | func_file_conv "$orig_archive"
148 | archive=$file
149 | 
150 | # strip leading dash in $action
151 | action=${action#-}
152 | 
153 | delete=
154 | extract=
155 | list=
156 | quick=
157 | replace=
158 | index=
159 | create=
160 | 
161 | while test -n "$action"
162 | do
163 |   case $action in
164 |     d*) delete=yes  ;;
165 |     x*) extract=yes ;;
166 |     t*) list=yes    ;;
167 |     q*) quick=yes   ;;
168 |     r*) replace=yes ;;
169 |     s*) index=yes   ;;
170 |     S*)             ;; # the index is always updated implicitly
171 |     c*) create=yes  ;;
172 |     u*)             ;; # TODO: don't ignore the update modifier
173 |     v*)             ;; # TODO: don't ignore the verbose modifier
174 |     *)
175 |       func_error "unknown action specified"
176 |       ;;
177 |   esac
178 |   action=${action#?}
179 | done
180 | 
181 | case $delete$extract$list$quick$replace,$index in
182 |   yes,* | ,yes)
183 |     ;;
184 |   yesyes*)
185 |     func_error "more than one action specified"
186 |     ;;
187 |   *)
188 |     func_error "no action specified"
189 |     ;;
190 | esac
191 | 
192 | if test -n "$delete"; then
193 |   if test ! -f "$orig_archive"; then
194 |     func_error "archive not found"
195 |   fi
196 |   for member
197 |   do
198 |     case $1 in
199 |       @*)
200 |         func_at_file "${1#@}" -REMOVE "$archive"
201 |         ;;
202 |       *)
203 |         func_file_conv "$1"
204 |         $AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
205 |         ;;
206 |     esac
207 |   done
208 | 
209 | elif test -n "$extract"; then
210 |   if test ! -f "$orig_archive"; then
211 |     func_error "archive not found"
212 |   fi
213 |   if test $# -gt 0; then
214 |     for member
215 |     do
216 |       case $1 in
217 |         @*)
218 |           func_at_file "${1#@}" -EXTRACT "$archive"
219 |           ;;
220 |         *)
221 |           func_file_conv "$1"
222 |           $AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
223 |           ;;
224 |       esac
225 |     done
226 |   else
227 |     $AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
228 |     do
229 |       $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
230 |     done
231 |   fi
232 | 
233 | elif test -n "$quick$replace"; then
234 |   if test ! -f "$orig_archive"; then
235 |     if test -z "$create"; then
236 |       echo "$me: creating $orig_archive"
237 |     fi
238 |     orig_archive=
239 |   else
240 |     orig_archive=$archive
241 |   fi
242 | 
243 |   for member
244 |   do
245 |     case $1 in
246 |     @*)
247 |       func_file_conv "${1#@}"
248 |       set x "$@" "@$file"
249 |       ;;
250 |     *)
251 |       func_file_conv "$1"
252 |       set x "$@" "$file"
253 |       ;;
254 |     esac
255 |     shift
256 |     shift
257 |   done
258 | 
259 |   if test -n "$orig_archive"; then
260 |     $AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
261 |   else
262 |     $AR -NOLOGO -OUT:"$archive" "$@" || exit $?
263 |   fi
264 | 
265 | elif test -n "$list"; then
266 |   if test ! -f "$orig_archive"; then
267 |     func_error "archive not found"
268 |   fi
269 |   $AR -NOLOGO -LIST "$archive" || exit $?
270 | fi
271 | 


--------------------------------------------------------------------------------
/build-aux/compile:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # Wrapper for compilers which do not understand '-c -o'.
  3 | 
  4 | scriptversion=2012-10-14.11; # UTC
  5 | 
  6 | # Copyright (C) 1999-2013 Free Software Foundation, Inc.
  7 | # Written by Tom Tromey <tromey@cygnus.com>.
  8 | #
  9 | # This program is free software; you can redistribute it and/or modify
 10 | # it under the terms of the GNU General Public License as published by
 11 | # the Free Software Foundation; either version 2, or (at your option)
 12 | # any later version.
 13 | #
 14 | # This program is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | # GNU General Public License for more details.
 18 | #
 19 | # You should have received a copy of the GNU General Public License
 20 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 | 
 22 | # As a special exception to the GNU General Public License, if you
 23 | # distribute this file as part of a program that contains a
 24 | # configuration script generated by Autoconf, you may include it under
 25 | # the same distribution terms that you use for the rest of that program.
 26 | 
 27 | # This file is maintained in Automake, please report
 28 | # bugs to <bug-automake@gnu.org> or send patches to
 29 | # <automake-patches@gnu.org>.
 30 | 
 31 | nl='
 32 | '
 33 | 
 34 | # We need space, tab and new line, in precisely that order.  Quoting is
 35 | # there to prevent tools from complaining about whitespace usage.
 36 | IFS=" ""	$nl"
 37 | 
 38 | file_conv=
 39 | 
 40 | # func_file_conv build_file lazy
 41 | # Convert a $build file to $host form and store it in $file
 42 | # Currently only supports Windows hosts. If the determined conversion
 43 | # type is listed in (the comma separated) LAZY, no conversion will
 44 | # take place.
 45 | func_file_conv ()
 46 | {
 47 |   file=$1
 48 |   case $file in
 49 |     / | /[!/]*) # absolute file, and not a UNC file
 50 |       if test -z "$file_conv"; then
 51 | 	# lazily determine how to convert abs files
 52 | 	case `uname -s` in
 53 | 	  MINGW*)
 54 | 	    file_conv=mingw
 55 | 	    ;;
 56 | 	  CYGWIN*)
 57 | 	    file_conv=cygwin
 58 | 	    ;;
 59 | 	  *)
 60 | 	    file_conv=wine
 61 | 	    ;;
 62 | 	esac
 63 |       fi
 64 |       case $file_conv/,$2, in
 65 | 	*,$file_conv,*)
 66 | 	  ;;
 67 | 	mingw/*)
 68 | 	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
 69 | 	  ;;
 70 | 	cygwin/*)
 71 | 	  file=`cygpath -m "$file" || echo "$file"`
 72 | 	  ;;
 73 | 	wine/*)
 74 | 	  file=`winepath -w "$file" || echo "$file"`
 75 | 	  ;;
 76 |       esac
 77 |       ;;
 78 |   esac
 79 | }
 80 | 
 81 | # func_cl_dashL linkdir
 82 | # Make cl look for libraries in LINKDIR
 83 | func_cl_dashL ()
 84 | {
 85 |   func_file_conv "$1"
 86 |   if test -z "$lib_path"; then
 87 |     lib_path=$file
 88 |   else
 89 |     lib_path="$lib_path;$file"
 90 |   fi
 91 |   linker_opts="$linker_opts -LIBPATH:$file"
 92 | }
 93 | 
 94 | # func_cl_dashl library
 95 | # Do a library search-path lookup for cl
 96 | func_cl_dashl ()
 97 | {
 98 |   lib=$1
 99 |   found=no
100 |   save_IFS=$IFS
101 |   IFS=';'
102 |   for dir in $lib_path $LIB
103 |   do
104 |     IFS=$save_IFS
105 |     if $shared && test -f "$dir/$lib.dll.lib"; then
106 |       found=yes
107 |       lib=$dir/$lib.dll.lib
108 |       break
109 |     fi
110 |     if test -f "$dir/$lib.lib"; then
111 |       found=yes
112 |       lib=$dir/$lib.lib
113 |       break
114 |     fi
115 |     if test -f "$dir/lib$lib.a"; then
116 |       found=yes
117 |       lib=$dir/lib$lib.a
118 |       break
119 |     fi
120 |   done
121 |   IFS=$save_IFS
122 | 
123 |   if test "$found" != yes; then
124 |     lib=$lib.lib
125 |   fi
126 | }
127 | 
128 | # func_cl_wrapper cl arg...
129 | # Adjust compile command to suit cl
130 | func_cl_wrapper ()
131 | {
132 |   # Assume a capable shell
133 |   lib_path=
134 |   shared=:
135 |   linker_opts=
136 |   for arg
137 |   do
138 |     if test -n "$eat"; then
139 |       eat=
140 |     else
141 |       case $1 in
142 | 	-o)
143 | 	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
144 | 	  eat=1
145 | 	  case $2 in
146 | 	    *.o | *.[oO][bB][jJ])
147 | 	      func_file_conv "$2"
148 | 	      set x "$@" -Fo"$file"
149 | 	      shift
150 | 	      ;;
151 | 	    *)
152 | 	      func_file_conv "$2"
153 | 	      set x "$@" -Fe"$file"
154 | 	      shift
155 | 	      ;;
156 | 	  esac
157 | 	  ;;
158 | 	-I)
159 | 	  eat=1
160 | 	  func_file_conv "$2" mingw
161 | 	  set x "$@" -I"$file"
162 | 	  shift
163 | 	  ;;
164 | 	-I*)
165 | 	  func_file_conv "${1#-I}" mingw
166 | 	  set x "$@" -I"$file"
167 | 	  shift
168 | 	  ;;
169 | 	-l)
170 | 	  eat=1
171 | 	  func_cl_dashl "$2"
172 | 	  set x "$@" "$lib"
173 | 	  shift
174 | 	  ;;
175 | 	-l*)
176 | 	  func_cl_dashl "${1#-l}"
177 | 	  set x "$@" "$lib"
178 | 	  shift
179 | 	  ;;
180 | 	-L)
181 | 	  eat=1
182 | 	  func_cl_dashL "$2"
183 | 	  ;;
184 | 	-L*)
185 | 	  func_cl_dashL "${1#-L}"
186 | 	  ;;
187 | 	-static)
188 | 	  shared=false
189 | 	  ;;
190 | 	-Wl,*)
191 | 	  arg=${1#-Wl,}
192 | 	  save_ifs="$IFS"; IFS=','
193 | 	  for flag in $arg; do
194 | 	    IFS="$save_ifs"
195 | 	    linker_opts="$linker_opts $flag"
196 | 	  done
197 | 	  IFS="$save_ifs"
198 | 	  ;;
199 | 	-Xlinker)
200 | 	  eat=1
201 | 	  linker_opts="$linker_opts $2"
202 | 	  ;;
203 | 	-*)
204 | 	  set x "$@" "$1"
205 | 	  shift
206 | 	  ;;
207 | 	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
208 | 	  func_file_conv "$1"
209 | 	  set x "$@" -Tp"$file"
210 | 	  shift
211 | 	  ;;
212 | 	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
213 | 	  func_file_conv "$1" mingw
214 | 	  set x "$@" "$file"
215 | 	  shift
216 | 	  ;;
217 | 	*)
218 | 	  set x "$@" "$1"
219 | 	  shift
220 | 	  ;;
221 |       esac
222 |     fi
223 |     shift
224 |   done
225 |   if test -n "$linker_opts"; then
226 |     linker_opts="-link$linker_opts"
227 |   fi
228 |   exec "$@" $linker_opts
229 |   exit 1
230 | }
231 | 
232 | eat=
233 | 
234 | case $1 in
235 |   '')
236 |      echo "$0: No command.  Try '$0 --help' for more information." 1>&2
237 |      exit 1;
238 |      ;;
239 |   -h | --h*)
240 |     cat <<\EOF
241 | Usage: compile [--help] [--version] PROGRAM [ARGS]
242 | 
243 | Wrapper for compilers which do not understand '-c -o'.
244 | Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
245 | arguments, and rename the output as expected.
246 | 
247 | If you are trying to build a whole package this is not the
248 | right script to run: please start by reading the file 'INSTALL'.
249 | 
250 | Report bugs to <bug-automake@gnu.org>.
251 | EOF
252 |     exit $?
253 |     ;;
254 |   -v | --v*)
255 |     echo "compile $scriptversion"
256 |     exit $?
257 |     ;;
258 |   cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
259 |     func_cl_wrapper "$@"      # Doesn't return...
260 |     ;;
261 | esac
262 | 
263 | ofile=
264 | cfile=
265 | 
266 | for arg
267 | do
268 |   if test -n "$eat"; then
269 |     eat=
270 |   else
271 |     case $1 in
272 |       -o)
273 | 	# configure might choose to run compile as 'compile cc -o foo foo.c'.
274 | 	# So we strip '-o arg' only if arg is an object.
275 | 	eat=1
276 | 	case $2 in
277 | 	  *.o | *.obj)
278 | 	    ofile=$2
279 | 	    ;;
280 | 	  *)
281 | 	    set x "$@" -o "$2"
282 | 	    shift
283 | 	    ;;
284 | 	esac
285 | 	;;
286 |       *.c)
287 | 	cfile=$1
288 | 	set x "$@" "$1"
289 | 	shift
290 | 	;;
291 |       *)
292 | 	set x "$@" "$1"
293 | 	shift
294 | 	;;
295 |     esac
296 |   fi
297 |   shift
298 | done
299 | 
300 | if test -z "$ofile" || test -z "$cfile"; then
301 |   # If no '-o' option was seen then we might have been invoked from a
302 |   # pattern rule where we don't need one.  That is ok -- this is a
303 |   # normal compilation that the losing compiler can handle.  If no
304 |   # '.c' file was seen then we are probably linking.  That is also
305 |   # ok.
306 |   exec "$@"
307 | fi
308 | 
309 | # Name of file we expect compiler to create.
310 | cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
311 | 
312 | # Create the lock directory.
313 | # Note: use '[/\\:.-]' here to ensure that we don't use the same name
314 | # that we are using for the .o file.  Also, base the name on the expected
315 | # object file name, since that is what matters with a parallel build.
316 | lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
317 | while true; do
318 |   if mkdir "$lockdir" >/dev/null 2>&1; then
319 |     break
320 |   fi
321 |   sleep 1
322 | done
323 | # FIXME: race condition here if user kills between mkdir and trap.
324 | trap "rmdir '$lockdir'; exit 1" 1 2 15
325 | 
326 | # Run the compile.
327 | "$@"
328 | ret=$?
329 | 
330 | if test -f "$cofile"; then
331 |   test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
332 | elif test -f "${cofile}bj"; then
333 |   test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
334 | fi
335 | 
336 | rmdir "$lockdir"
337 | exit $ret
338 | 
339 | # Local Variables:
340 | # mode: shell-script
341 | # sh-indentation: 2
342 | # eval: (add-hook 'write-file-hooks 'time-stamp)
343 | # time-stamp-start: "scriptversion="
344 | # time-stamp-format: "%:y-%02m-%02d.%02H"
345 | # time-stamp-time-zone: "UTC"
346 | # time-stamp-end: "; # UTC"
347 | # End:
348 | 


--------------------------------------------------------------------------------
/build-aux/install-sh:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | # install - install a program, script, or datafile
  3 | 
  4 | scriptversion=2011-11-20.07; # UTC
  5 | 
  6 | # This originates from X11R5 (mit/util/scripts/install.sh), which was
  7 | # later released in X11R6 (xc/config/util/install.sh) with the
  8 | # following copyright and license.
  9 | #
 10 | # Copyright (C) 1994 X Consortium
 11 | #
 12 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 13 | # of this software and associated documentation files (the "Software"), to
 14 | # deal in the Software without restriction, including without limitation the
 15 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 16 | # sell copies of the Software, and to permit persons to whom the Software is
 17 | # furnished to do so, subject to the following conditions:
 18 | #
 19 | # The above copyright notice and this permission notice shall be included in
 20 | # all copies or substantial portions of the Software.
 21 | #
 22 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 23 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 24 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 25 | # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 26 | # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
 27 | # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 28 | #
 29 | # Except as contained in this notice, the name of the X Consortium shall not
 30 | # be used in advertising or otherwise to promote the sale, use or other deal-
 31 | # ings in this Software without prior written authorization from the X Consor-
 32 | # tium.
 33 | #
 34 | #
 35 | # FSF changes to this file are in the public domain.
 36 | #
 37 | # Calling this script install-sh is preferred over install.sh, to prevent
 38 | # 'make' implicit rules from creating a file called install from it
 39 | # when there is no Makefile.
 40 | #
 41 | # This script is compatible with the BSD install script, but was written
 42 | # from scratch.
 43 | 
 44 | nl='
 45 | '
 46 | IFS=" ""	$nl"
 47 | 
 48 | # set DOITPROG to echo to test this script
 49 | 
 50 | # Don't use :- since 4.3BSD and earlier shells don't like it.
 51 | doit=${DOITPROG-}
 52 | if test -z "$doit"; then
 53 |   doit_exec=exec
 54 | else
 55 |   doit_exec=$doit
 56 | fi
 57 | 
 58 | # Put in absolute file names if you don't have them in your path;
 59 | # or use environment vars.
 60 | 
 61 | chgrpprog=${CHGRPPROG-chgrp}
 62 | chmodprog=${CHMODPROG-chmod}
 63 | chownprog=${CHOWNPROG-chown}
 64 | cmpprog=${CMPPROG-cmp}
 65 | cpprog=${CPPROG-cp}
 66 | mkdirprog=${MKDIRPROG-mkdir}
 67 | mvprog=${MVPROG-mv}
 68 | rmprog=${RMPROG-rm}
 69 | stripprog=${STRIPPROG-strip}
 70 | 
 71 | posix_glob='?'
 72 | initialize_posix_glob='
 73 |   test "$posix_glob" != "?" || {
 74 |     if (set -f) 2>/dev/null; then
 75 |       posix_glob=
 76 |     else
 77 |       posix_glob=:
 78 |     fi
 79 |   }
 80 | '
 81 | 
 82 | posix_mkdir=
 83 | 
 84 | # Desired mode of installed file.
 85 | mode=0755
 86 | 
 87 | chgrpcmd=
 88 | chmodcmd=$chmodprog
 89 | chowncmd=
 90 | mvcmd=$mvprog
 91 | rmcmd="$rmprog -f"
 92 | stripcmd=
 93 | 
 94 | src=
 95 | dst=
 96 | dir_arg=
 97 | dst_arg=
 98 | 
 99 | copy_on_change=false
100 | no_target_directory=
101 | 
102 | usage="\
103 | Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
104 |    or: $0 [OPTION]... SRCFILES... DIRECTORY
105 |    or: $0 [OPTION]... -t DIRECTORY SRCFILES...
106 |    or: $0 [OPTION]... -d DIRECTORIES...
107 | 
108 | In the 1st form, copy SRCFILE to DSTFILE.
109 | In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
110 | In the 4th, create DIRECTORIES.
111 | 
112 | Options:
113 |      --help     display this help and exit.
114 |      --version  display version info and exit.
115 | 
116 |   -c            (ignored)
117 |   -C            install only if different (preserve the last data modification time)
118 |   -d            create directories instead of installing files.
119 |   -g GROUP      $chgrpprog installed files to GROUP.
120 |   -m MODE       $chmodprog installed files to MODE.
121 |   -o USER       $chownprog installed files to USER.
122 |   -s            $stripprog installed files.
123 |   -t DIRECTORY  install into DIRECTORY.
124 |   -T            report an error if DSTFILE is a directory.
125 | 
126 | Environment variables override the default commands:
127 |   CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
128 |   RMPROG STRIPPROG
129 | "
130 | 
131 | while test $# -ne 0; do
132 |   case $1 in
133 |     -c) ;;
134 | 
135 |     -C) copy_on_change=true;;
136 | 
137 |     -d) dir_arg=true;;
138 | 
139 |     -g) chgrpcmd="$chgrpprog $2"
140 | 	shift;;
141 | 
142 |     --help) echo "$usage"; exit $?;;
143 | 
144 |     -m) mode=$2
145 | 	case $mode in
146 | 	  *' '* | *'	'* | *'
147 | '*	  | *'*'* | *'?'* | *'['*)
148 | 	    echo "$0: invalid mode: $mode" >&2
149 | 	    exit 1;;
150 | 	esac
151 | 	shift;;
152 | 
153 |     -o) chowncmd="$chownprog $2"
154 | 	shift;;
155 | 
156 |     -s) stripcmd=$stripprog;;
157 | 
158 |     -t) dst_arg=$2
159 | 	# Protect names problematic for 'test' and other utilities.
160 | 	case $dst_arg in
161 | 	  -* | [=\(\)!]) dst_arg=./$dst_arg;;
162 | 	esac
163 | 	shift;;
164 | 
165 |     -T) no_target_directory=true;;
166 | 
167 |     --version) echo "$0 $scriptversion"; exit $?;;
168 | 
169 |     --)	shift
170 | 	break;;
171 | 
172 |     -*)	echo "$0: invalid option: $1" >&2
173 | 	exit 1;;
174 | 
175 |     *)  break;;
176 |   esac
177 |   shift
178 | done
179 | 
180 | if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
181 |   # When -d is used, all remaining arguments are directories to create.
182 |   # When -t is used, the destination is already specified.
183 |   # Otherwise, the last argument is the destination.  Remove it from $@.
184 |   for arg
185 |   do
186 |     if test -n "$dst_arg"; then
187 |       # $@ is not empty: it contains at least $arg.
188 |       set fnord "$@" "$dst_arg"
189 |       shift # fnord
190 |     fi
191 |     shift # arg
192 |     dst_arg=$arg
193 |     # Protect names problematic for 'test' and other utilities.
194 |     case $dst_arg in
195 |       -* | [=\(\)!]) dst_arg=./$dst_arg;;
196 |     esac
197 |   done
198 | fi
199 | 
200 | if test $# -eq 0; then
201 |   if test -z "$dir_arg"; then
202 |     echo "$0: no input file specified." >&2
203 |     exit 1
204 |   fi
205 |   # It's OK to call 'install-sh -d' without argument.
206 |   # This can happen when creating conditional directories.
207 |   exit 0
208 | fi
209 | 
210 | if test -z "$dir_arg"; then
211 |   do_exit='(exit $ret); exit $ret'
212 |   trap "ret=129; $do_exit" 1
213 |   trap "ret=130; $do_exit" 2
214 |   trap "ret=141; $do_exit" 13
215 |   trap "ret=143; $do_exit" 15
216 | 
217 |   # Set umask so as not to create temps with too-generous modes.
218 |   # However, 'strip' requires both read and write access to temps.
219 |   case $mode in
220 |     # Optimize common cases.
221 |     *644) cp_umask=133;;
222 |     *755) cp_umask=22;;
223 | 
224 |     *[0-7])
225 |       if test -z "$stripcmd"; then
226 | 	u_plus_rw=
227 |       else
228 | 	u_plus_rw='% 200'
229 |       fi
230 |       cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
231 |     *)
232 |       if test -z "$stripcmd"; then
233 | 	u_plus_rw=
234 |       else
235 | 	u_plus_rw=,u+rw
236 |       fi
237 |       cp_umask=$mode$u_plus_rw;;
238 |   esac
239 | fi
240 | 
241 | for src
242 | do
243 |   # Protect names problematic for 'test' and other utilities.
244 |   case $src in
245 |     -* | [=\(\)!]) src=./$src;;
246 |   esac
247 | 
248 |   if test -n "$dir_arg"; then
249 |     dst=$src
250 |     dstdir=$dst
251 |     test -d "$dstdir"
252 |     dstdir_status=$?
253 |   else
254 | 
255 |     # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
256 |     # might cause directories to be created, which would be especially bad
257 |     # if $src (and thus $dsttmp) contains '*'.
258 |     if test ! -f "$src" && test ! -d "$src"; then
259 |       echo "$0: $src does not exist." >&2
260 |       exit 1
261 |     fi
262 | 
263 |     if test -z "$dst_arg"; then
264 |       echo "$0: no destination specified." >&2
265 |       exit 1
266 |     fi
267 |     dst=$dst_arg
268 | 
269 |     # If destination is a directory, append the input filename; won't work
270 |     # if double slashes aren't ignored.
271 |     if test -d "$dst"; then
272 |       if test -n "$no_target_directory"; then
273 | 	echo "$0: $dst_arg: Is a directory" >&2
274 | 	exit 1
275 |       fi
276 |       dstdir=$dst
277 |       dst=$dstdir/`basename "$src"`
278 |       dstdir_status=0
279 |     else
280 |       # Prefer dirname, but fall back on a substitute if dirname fails.
281 |       dstdir=`
282 | 	(dirname "$dst") 2>/dev/null ||
283 | 	expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
284 | 	     X"$dst" : 'X\(//\)[^/]' \| \
285 | 	     X"$dst" : 'X\(//\)$' \| \
286 | 	     X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
287 | 	echo X"$dst" |
288 | 	    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
289 | 		   s//\1/
290 | 		   q
291 | 		 }
292 | 		 /^X\(\/\/\)[^/].*/{
293 | 		   s//\1/
294 | 		   q
295 | 		 }
296 | 		 /^X\(\/\/\)$/{
297 | 		   s//\1/
298 | 		   q
299 | 		 }
300 | 		 /^X\(\/\).*/{
301 | 		   s//\1/
302 | 		   q
303 | 		 }
304 | 		 s/.*/./; q'
305 |       `
306 | 
307 |       test -d "$dstdir"
308 |       dstdir_status=$?
309 |     fi
310 |   fi
311 | 
312 |   obsolete_mkdir_used=false
313 | 
314 |   if test $dstdir_status != 0; then
315 |     case $posix_mkdir in
316 |       '')
317 | 	# Create intermediate dirs using mode 755 as modified by the umask.
318 | 	# This is like FreeBSD 'install' as of 1997-10-28.
319 | 	umask=`umask`
320 | 	case $stripcmd.$umask in
321 | 	  # Optimize common cases.
322 | 	  *[2367][2367]) mkdir_umask=$umask;;
323 | 	  .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
324 | 
325 | 	  *[0-7])
326 | 	    mkdir_umask=`expr $umask + 22 \
327 | 	      - $umask % 100 % 40 + $umask % 20 \
328 | 	      - $umask % 10 % 4 + $umask % 2
329 | 	    `;;
330 | 	  *) mkdir_umask=$umask,go-w;;
331 | 	esac
332 | 
333 | 	# With -d, create the new directory with the user-specified mode.
334 | 	# Otherwise, rely on $mkdir_umask.
335 | 	if test -n "$dir_arg"; then
336 | 	  mkdir_mode=-m$mode
337 | 	else
338 | 	  mkdir_mode=
339 | 	fi
340 | 
341 | 	posix_mkdir=false
342 | 	case $umask in
343 | 	  *[123567][0-7][0-7])
344 | 	    # POSIX mkdir -p sets u+wx bits regardless of umask, which
345 | 	    # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
346 | 	    ;;
347 | 	  *)
348 | 	    tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
349 | 	    trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
350 | 
351 | 	    if (umask $mkdir_umask &&
352 | 		exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
353 | 	    then
354 | 	      if test -z "$dir_arg" || {
355 | 		   # Check for POSIX incompatibilities with -m.
356 | 		   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
357 | 		   # other-writable bit of parent directory when it shouldn't.
358 | 		   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
359 | 		   ls_ld_tmpdir=`ls -ld "$tmpdir"`
360 | 		   case $ls_ld_tmpdir in
361 | 		     d????-?r-*) different_mode=700;;
362 | 		     d????-?--*) different_mode=755;;
363 | 		     *) false;;
364 | 		   esac &&
365 | 		   $mkdirprog -m$different_mode -p -- "$tmpdir" && {
366 | 		     ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
367 | 		     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
368 | 		   }
369 | 		 }
370 | 	      then posix_mkdir=:
371 | 	      fi
372 | 	      rmdir "$tmpdir/d" "$tmpdir"
373 | 	    else
374 | 	      # Remove any dirs left behind by ancient mkdir implementations.
375 | 	      rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
376 | 	    fi
377 | 	    trap '' 0;;
378 | 	esac;;
379 |     esac
380 | 
381 |     if
382 |       $posix_mkdir && (
383 | 	umask $mkdir_umask &&
384 | 	$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
385 |       )
386 |     then :
387 |     else
388 | 
389 |       # The umask is ridiculous, or mkdir does not conform to POSIX,
390 |       # or it failed possibly due to a race condition.  Create the
391 |       # directory the slow way, step by step, checking for races as we go.
392 | 
393 |       case $dstdir in
394 | 	/*) prefix='/';;
395 | 	[-=\(\)!]*) prefix='./';;
396 | 	*)  prefix='';;
397 |       esac
398 | 
399 |       eval "$initialize_posix_glob"
400 | 
401 |       oIFS=$IFS
402 |       IFS=/
403 |       $posix_glob set -f
404 |       set fnord $dstdir
405 |       shift
406 |       $posix_glob set +f
407 |       IFS=$oIFS
408 | 
409 |       prefixes=
410 | 
411 |       for d
412 |       do
413 | 	test X"$d" = X && continue
414 | 
415 | 	prefix=$prefix$d
416 | 	if test -d "$prefix"; then
417 | 	  prefixes=
418 | 	else
419 | 	  if $posix_mkdir; then
420 | 	    (umask=$mkdir_umask &&
421 | 	     $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
422 | 	    # Don't fail if two instances are running concurrently.
423 | 	    test -d "$prefix" || exit 1
424 | 	  else
425 | 	    case $prefix in
426 | 	      *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
427 | 	      *) qprefix=$prefix;;
428 | 	    esac
429 | 	    prefixes="$prefixes '$qprefix'"
430 | 	  fi
431 | 	fi
432 | 	prefix=$prefix/
433 |       done
434 | 
435 |       if test -n "$prefixes"; then
436 | 	# Don't fail if two instances are running concurrently.
437 | 	(umask $mkdir_umask &&
438 | 	 eval "\$doit_exec \$mkdirprog $prefixes") ||
439 | 	  test -d "$dstdir" || exit 1
440 | 	obsolete_mkdir_used=true
441 |       fi
442 |     fi
443 |   fi
444 | 
445 |   if test -n "$dir_arg"; then
446 |     { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
447 |     { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
448 |     { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
449 |       test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
450 |   else
451 | 
452 |     # Make a couple of temp file names in the proper directory.
453 |     dsttmp=$dstdir/_inst.$$_
454 |     rmtmp=$dstdir/_rm.$$_
455 | 
456 |     # Trap to clean up those temp files at exit.
457 |     trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
458 | 
459 |     # Copy the file name to the temp name.
460 |     (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
461 | 
462 |     # and set any options; do chmod last to preserve setuid bits.
463 |     #
464 |     # If any of these fail, we abort the whole thing.  If we want to
465 |     # ignore errors from any of these, just make sure not to ignore
466 |     # errors from the above "$doit $cpprog $src $dsttmp" command.
467 |     #
468 |     { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
469 |     { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
470 |     { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
471 |     { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
472 | 
473 |     # If -C, don't bother to copy if it wouldn't change the file.
474 |     if $copy_on_change &&
475 |        old=`LC_ALL=C ls -dlL "$dst"	2>/dev/null` &&
476 |        new=`LC_ALL=C ls -dlL "$dsttmp"	2>/dev/null` &&
477 | 
478 |        eval "$initialize_posix_glob" &&
479 |        $posix_glob set -f &&
480 |        set X $old && old=:$2:$4:$5:$6 &&
481 |        set X $new && new=:$2:$4:$5:$6 &&
482 |        $posix_glob set +f &&
483 | 
484 |        test "$old" = "$new" &&
485 |        $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
486 |     then
487 |       rm -f "$dsttmp"
488 |     else
489 |       # Rename the file to the real destination.
490 |       $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
491 | 
492 |       # The rename failed, perhaps because mv can't rename something else
493 |       # to itself, or perhaps because mv is so ancient that it does not
494 |       # support -f.
495 |       {
496 | 	# Now remove or move aside any old file at destination location.
497 | 	# We try this two ways since rm can't unlink itself on some
498 | 	# systems and the destination file might be busy for other
499 | 	# reasons.  In this case, the final cleanup might fail but the new
500 | 	# file should still install successfully.
501 | 	{
502 | 	  test ! -f "$dst" ||
503 | 	  $doit $rmcmd -f "$dst" 2>/dev/null ||
504 | 	  { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
505 | 	    { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
506 | 	  } ||
507 | 	  { echo "$0: cannot unlink or rename $dst" >&2
508 | 	    (exit 1); exit 1
509 | 	  }
510 | 	} &&
511 | 
512 | 	# Now rename the file to the real destination.
513 | 	$doit $mvcmd "$dsttmp" "$dst"
514 |       }
515 |     fi || exit 1
516 | 
517 |     trap '' 0
518 |   fi
519 | done
520 | 
521 | # Local variables:
522 | # eval: (add-hook 'write-file-hooks 'time-stamp)
523 | # time-stamp-start: "scriptversion="
524 | # time-stamp-format: "%:y-%02m-%02d.%02H"
525 | # time-stamp-time-zone: "UTC"
526 | # time-stamp-end: "; # UTC"
527 | # End:
528 | 


--------------------------------------------------------------------------------
/build-aux/ltoptions.m4:
--------------------------------------------------------------------------------
  1 | # Helper functions for option handling.                    -*- Autoconf -*-
  2 | #
  3 | #   Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation,
  4 | #   Inc.
  5 | #   Written by Gary V. Vaughan, 2004
  6 | #
  7 | # This file is free software; the Free Software Foundation gives
  8 | # unlimited permission to copy and/or distribute it, with or without
  9 | # modifications, as long as this notice is preserved.
 10 | 
 11 | # serial 7 ltoptions.m4
 12 | 
 13 | # This is to help aclocal find these macros, as it can't see m4_define.
 14 | AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
 15 | 
 16 | 
 17 | # _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
 18 | # ------------------------------------------
 19 | m4_define([_LT_MANGLE_OPTION],
 20 | [[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
 21 | 
 22 | 
 23 | # _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
 24 | # ---------------------------------------
 25 | # Set option OPTION-NAME for macro MACRO-NAME, and if there is a
 26 | # matching handler defined, dispatch to it.  Other OPTION-NAMEs are
 27 | # saved as a flag.
 28 | m4_define([_LT_SET_OPTION],
 29 | [m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
 30 | m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
 31 |         _LT_MANGLE_DEFUN([$1], [$2]),
 32 |     [m4_warning([Unknown $1 option `$2'])])[]dnl
 33 | ])
 34 | 
 35 | 
 36 | # _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
 37 | # ------------------------------------------------------------
 38 | # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
 39 | m4_define([_LT_IF_OPTION],
 40 | [m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
 41 | 
 42 | 
 43 | # _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
 44 | # -------------------------------------------------------
 45 | # Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME
 46 | # are set.
 47 | m4_define([_LT_UNLESS_OPTIONS],
 48 | [m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
 49 | 	    [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
 50 | 		      [m4_define([$0_found])])])[]dnl
 51 | m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
 52 | ])[]dnl
 53 | ])
 54 | 
 55 | 
 56 | # _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
 57 | # ----------------------------------------
 58 | # OPTION-LIST is a space-separated list of Libtool options associated
 59 | # with MACRO-NAME.  If any OPTION has a matching handler declared with
 60 | # LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about
 61 | # the unknown option and exit.
 62 | m4_defun([_LT_SET_OPTIONS],
 63 | [# Set options
 64 | m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
 65 |     [_LT_SET_OPTION([$1], _LT_Option)])
 66 | 
 67 | m4_if([$1],[LT_INIT],[
 68 |   dnl
 69 |   dnl Simply set some default values (i.e off) if boolean options were not
 70 |   dnl specified:
 71 |   _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
 72 |   ])
 73 |   _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
 74 |   ])
 75 |   dnl
 76 |   dnl If no reference was made to various pairs of opposing options, then
 77 |   dnl we run the default mode handler for the pair.  For example, if neither
 78 |   dnl `shared' nor `disable-shared' was passed, we enable building of shared
 79 |   dnl archives by default:
 80 |   _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
 81 |   _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
 82 |   _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
 83 |   _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
 84 |   		   [_LT_ENABLE_FAST_INSTALL])
 85 |   ])
 86 | ])# _LT_SET_OPTIONS
 87 | 
 88 | 
 89 | ## --------------------------------- ##
 90 | ## Macros to handle LT_INIT options. ##
 91 | ## --------------------------------- ##
 92 | 
 93 | # _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
 94 | # -----------------------------------------
 95 | m4_define([_LT_MANGLE_DEFUN],
 96 | [[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
 97 | 
 98 | 
 99 | # LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
100 | # -----------------------------------------------
101 | m4_define([LT_OPTION_DEFINE],
102 | [m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
103 | ])# LT_OPTION_DEFINE
104 | 
105 | 
106 | # dlopen
107 | # ------
108 | LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
109 | ])
110 | 
111 | AU_DEFUN([AC_LIBTOOL_DLOPEN],
112 | [_LT_SET_OPTION([LT_INIT], [dlopen])
113 | AC_DIAGNOSE([obsolete],
114 | [$0: Remove this warning and the call to _LT_SET_OPTION when you
115 | put the `dlopen' option into LT_INIT's first parameter.])
116 | ])
117 | 
118 | dnl aclocal-1.4 backwards compatibility:
119 | dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
120 | 
121 | 
122 | # win32-dll
123 | # ---------
124 | # Declare package support for building win32 dll's.
125 | LT_OPTION_DEFINE([LT_INIT], [win32-dll],
126 | [enable_win32_dll=yes
127 | 
128 | case $host in
129 | *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
130 |   AC_CHECK_TOOL(AS, as, false)
131 |   AC_CHECK_TOOL(DLLTOOL, dlltool, false)
132 |   AC_CHECK_TOOL(OBJDUMP, objdump, false)
133 |   ;;
134 | esac
135 | 
136 | test -z "$AS" && AS=as
137 | _LT_DECL([], [AS],      [1], [Assembler program])dnl
138 | 
139 | test -z "$DLLTOOL" && DLLTOOL=dlltool
140 | _LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
141 | 
142 | test -z "$OBJDUMP" && OBJDUMP=objdump
143 | _LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
144 | ])# win32-dll
145 | 
146 | AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
147 | [AC_REQUIRE([AC_CANONICAL_HOST])dnl
148 | _LT_SET_OPTION([LT_INIT], [win32-dll])
149 | AC_DIAGNOSE([obsolete],
150 | [$0: Remove this warning and the call to _LT_SET_OPTION when you
151 | put the `win32-dll' option into LT_INIT's first parameter.])
152 | ])
153 | 
154 | dnl aclocal-1.4 backwards compatibility:
155 | dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
156 | 
157 | 
158 | # _LT_ENABLE_SHARED([DEFAULT])
159 | # ----------------------------
160 | # implement the --enable-shared flag, and supports the `shared' and
161 | # `disable-shared' LT_INIT options.
162 | # DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
163 | m4_define([_LT_ENABLE_SHARED],
164 | [m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
165 | AC_ARG_ENABLE([shared],
166 |     [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
167 | 	[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
168 |     [p=${PACKAGE-default}
169 |     case $enableval in
170 |     yes) enable_shared=yes ;;
171 |     no) enable_shared=no ;;
172 |     *)
173 |       enable_shared=no
174 |       # Look at the argument we got.  We use all the common list separators.
175 |       lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
176 |       for pkg in $enableval; do
177 | 	IFS="$lt_save_ifs"
178 | 	if test "X$pkg" = "X$p"; then
179 | 	  enable_shared=yes
180 | 	fi
181 |       done
182 |       IFS="$lt_save_ifs"
183 |       ;;
184 |     esac],
185 |     [enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
186 | 
187 |     _LT_DECL([build_libtool_libs], [enable_shared], [0],
188 | 	[Whether or not to build shared libraries])
189 | ])# _LT_ENABLE_SHARED
190 | 
191 | LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
192 | LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
193 | 
194 | # Old names:
195 | AC_DEFUN([AC_ENABLE_SHARED],
196 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
197 | ])
198 | 
199 | AC_DEFUN([AC_DISABLE_SHARED],
200 | [_LT_SET_OPTION([LT_INIT], [disable-shared])
201 | ])
202 | 
203 | AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
204 | AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
205 | 
206 | dnl aclocal-1.4 backwards compatibility:
207 | dnl AC_DEFUN([AM_ENABLE_SHARED], [])
208 | dnl AC_DEFUN([AM_DISABLE_SHARED], [])
209 | 
210 | 
211 | 
212 | # _LT_ENABLE_STATIC([DEFAULT])
213 | # ----------------------------
214 | # implement the --enable-static flag, and support the `static' and
215 | # `disable-static' LT_INIT options.
216 | # DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
217 | m4_define([_LT_ENABLE_STATIC],
218 | [m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
219 | AC_ARG_ENABLE([static],
220 |     [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
221 | 	[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
222 |     [p=${PACKAGE-default}
223 |     case $enableval in
224 |     yes) enable_static=yes ;;
225 |     no) enable_static=no ;;
226 |     *)
227 |      enable_static=no
228 |       # Look at the argument we got.  We use all the common list separators.
229 |       lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
230 |       for pkg in $enableval; do
231 | 	IFS="$lt_save_ifs"
232 | 	if test "X$pkg" = "X$p"; then
233 | 	  enable_static=yes
234 | 	fi
235 |       done
236 |       IFS="$lt_save_ifs"
237 |       ;;
238 |     esac],
239 |     [enable_static=]_LT_ENABLE_STATIC_DEFAULT)
240 | 
241 |     _LT_DECL([build_old_libs], [enable_static], [0],
242 | 	[Whether or not to build static libraries])
243 | ])# _LT_ENABLE_STATIC
244 | 
245 | LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
246 | LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
247 | 
248 | # Old names:
249 | AC_DEFUN([AC_ENABLE_STATIC],
250 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
251 | ])
252 | 
253 | AC_DEFUN([AC_DISABLE_STATIC],
254 | [_LT_SET_OPTION([LT_INIT], [disable-static])
255 | ])
256 | 
257 | AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
258 | AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
259 | 
260 | dnl aclocal-1.4 backwards compatibility:
261 | dnl AC_DEFUN([AM_ENABLE_STATIC], [])
262 | dnl AC_DEFUN([AM_DISABLE_STATIC], [])
263 | 
264 | 
265 | 
266 | # _LT_ENABLE_FAST_INSTALL([DEFAULT])
267 | # ----------------------------------
268 | # implement the --enable-fast-install flag, and support the `fast-install'
269 | # and `disable-fast-install' LT_INIT options.
270 | # DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
271 | m4_define([_LT_ENABLE_FAST_INSTALL],
272 | [m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
273 | AC_ARG_ENABLE([fast-install],
274 |     [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
275 |     [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
276 |     [p=${PACKAGE-default}
277 |     case $enableval in
278 |     yes) enable_fast_install=yes ;;
279 |     no) enable_fast_install=no ;;
280 |     *)
281 |       enable_fast_install=no
282 |       # Look at the argument we got.  We use all the common list separators.
283 |       lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
284 |       for pkg in $enableval; do
285 | 	IFS="$lt_save_ifs"
286 | 	if test "X$pkg" = "X$p"; then
287 | 	  enable_fast_install=yes
288 | 	fi
289 |       done
290 |       IFS="$lt_save_ifs"
291 |       ;;
292 |     esac],
293 |     [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
294 | 
295 | _LT_DECL([fast_install], [enable_fast_install], [0],
296 | 	 [Whether or not to optimize for fast installation])dnl
297 | ])# _LT_ENABLE_FAST_INSTALL
298 | 
299 | LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
300 | LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
301 | 
302 | # Old names:
303 | AU_DEFUN([AC_ENABLE_FAST_INSTALL],
304 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
305 | AC_DIAGNOSE([obsolete],
306 | [$0: Remove this warning and the call to _LT_SET_OPTION when you put
307 | the `fast-install' option into LT_INIT's first parameter.])
308 | ])
309 | 
310 | AU_DEFUN([AC_DISABLE_FAST_INSTALL],
311 | [_LT_SET_OPTION([LT_INIT], [disable-fast-install])
312 | AC_DIAGNOSE([obsolete],
313 | [$0: Remove this warning and the call to _LT_SET_OPTION when you put
314 | the `disable-fast-install' option into LT_INIT's first parameter.])
315 | ])
316 | 
317 | dnl aclocal-1.4 backwards compatibility:
318 | dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
319 | dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
320 | 
321 | 
322 | # _LT_WITH_PIC([MODE])
323 | # --------------------
324 | # implement the --with-pic flag, and support the `pic-only' and `no-pic'
325 | # LT_INIT options.
326 | # MODE is either `yes' or `no'.  If omitted, it defaults to `both'.
327 | m4_define([_LT_WITH_PIC],
328 | [AC_ARG_WITH([pic],
329 |     [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
330 | 	[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
331 |     [lt_p=${PACKAGE-default}
332 |     case $withval in
333 |     yes|no) pic_mode=$withval ;;
334 |     *)
335 |       pic_mode=default
336 |       # Look at the argument we got.  We use all the common list separators.
337 |       lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
338 |       for lt_pkg in $withval; do
339 | 	IFS="$lt_save_ifs"
340 | 	if test "X$lt_pkg" = "X$lt_p"; then
341 | 	  pic_mode=yes
342 | 	fi
343 |       done
344 |       IFS="$lt_save_ifs"
345 |       ;;
346 |     esac],
347 |     [pic_mode=default])
348 | 
349 | test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
350 | 
351 | _LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
352 | ])# _LT_WITH_PIC
353 | 
354 | LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
355 | LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
356 | 
357 | # Old name:
358 | AU_DEFUN([AC_LIBTOOL_PICMODE],
359 | [_LT_SET_OPTION([LT_INIT], [pic-only])
360 | AC_DIAGNOSE([obsolete],
361 | [$0: Remove this warning and the call to _LT_SET_OPTION when you
362 | put the `pic-only' option into LT_INIT's first parameter.])
363 | ])
364 | 
365 | dnl aclocal-1.4 backwards compatibility:
366 | dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
367 | 
368 | ## ----------------- ##
369 | ## LTDL_INIT Options ##
370 | ## ----------------- ##
371 | 
372 | m4_define([_LTDL_MODE], [])
373 | LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
374 | 		 [m4_define([_LTDL_MODE], [nonrecursive])])
375 | LT_OPTION_DEFINE([LTDL_INIT], [recursive],
376 | 		 [m4_define([_LTDL_MODE], [recursive])])
377 | LT_OPTION_DEFINE([LTDL_INIT], [subproject],
378 | 		 [m4_define([_LTDL_MODE], [subproject])])
379 | 
380 | m4_define([_LTDL_TYPE], [])
381 | LT_OPTION_DEFINE([LTDL_INIT], [installable],
382 | 		 [m4_define([_LTDL_TYPE], [installable])])
383 | LT_OPTION_DEFINE([LTDL_INIT], [convenience],
384 | 		 [m4_define([_LTDL_TYPE], [convenience])])
385 | 


--------------------------------------------------------------------------------
/build-aux/ltsugar.m4:
--------------------------------------------------------------------------------
  1 | # ltsugar.m4 -- libtool m4 base layer.                         -*-Autoconf-*-
  2 | #
  3 | # Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
  4 | # Written by Gary V. Vaughan, 2004
  5 | #
  6 | # This file is free software; the Free Software Foundation gives
  7 | # unlimited permission to copy and/or distribute it, with or without
  8 | # modifications, as long as this notice is preserved.
  9 | 
 10 | # serial 6 ltsugar.m4
 11 | 
 12 | # This is to help aclocal find these macros, as it can't see m4_define.
 13 | AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
 14 | 
 15 | 
 16 | # lt_join(SEP, ARG1, [ARG2...])
 17 | # -----------------------------
 18 | # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
 19 | # associated separator.
 20 | # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
 21 | # versions in m4sugar had bugs.
 22 | m4_define([lt_join],
 23 | [m4_if([$#], [1], [],
 24 |        [$#], [2], [[$2]],
 25 |        [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
 26 | m4_define([_lt_join],
 27 | [m4_if([$#$2], [2], [],
 28 |        [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
 29 | 
 30 | 
 31 | # lt_car(LIST)
 32 | # lt_cdr(LIST)
 33 | # ------------
 34 | # Manipulate m4 lists.
 35 | # These macros are necessary as long as will still need to support
 36 | # Autoconf-2.59 which quotes differently.
 37 | m4_define([lt_car], [[$1]])
 38 | m4_define([lt_cdr],
 39 | [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
 40 |        [$#], 1, [],
 41 |        [m4_dquote(m4_shift($@))])])
 42 | m4_define([lt_unquote], $1)
 43 | 
 44 | 
 45 | # lt_append(MACRO-NAME, STRING, [SEPARATOR])
 46 | # ------------------------------------------
 47 | # Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
 48 | # Note that neither SEPARATOR nor STRING are expanded; they are appended
 49 | # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
 50 | # No SEPARATOR is output if MACRO-NAME was previously undefined (different
 51 | # than defined and empty).
 52 | #
 53 | # This macro is needed until we can rely on Autoconf 2.62, since earlier
 54 | # versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
 55 | m4_define([lt_append],
 56 | [m4_define([$1],
 57 | 	   m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
 58 | 
 59 | 
 60 | 
 61 | # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
 62 | # ----------------------------------------------------------
 63 | # Produce a SEP delimited list of all paired combinations of elements of
 64 | # PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list
 65 | # has the form PREFIXmINFIXSUFFIXn.
 66 | # Needed until we can rely on m4_combine added in Autoconf 2.62.
 67 | m4_define([lt_combine],
 68 | [m4_if(m4_eval([$# > 3]), [1],
 69 |        [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
 70 | [[m4_foreach([_Lt_prefix], [$2],
 71 | 	     [m4_foreach([_Lt_suffix],
 72 | 		]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
 73 | 	[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
 74 | 
 75 | 
 76 | # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
 77 | # -----------------------------------------------------------------------
 78 | # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
 79 | # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
 80 | m4_define([lt_if_append_uniq],
 81 | [m4_ifdef([$1],
 82 | 	  [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
 83 | 		 [lt_append([$1], [$2], [$3])$4],
 84 | 		 [$5])],
 85 | 	  [lt_append([$1], [$2], [$3])$4])])
 86 | 
 87 | 
 88 | # lt_dict_add(DICT, KEY, VALUE)
 89 | # -----------------------------
 90 | m4_define([lt_dict_add],
 91 | [m4_define([$1($2)], [$3])])
 92 | 
 93 | 
 94 | # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
 95 | # --------------------------------------------
 96 | m4_define([lt_dict_add_subkey],
 97 | [m4_define([$1($2:$3)], [$4])])
 98 | 
 99 | 
100 | # lt_dict_fetch(DICT, KEY, [SUBKEY])
101 | # ----------------------------------
102 | m4_define([lt_dict_fetch],
103 | [m4_ifval([$3],
104 | 	m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
105 |     m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
106 | 
107 | 
108 | # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
109 | # -----------------------------------------------------------------
110 | m4_define([lt_if_dict_fetch],
111 | [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
112 | 	[$5],
113 |     [$6])])
114 | 
115 | 
116 | # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
117 | # --------------------------------------------------------------
118 | m4_define([lt_dict_filter],
119 | [m4_if([$5], [], [],
120 |   [lt_join(m4_quote(m4_default([$4], [[, ]])),
121 |            lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
122 | 		      [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
123 | ])
124 | 


--------------------------------------------------------------------------------
/build-aux/ltversion.m4:
--------------------------------------------------------------------------------
 1 | # ltversion.m4 -- version numbers			-*- Autoconf -*-
 2 | #
 3 | #   Copyright (C) 2004 Free Software Foundation, Inc.
 4 | #   Written by Scott James Remnant, 2004
 5 | #
 6 | # This file is free software; the Free Software Foundation gives
 7 | # unlimited permission to copy and/or distribute it, with or without
 8 | # modifications, as long as this notice is preserved.
 9 | 
10 | # @configure_input@
11 | 
12 | # serial 3337 ltversion.m4
13 | # This file is part of GNU Libtool
14 | 
15 | m4_define([LT_PACKAGE_VERSION], [2.4.2])
16 | m4_define([LT_PACKAGE_REVISION], [1.3337])
17 | 
18 | AC_DEFUN([LTVERSION_VERSION],
19 | [macro_version='2.4.2'
20 | macro_revision='1.3337'
21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
22 | _LT_DECL(, macro_revision, 0)
23 | ])
24 | 


--------------------------------------------------------------------------------
/build-aux/lt~obsolete.m4:
--------------------------------------------------------------------------------
 1 | # lt~obsolete.m4 -- aclocal satisfying obsolete definitions.    -*-Autoconf-*-
 2 | #
 3 | #   Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
 4 | #   Written by Scott James Remnant, 2004.
 5 | #
 6 | # This file is free software; the Free Software Foundation gives
 7 | # unlimited permission to copy and/or distribute it, with or without
 8 | # modifications, as long as this notice is preserved.
 9 | 
10 | # serial 5 lt~obsolete.m4
11 | 
12 | # These exist entirely to fool aclocal when bootstrapping libtool.
13 | #
14 | # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
15 | # which have later been changed to m4_define as they aren't part of the
16 | # exported API, or moved to Autoconf or Automake where they belong.
17 | #
18 | # The trouble is, aclocal is a bit thick.  It'll see the old AC_DEFUN
19 | # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
20 | # using a macro with the same name in our local m4/libtool.m4 it'll
21 | # pull the old libtool.m4 in (it doesn't see our shiny new m4_define
22 | # and doesn't know about Autoconf macros at all.)
23 | #
24 | # So we provide this file, which has a silly filename so it's always
25 | # included after everything else.  This provides aclocal with the
26 | # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
27 | # because those macros already exist, or will be overwritten later.
28 | # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
29 | #
30 | # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
31 | # Yes, that means every name once taken will need to remain here until
32 | # we give up compatibility with versions before 1.7, at which point
33 | # we need to keep only those names which we still refer to.
34 | 
35 | # This is to help aclocal find these macros, as it can't see m4_define.
36 | AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
37 | 
38 | m4_ifndef([AC_LIBTOOL_LINKER_OPTION],	[AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
39 | m4_ifndef([AC_PROG_EGREP],		[AC_DEFUN([AC_PROG_EGREP])])
40 | m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
41 | m4_ifndef([_LT_AC_SHELL_INIT],		[AC_DEFUN([_LT_AC_SHELL_INIT])])
42 | m4_ifndef([_LT_AC_SYS_LIBPATH_AIX],	[AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
43 | m4_ifndef([_LT_PROG_LTMAIN],		[AC_DEFUN([_LT_PROG_LTMAIN])])
44 | m4_ifndef([_LT_AC_TAGVAR],		[AC_DEFUN([_LT_AC_TAGVAR])])
45 | m4_ifndef([AC_LTDL_ENABLE_INSTALL],	[AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
46 | m4_ifndef([AC_LTDL_PREOPEN],		[AC_DEFUN([AC_LTDL_PREOPEN])])
47 | m4_ifndef([_LT_AC_SYS_COMPILER],	[AC_DEFUN([_LT_AC_SYS_COMPILER])])
48 | m4_ifndef([_LT_AC_LOCK],		[AC_DEFUN([_LT_AC_LOCK])])
49 | m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE],	[AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
50 | m4_ifndef([_LT_AC_TRY_DLOPEN_SELF],	[AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
51 | m4_ifndef([AC_LIBTOOL_PROG_CC_C_O],	[AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
52 | m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
53 | m4_ifndef([AC_LIBTOOL_OBJDIR],		[AC_DEFUN([AC_LIBTOOL_OBJDIR])])
54 | m4_ifndef([AC_LTDL_OBJDIR],		[AC_DEFUN([AC_LTDL_OBJDIR])])
55 | m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
56 | m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP],	[AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
57 | m4_ifndef([AC_PATH_MAGIC],		[AC_DEFUN([AC_PATH_MAGIC])])
58 | m4_ifndef([AC_PROG_LD_GNU],		[AC_DEFUN([AC_PROG_LD_GNU])])
59 | m4_ifndef([AC_PROG_LD_RELOAD_FLAG],	[AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
60 | m4_ifndef([AC_DEPLIBS_CHECK_METHOD],	[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
61 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
62 | m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
63 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
64 | m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS],	[AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
65 | m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP],	[AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
66 | m4_ifndef([LT_AC_PROG_EGREP],		[AC_DEFUN([LT_AC_PROG_EGREP])])
67 | m4_ifndef([LT_AC_PROG_SED],		[AC_DEFUN([LT_AC_PROG_SED])])
68 | m4_ifndef([_LT_CC_BASENAME],		[AC_DEFUN([_LT_CC_BASENAME])])
69 | m4_ifndef([_LT_COMPILER_BOILERPLATE],	[AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
70 | m4_ifndef([_LT_LINKER_BOILERPLATE],	[AC_DEFUN([_LT_LINKER_BOILERPLATE])])
71 | m4_ifndef([_AC_PROG_LIBTOOL],		[AC_DEFUN([_AC_PROG_LIBTOOL])])
72 | m4_ifndef([AC_LIBTOOL_SETUP],		[AC_DEFUN([AC_LIBTOOL_SETUP])])
73 | m4_ifndef([_LT_AC_CHECK_DLFCN],		[AC_DEFUN([_LT_AC_CHECK_DLFCN])])
74 | m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER],	[AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
75 | m4_ifndef([_LT_AC_TAGCONFIG],		[AC_DEFUN([_LT_AC_TAGCONFIG])])
76 | m4_ifndef([AC_DISABLE_FAST_INSTALL],	[AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
77 | m4_ifndef([_LT_AC_LANG_CXX],		[AC_DEFUN([_LT_AC_LANG_CXX])])
78 | m4_ifndef([_LT_AC_LANG_F77],		[AC_DEFUN([_LT_AC_LANG_F77])])
79 | m4_ifndef([_LT_AC_LANG_GCJ],		[AC_DEFUN([_LT_AC_LANG_GCJ])])
80 | m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
81 | m4_ifndef([_LT_AC_LANG_C_CONFIG],	[AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
82 | m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
83 | m4_ifndef([_LT_AC_LANG_CXX_CONFIG],	[AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
84 | m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
85 | m4_ifndef([_LT_AC_LANG_F77_CONFIG],	[AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
86 | m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
87 | m4_ifndef([_LT_AC_LANG_GCJ_CONFIG],	[AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
88 | m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
89 | m4_ifndef([_LT_AC_LANG_RC_CONFIG],	[AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
90 | m4_ifndef([AC_LIBTOOL_CONFIG],		[AC_DEFUN([AC_LIBTOOL_CONFIG])])
91 | m4_ifndef([_LT_AC_FILE_LTDLL_C],	[AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
92 | m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS],	[AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
93 | m4_ifndef([_LT_AC_PROG_CXXCPP],		[AC_DEFUN([_LT_AC_PROG_CXXCPP])])
94 | m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS],	[AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
95 | m4_ifndef([_LT_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
96 | m4_ifndef([_LT_PROG_F77],		[AC_DEFUN([_LT_PROG_F77])])
97 | m4_ifndef([_LT_PROG_FC],		[AC_DEFUN([_LT_PROG_FC])])
98 | m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
99 | 


--------------------------------------------------------------------------------
/build-aux/missing:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # Common wrapper for a few potentially missing GNU programs.
  3 | 
  4 | scriptversion=2012-06-26.16; # UTC
  5 | 
  6 | # Copyright (C) 1996-2013 Free Software Foundation, Inc.
  7 | # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
  8 | 
  9 | # This program is free software; you can redistribute it and/or modify
 10 | # it under the terms of the GNU General Public License as published by
 11 | # the Free Software Foundation; either version 2, or (at your option)
 12 | # any later version.
 13 | 
 14 | # This program is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | # GNU General Public License for more details.
 18 | 
 19 | # You should have received a copy of the GNU General Public License
 20 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 | 
 22 | # As a special exception to the GNU General Public License, if you
 23 | # distribute this file as part of a program that contains a
 24 | # configuration script generated by Autoconf, you may include it under
 25 | # the same distribution terms that you use for the rest of that program.
 26 | 
 27 | if test $# -eq 0; then
 28 |   echo 1>&2 "Try '$0 --help' for more information"
 29 |   exit 1
 30 | fi
 31 | 
 32 | case $1 in
 33 | 
 34 |   --is-lightweight)
 35 |     # Used by our autoconf macros to check whether the available missing
 36 |     # script is modern enough.
 37 |     exit 0
 38 |     ;;
 39 | 
 40 |   --run)
 41 |     # Back-compat with the calling convention used by older automake.
 42 |     shift
 43 |     ;;
 44 | 
 45 |   -h|--h|--he|--hel|--help)
 46 |     echo "\
 47 | $0 [OPTION]... PROGRAM [ARGUMENT]...
 48 | 
 49 | Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
 50 | to PROGRAM being missing or too old.
 51 | 
 52 | Options:
 53 |   -h, --help      display this help and exit
 54 |   -v, --version   output version information and exit
 55 | 
 56 | Supported PROGRAM values:
 57 |   aclocal   autoconf  autoheader   autom4te  automake  makeinfo
 58 |   bison     yacc      flex         lex       help2man
 59 | 
 60 | Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
 61 | 'g' are ignored when checking the name.
 62 | 
 63 | Send bug reports to <bug-automake@gnu.org>."
 64 |     exit $?
 65 |     ;;
 66 | 
 67 |   -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
 68 |     echo "missing $scriptversion (GNU Automake)"
 69 |     exit $?
 70 |     ;;
 71 | 
 72 |   -*)
 73 |     echo 1>&2 "$0: unknown '$1' option"
 74 |     echo 1>&2 "Try '$0 --help' for more information"
 75 |     exit 1
 76 |     ;;
 77 | 
 78 | esac
 79 | 
 80 | # Run the given program, remember its exit status.
 81 | "$@"; st=$?
 82 | 
 83 | # If it succeeded, we are done.
 84 | test $st -eq 0 && exit 0
 85 | 
 86 | # Also exit now if we it failed (or wasn't found), and '--version' was
 87 | # passed; such an option is passed most likely to detect whether the
 88 | # program is present and works.
 89 | case $2 in --version|--help) exit $st;; esac
 90 | 
 91 | # Exit code 63 means version mismatch.  This often happens when the user
 92 | # tries to use an ancient version of a tool on a file that requires a
 93 | # minimum version.
 94 | if test $st -eq 63; then
 95 |   msg="probably too old"
 96 | elif test $st -eq 127; then
 97 |   # Program was missing.
 98 |   msg="missing on your system"
 99 | else
100 |   # Program was found and executed, but failed.  Give up.
101 |   exit $st
102 | fi
103 | 
104 | perl_URL=http://www.perl.org/
105 | flex_URL=http://flex.sourceforge.net/
106 | gnu_software_URL=http://www.gnu.org/software
107 | 
108 | program_details ()
109 | {
110 |   case $1 in
111 |     aclocal|automake)
112 |       echo "The '$1' program is part of the GNU Automake package:"
113 |       echo "<$gnu_software_URL/automake>"
114 |       echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
115 |       echo "<$gnu_software_URL/autoconf>"
116 |       echo "<$gnu_software_URL/m4/>"
117 |       echo "<$perl_URL>"
118 |       ;;
119 |     autoconf|autom4te|autoheader)
120 |       echo "The '$1' program is part of the GNU Autoconf package:"
121 |       echo "<$gnu_software_URL/autoconf/>"
122 |       echo "It also requires GNU m4 and Perl in order to run:"
123 |       echo "<$gnu_software_URL/m4/>"
124 |       echo "<$perl_URL>"
125 |       ;;
126 |   esac
127 | }
128 | 
129 | give_advice ()
130 | {
131 |   # Normalize program name to check for.
132 |   normalized_program=`echo "$1" | sed '
133 |     s/^gnu-//; t
134 |     s/^gnu//; t
135 |     s/^g//; t'`
136 | 
137 |   printf '%s\n' "'$1' is $msg."
138 | 
139 |   configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
140 |   case $normalized_program in
141 |     autoconf*)
142 |       echo "You should only need it if you modified 'configure.ac',"
143 |       echo "or m4 files included by it."
144 |       program_details 'autoconf'
145 |       ;;
146 |     autoheader*)
147 |       echo "You should only need it if you modified 'acconfig.h' or"
148 |       echo "$configure_deps."
149 |       program_details 'autoheader'
150 |       ;;
151 |     automake*)
152 |       echo "You should only need it if you modified 'Makefile.am' or"
153 |       echo "$configure_deps."
154 |       program_details 'automake'
155 |       ;;
156 |     aclocal*)
157 |       echo "You should only need it if you modified 'acinclude.m4' or"
158 |       echo "$configure_deps."
159 |       program_details 'aclocal'
160 |       ;;
161 |    autom4te*)
162 |       echo "You might have modified some maintainer files that require"
163 |       echo "the 'automa4te' program to be rebuilt."
164 |       program_details 'autom4te'
165 |       ;;
166 |     bison*|yacc*)
167 |       echo "You should only need it if you modified a '.y' file."
168 |       echo "You may want to install the GNU Bison package:"
169 |       echo "<$gnu_software_URL/bison/>"
170 |       ;;
171 |     lex*|flex*)
172 |       echo "You should only need it if you modified a '.l' file."
173 |       echo "You may want to install the Fast Lexical Analyzer package:"
174 |       echo "<$flex_URL>"
175 |       ;;
176 |     help2man*)
177 |       echo "You should only need it if you modified a dependency" \
178 |            "of a man page."
179 |       echo "You may want to install the GNU Help2man package:"
180 |       echo "<$gnu_software_URL/help2man/>"
181 |     ;;
182 |     makeinfo*)
183 |       echo "You should only need it if you modified a '.texi' file, or"
184 |       echo "any other file indirectly affecting the aspect of the manual."
185 |       echo "You might want to install the Texinfo package:"
186 |       echo "<$gnu_software_URL/texinfo/>"
187 |       echo "The spurious makeinfo call might also be the consequence of"
188 |       echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
189 |       echo "want to install GNU make:"
190 |       echo "<$gnu_software_URL/make/>"
191 |       ;;
192 |     *)
193 |       echo "You might have modified some files without having the proper"
194 |       echo "tools for further handling them.  Check the 'README' file, it"
195 |       echo "often tells you about the needed prerequisites for installing"
196 |       echo "this package.  You may also peek at any GNU archive site, in"
197 |       echo "case some other package contains this missing '$1' program."
198 |       ;;
199 |   esac
200 | }
201 | 
202 | give_advice "$1" | sed -e '1s/^/WARNING: /' \
203 |                        -e '2,$s/^/         /' >&2
204 | 
205 | # Propagate the correct exit status (expected to be 127 for a program
206 | # not found, 63 for a program that failed due to version mismatch).
207 | exit $st
208 | 
209 | # Local variables:
210 | # eval: (add-hook 'write-file-hooks 'time-stamp)
211 | # time-stamp-start: "scriptversion="
212 | # time-stamp-format: "%:y-%02m-%02d.%02H"
213 | # time-stamp-time-zone: "UTC"
214 | # time-stamp-end: "; # UTC"
215 | # End:
216 | 


--------------------------------------------------------------------------------
/build-aux/pkg.m4:
--------------------------------------------------------------------------------
  1 | dnl pkg.m4 - Macros to locate and utilise pkg-config.   -*- Autoconf -*-
  2 | dnl serial 11 (pkg-config-0.29.1)
  3 | dnl
  4 | dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
  5 | dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
  6 | dnl
  7 | dnl This program is free software; you can redistribute it and/or modify
  8 | dnl it under the terms of the GNU General Public License as published by
  9 | dnl the Free Software Foundation; either version 2 of the License, or
 10 | dnl (at your option) any later version.
 11 | dnl
 12 | dnl This program is distributed in the hope that it will be useful, but
 13 | dnl WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 15 | dnl General Public License for more details.
 16 | dnl
 17 | dnl You should have received a copy of the GNU General Public License
 18 | dnl along with this program; if not, write to the Free Software
 19 | dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 20 | dnl 02111-1307, USA.
 21 | dnl
 22 | dnl As a special exception to the GNU General Public License, if you
 23 | dnl distribute this file as part of a program that contains a
 24 | dnl configuration script generated by Autoconf, you may include it under
 25 | dnl the same distribution terms that you use for the rest of that
 26 | dnl program.
 27 | 
 28 | dnl PKG_PREREQ(MIN-VERSION)
 29 | dnl -----------------------
 30 | dnl Since: 0.29
 31 | dnl
 32 | dnl Verify that the version of the pkg-config macros are at least
 33 | dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's
 34 | dnl installed version of pkg-config, this checks the developer's version
 35 | dnl of pkg.m4 when generating configure.
 36 | dnl
 37 | dnl To ensure that this macro is defined, also add:
 38 | dnl m4_ifndef([PKG_PREREQ],
 39 | dnl     [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])])
 40 | dnl
 41 | dnl See the "Since" comment for each macro you use to see what version
 42 | dnl of the macros you require.
 43 | m4_defun([PKG_PREREQ],
 44 | [m4_define([PKG_MACROS_VERSION], [0.29.1])
 45 | m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
 46 |     [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
 47 | ])dnl PKG_PREREQ
 48 | 
 49 | dnl PKG_PROG_PKG_CONFIG([MIN-VERSION])
 50 | dnl ----------------------------------
 51 | dnl Since: 0.16
 52 | dnl
 53 | dnl Search for the pkg-config tool and set the PKG_CONFIG variable to
 54 | dnl first found in the path. Checks that the version of pkg-config found
 55 | dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is
 56 | dnl used since that's the first version where most current features of
 57 | dnl pkg-config existed.
 58 | AC_DEFUN([PKG_PROG_PKG_CONFIG],
 59 | [m4_pattern_forbid([^_?PKG_[A-Z_]+$])
 60 | m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
 61 | m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
 62 | AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
 63 | AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
 64 | AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
 65 | 
 66 | if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
 67 | 	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
 68 | fi
 69 | if test -n "$PKG_CONFIG"; then
 70 | 	_pkg_min_version=m4_default([$1], [0.9.0])
 71 | 	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
 72 | 	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
 73 | 		AC_MSG_RESULT([yes])
 74 | 	else
 75 | 		AC_MSG_RESULT([no])
 76 | 		PKG_CONFIG=""
 77 | 	fi
 78 | fi[]dnl
 79 | ])dnl PKG_PROG_PKG_CONFIG
 80 | 
 81 | dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
 82 | dnl -------------------------------------------------------------------
 83 | dnl Since: 0.18
 84 | dnl
 85 | dnl Check to see whether a particular set of modules exists. Similar to
 86 | dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
 87 | dnl
 88 | dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
 89 | dnl only at the first occurence in configure.ac, so if the first place
 90 | dnl it's called might be skipped (such as if it is within an "if", you
 91 | dnl have to call PKG_CHECK_EXISTS manually
 92 | AC_DEFUN([PKG_CHECK_EXISTS],
 93 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
 94 | if test -n "$PKG_CONFIG" && \
 95 |     AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
 96 |   m4_default([$2], [:])
 97 | m4_ifvaln([$3], [else
 98 |   $3])dnl
 99 | fi])
100 | 
101 | dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
102 | dnl ---------------------------------------------
103 | dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting
104 | dnl pkg_failed based on the result.
105 | m4_define([_PKG_CONFIG],
106 | [if test -n "$$1"; then
107 |     pkg_cv_[]$1="$$1"
108 |  elif test -n "$PKG_CONFIG"; then
109 |     PKG_CHECK_EXISTS([$3],
110 |                      [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
111 | 		      test "x$?" != "x0" && pkg_failed=yes ],
112 | 		     [pkg_failed=yes])
113 |  else
114 |     pkg_failed=untried
115 | fi[]dnl
116 | ])dnl _PKG_CONFIG
117 | 
118 | dnl _PKG_SHORT_ERRORS_SUPPORTED
119 | dnl ---------------------------
120 | dnl Internal check to see if pkg-config supports short errors.
121 | AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
122 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])
123 | if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
124 |         _pkg_short_errors_supported=yes
125 | else
126 |         _pkg_short_errors_supported=no
127 | fi[]dnl
128 | ])dnl _PKG_SHORT_ERRORS_SUPPORTED
129 | 
130 | 
131 | dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
132 | dnl   [ACTION-IF-NOT-FOUND])
133 | dnl --------------------------------------------------------------
134 | dnl Since: 0.4.0
135 | dnl
136 | dnl Note that if there is a possibility the first call to
137 | dnl PKG_CHECK_MODULES might not happen, you should be sure to include an
138 | dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
139 | AC_DEFUN([PKG_CHECK_MODULES],
140 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
141 | AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
142 | AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
143 | 
144 | pkg_failed=no
145 | AC_MSG_CHECKING([for $1])
146 | 
147 | _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
148 | _PKG_CONFIG([$1][_LIBS], [libs], [$2])
149 | 
150 | m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
151 | and $1[]_LIBS to avoid the need to call pkg-config.
152 | See the pkg-config man page for more details.])
153 | 
154 | if test $pkg_failed = yes; then
155 |    	AC_MSG_RESULT([no])
156 |         _PKG_SHORT_ERRORS_SUPPORTED
157 |         if test $_pkg_short_errors_supported = yes; then
158 | 	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
159 |         else 
160 | 	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
161 |         fi
162 | 	# Put the nasty error message in config.log where it belongs
163 | 	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
164 | 
165 | 	m4_default([$4], [AC_MSG_ERROR(
166 | [Package requirements ($2) were not met:
167 | 
168 | $$1_PKG_ERRORS
169 | 
170 | Consider adjusting the PKG_CONFIG_PATH environment variable if you
171 | installed software in a non-standard prefix.
172 | 
173 | _PKG_TEXT])[]dnl
174 |         ])
175 | elif test $pkg_failed = untried; then
176 |      	AC_MSG_RESULT([no])
177 | 	m4_default([$4], [AC_MSG_FAILURE(
178 | [The pkg-config script could not be found or is too old.  Make sure it
179 | is in your PATH or set the PKG_CONFIG environment variable to the full
180 | path to pkg-config.
181 | 
182 | _PKG_TEXT
183 | 
184 | To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
185 |         ])
186 | else
187 | 	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
188 | 	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
189 |         AC_MSG_RESULT([yes])
190 | 	$3
191 | fi[]dnl
192 | ])dnl PKG_CHECK_MODULES
193 | 
194 | 
195 | dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
196 | dnl   [ACTION-IF-NOT-FOUND])
197 | dnl ---------------------------------------------------------------------
198 | dnl Since: 0.29
199 | dnl
200 | dnl Checks for existence of MODULES and gathers its build flags with
201 | dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags
202 | dnl and VARIABLE-PREFIX_LIBS from --libs.
203 | dnl
204 | dnl Note that if there is a possibility the first call to
205 | dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to
206 | dnl include an explicit call to PKG_PROG_PKG_CONFIG in your
207 | dnl configure.ac.
208 | AC_DEFUN([PKG_CHECK_MODULES_STATIC],
209 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
210 | _save_PKG_CONFIG=$PKG_CONFIG
211 | PKG_CONFIG="$PKG_CONFIG --static"
212 | PKG_CHECK_MODULES($@)
213 | PKG_CONFIG=$_save_PKG_CONFIG[]dnl
214 | ])dnl PKG_CHECK_MODULES_STATIC
215 | 
216 | 
217 | dnl PKG_INSTALLDIR([DIRECTORY])
218 | dnl -------------------------
219 | dnl Since: 0.27
220 | dnl
221 | dnl Substitutes the variable pkgconfigdir as the location where a module
222 | dnl should install pkg-config .pc files. By default the directory is
223 | dnl $libdir/pkgconfig, but the default can be changed by passing
224 | dnl DIRECTORY. The user can override through the --with-pkgconfigdir
225 | dnl parameter.
226 | AC_DEFUN([PKG_INSTALLDIR],
227 | [m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
228 | m4_pushdef([pkg_description],
229 |     [pkg-config installation directory @<:@]pkg_default[@:>@])
230 | AC_ARG_WITH([pkgconfigdir],
231 |     [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],,
232 |     [with_pkgconfigdir=]pkg_default)
233 | AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
234 | m4_popdef([pkg_default])
235 | m4_popdef([pkg_description])
236 | ])dnl PKG_INSTALLDIR
237 | 
238 | 
239 | dnl PKG_NOARCH_INSTALLDIR([DIRECTORY])
240 | dnl --------------------------------
241 | dnl Since: 0.27
242 | dnl
243 | dnl Substitutes the variable noarch_pkgconfigdir as the location where a
244 | dnl module should install arch-independent pkg-config .pc files. By
245 | dnl default the directory is $datadir/pkgconfig, but the default can be
246 | dnl changed by passing DIRECTORY. The user can override through the
247 | dnl --with-noarch-pkgconfigdir parameter.
248 | AC_DEFUN([PKG_NOARCH_INSTALLDIR],
249 | [m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
250 | m4_pushdef([pkg_description],
251 |     [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@])
252 | AC_ARG_WITH([noarch-pkgconfigdir],
253 |     [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],,
254 |     [with_noarch_pkgconfigdir=]pkg_default)
255 | AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
256 | m4_popdef([pkg_default])
257 | m4_popdef([pkg_description])
258 | ])dnl PKG_NOARCH_INSTALLDIR
259 | 
260 | 
261 | dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
262 | dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
263 | dnl -------------------------------------------
264 | dnl Since: 0.28
265 | dnl
266 | dnl Retrieves the value of the pkg-config variable for the given module.
267 | AC_DEFUN([PKG_CHECK_VAR],
268 | [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
269 | AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
270 | 
271 | _PKG_CONFIG([$1], [variable="][$3]["], [$2])
272 | AS_VAR_COPY([$1], [pkg_cv_][$1])
273 | 
274 | AS_VAR_IF([$1], [""], [$5], [$4])dnl
275 | ])dnl PKG_CHECK_VAR
276 | 


--------------------------------------------------------------------------------
/build-aux/test-driver:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # test-driver - basic testsuite driver script.
  3 | 
  4 | scriptversion=2012-06-27.10; # UTC
  5 | 
  6 | # Copyright (C) 2011-2013 Free Software Foundation, Inc.
  7 | #
  8 | # This program is free software; you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation; either version 2, or (at your option)
 11 | # any later version.
 12 | #
 13 | # This program is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | 
 21 | # As a special exception to the GNU General Public License, if you
 22 | # distribute this file as part of a program that contains a
 23 | # configuration script generated by Autoconf, you may include it under
 24 | # the same distribution terms that you use for the rest of that program.
 25 | 
 26 | # This file is maintained in Automake, please report
 27 | # bugs to <bug-automake@gnu.org> or send patches to
 28 | # <automake-patches@gnu.org>.
 29 | 
 30 | # Make unconditional expansion of undefined variables an error.  This
 31 | # helps a lot in preventing typo-related bugs.
 32 | set -u
 33 | 
 34 | usage_error ()
 35 | {
 36 |   echo "$0: $*" >&2
 37 |   print_usage >&2
 38 |   exit 2
 39 | }
 40 | 
 41 | print_usage ()
 42 | {
 43 |   cat <<END
 44 | Usage:
 45 |   test-driver --test-name=NAME --log-file=PATH --trs-file=PATH
 46 |               [--expect-failure={yes|no}] [--color-tests={yes|no}]
 47 |               [--enable-hard-errors={yes|no}] [--] TEST-SCRIPT
 48 | The '--test-name', '--log-file' and '--trs-file' options are mandatory.
 49 | END
 50 | }
 51 | 
 52 | # TODO: better error handling in option parsing (in particular, ensure
 53 | # TODO: $log_file, $trs_file and $test_name are defined).
 54 | test_name= # Used for reporting.
 55 | log_file=  # Where to save the output of the test script.
 56 | trs_file=  # Where to save the metadata of the test run.
 57 | expect_failure=no
 58 | color_tests=no
 59 | enable_hard_errors=yes
 60 | while test $# -gt 0; do
 61 |   case $1 in
 62 |   --help) print_usage; exit $?;;
 63 |   --version) echo "test-driver $scriptversion"; exit $?;;
 64 |   --test-name) test_name=$2; shift;;
 65 |   --log-file) log_file=$2; shift;;
 66 |   --trs-file) trs_file=$2; shift;;
 67 |   --color-tests) color_tests=$2; shift;;
 68 |   --expect-failure) expect_failure=$2; shift;;
 69 |   --enable-hard-errors) enable_hard_errors=$2; shift;;
 70 |   --) shift; break;;
 71 |   -*) usage_error "invalid option: '$1'";;
 72 |   esac
 73 |   shift
 74 | done
 75 | 
 76 | if test $color_tests = yes; then
 77 |   # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'.
 78 |   red='[0;31m' # Red.
 79 |   grn='[0;32m' # Green.
 80 |   lgn='[1;32m' # Light green.
 81 |   blu='[1;34m' # Blue.
 82 |   mgn='[0;35m' # Magenta.
 83 |   std='[m'     # No color.
 84 | else
 85 |   red= grn= lgn= blu= mgn= std=
 86 | fi
 87 | 
 88 | do_exit='rm -f $log_file $trs_file; (exit $st); exit $st'
 89 | trap "st=129; $do_exit" 1
 90 | trap "st=130; $do_exit" 2
 91 | trap "st=141; $do_exit" 13
 92 | trap "st=143; $do_exit" 15
 93 | 
 94 | # Test script is run here.
 95 | "$@" >$log_file 2>&1
 96 | estatus=$?
 97 | if test $enable_hard_errors = no && test $estatus -eq 99; then
 98 |   estatus=1
 99 | fi
100 | 
101 | case $estatus:$expect_failure in
102 |   0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
103 |   0:*)   col=$grn res=PASS  recheck=no  gcopy=no;;
104 |   77:*)  col=$blu res=SKIP  recheck=no  gcopy=yes;;
105 |   99:*)  col=$mgn res=ERROR recheck=yes gcopy=yes;;
106 |   *:yes) col=$lgn res=XFAIL recheck=no  gcopy=yes;;
107 |   *:*)   col=$red res=FAIL  recheck=yes gcopy=yes;;
108 | esac
109 | 
110 | # Report outcome to console.
111 | echo "${col}${res}${std}: $test_name"
112 | 
113 | # Register the test result, and other relevant metadata.
114 | echo ":test-result: $res" > $trs_file
115 | echo ":global-test-result: $res" >> $trs_file
116 | echo ":recheck: $recheck" >> $trs_file
117 | echo ":copy-in-global-log: $gcopy" >> $trs_file
118 | 
119 | # Local Variables:
120 | # mode: shell-script
121 | # sh-indentation: 2
122 | # eval: (add-hook 'write-file-hooks 'time-stamp)
123 | # time-stamp-start: "scriptversion="
124 | # time-stamp-format: "%:y-%02m-%02d.%02H"
125 | # time-stamp-time-zone: "UTC"
126 | # time-stamp-end: "; # UTC"
127 | # End:
128 | 


--------------------------------------------------------------------------------
/config.h.in:
--------------------------------------------------------------------------------
  1 | /* config.h.in.  Generated from configure.ac by autoheader.  */
  2 | 
  3 | /* Define if --enable-debug option is found. */
  4 | #undef DEBUG
  5 | 
  6 | /* Define to 1 if you have the <dlfcn.h> header file. */
  7 | #undef HAVE_DLFCN_H
  8 | 
  9 | /* Define to 1 if you have the `dup2' function. */
 10 | #undef HAVE_DUP2
 11 | 
 12 | /* Define to 1 if you have the <fcntl.h> header file. */
 13 | #undef HAVE_FCNTL_H
 14 | 
 15 | /* Define to 1 if you have the `fork' function. */
 16 | #undef HAVE_FORK
 17 | 
 18 | /* Define to 1 if you have the `gettimeofday' function. */
 19 | #undef HAVE_GETTIMEOFDAY
 20 | 
 21 | /* Define to 1 if you have the <inttypes.h> header file. */
 22 | #undef HAVE_INTTYPES_H
 23 | 
 24 | /* Define to 1 if you have the `cuda' library (-lcuda). */
 25 | #undef HAVE_LIBCUDA
 26 | 
 27 | /* Define to 1 if you have the `cudart' library (-lcudart). */
 28 | #undef HAVE_LIBCUDART
 29 | 
 30 | /* Define to 1 if you have the `dl' library (-ldl). */
 31 | #undef HAVE_LIBDL
 32 | 
 33 | /* Define to 1 if you have the `pthread' library (-lpthread). */
 34 | #undef HAVE_LIBPTHREAD
 35 | 
 36 | /* Define to 1 if your system has a GNU libc compatible `malloc' function, and
 37 |    to 0 otherwise. */
 38 | #undef HAVE_MALLOC
 39 | 
 40 | /* Define to 1 if you have the <memory.h> header file. */
 41 | #undef HAVE_MEMORY_H
 42 | 
 43 | /* Define to 1 if you have the `mempcpy' function. */
 44 | #undef HAVE_MEMPCPY
 45 | 
 46 | /* Define to 1 if you have the `mkfifo' function. */
 47 | #undef HAVE_MKFIFO
 48 | 
 49 | /* Define to 1 if you have the <stdint.h> header file. */
 50 | #undef HAVE_STDINT_H
 51 | 
 52 | /* Define to 1 if you have the <stdlib.h> header file. */
 53 | #undef HAVE_STDLIB_H
 54 | 
 55 | /* Define to 1 if you have the <strings.h> header file. */
 56 | #undef HAVE_STRINGS_H
 57 | 
 58 | /* Define to 1 if you have the <string.h> header file. */
 59 | #undef HAVE_STRING_H
 60 | 
 61 | /* Define to 1 if you have the `strtol' function. */
 62 | #undef HAVE_STRTOL
 63 | 
 64 | /* Define to 1 if you have the <sys/stat.h> header file. */
 65 | #undef HAVE_SYS_STAT_H
 66 | 
 67 | /* Define to 1 if you have the <sys/time.h> header file. */
 68 | #undef HAVE_SYS_TIME_H
 69 | 
 70 | /* Define to 1 if you have the <sys/types.h> header file. */
 71 | #undef HAVE_SYS_TYPES_H
 72 | 
 73 | /* Define to 1 if you have the <unistd.h> header file. */
 74 | #undef HAVE_UNISTD_H
 75 | 
 76 | /* Define to 1 if you have the `vfork' function. */
 77 | #undef HAVE_VFORK
 78 | 
 79 | /* Define to 1 if you have the <vfork.h> header file. */
 80 | #undef HAVE_VFORK_H
 81 | 
 82 | /* Define to 1 if `fork' works. */
 83 | #undef HAVE_WORKING_FORK
 84 | 
 85 | /* Define to 1 if `vfork' works. */
 86 | #undef HAVE_WORKING_VFORK
 87 | 
 88 | /* Define to the sub-directory in which libtool stores uninstalled libraries.
 89 |    */
 90 | #undef LT_OBJDIR
 91 | 
 92 | /* Define to 1 if your C compiler doesn't accept -c and -o together. */
 93 | #undef NO_MINUS_C_MINUS_O
 94 | 
 95 | /* Name of package */
 96 | #undef PACKAGE
 97 | 
 98 | /* Define to the address where bug reports for this package should be sent. */
 99 | #undef PACKAGE_BUGREPORT
100 | 
101 | /* Define to the full name of this package. */
102 | #undef PACKAGE_NAME
103 | 
104 | /* Define to the full name and version of this package. */
105 | #undef PACKAGE_STRING
106 | 
107 | /* Define to the one symbol short name of this package. */
108 | #undef PACKAGE_TARNAME
109 | 
110 | /* Define to the home page for this package. */
111 | #undef PACKAGE_URL
112 | 
113 | /* Define to the version of this package. */
114 | #undef PACKAGE_VERSION
115 | 
116 | /* Define to 1 if you have the ANSI C header files. */
117 | #undef STDC_HEADERS
118 | 
119 | /* Version number of package */
120 | #undef VERSION
121 | 
122 | /* Define to `__inline__' or `__inline' if that's what the C compiler
123 |    calls it, or to nothing if 'inline' is not supported under any name.  */
124 | #ifndef __cplusplus
125 | #undef inline
126 | #endif
127 | 
128 | /* Define to rpl_malloc if the replacement function should be used. */
129 | #undef malloc
130 | 
131 | /* Define to `int' if <sys/types.h> does not define. */
132 | #undef pid_t
133 | 
134 | /* Define to `unsigned int' if <sys/types.h> does not define. */
135 | #undef size_t
136 | 
137 | /* Define to `int' if <sys/types.h> does not define. */
138 | #undef ssize_t
139 | 
140 | /* Define as `fork' if `vfork' does not work. */
141 | #undef vfork
142 | 


--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
  1 | AC_PREREQ([2.69])
  2 | 
  3 | AC_INIT([mrcuda], [7.0.0], [markthub.p.aa@m.titech.ac.jp])
  4 | AC_CONFIG_AUX_DIR([build-aux])
  5 | AM_INIT_AUTOMAKE
  6 | AC_CONFIG_MACRO_DIR([build-aux])
  7 | AC_CONFIG_HEADERS([config.h])
  8 | AC_CONFIG_FILES([Makefile src/Makefile tests/Makefile scripts/Makefile])
  9 | AC_ENABLE_SHARED(yes)
 10 | AC_ENABLE_STATIC(no)
 11 | 
 12 | AC_ARG_WITH([rcuda], [
 13 |     AS_HELP_STRING([--with-rcuda=[RCUDA_PATH]],
 14 |         [optionally specify the installation path of rCUDA.]
 15 |     )
 16 | ])
 17 | 
 18 | AC_ARG_WITH([nvcc], [
 19 |     AS_HELP_STRING([--with-nvcc=[nvcc]],
 20 |         [optionally specify nvcc you want to use.]
 21 |     )], [
 22 |         AC_SUBST(NVCC, "${with_nvcc}")
 23 |     ]
 24 | )
 25 | 
 26 | AC_ARG_WITH([libcudart], [
 27 |     AS_HELP_STRING([--with-libcudart=[libcudart]],
 28 |         [optionally specify CUDA libcudart you want to use.]
 29 |     )], [
 30 |         AC_SUBST(NVIDIA_LIBCUDART, "${with_libcudart}")
 31 |     ]
 32 | )
 33 | 
 34 | AC_ARG_ENABLE([debug],
 35 |     AS_HELP_STRING([--enable-debug], [Enable debug output]))
 36 | AS_IF([test "x$enable_debug" = "xyes"], [
 37 |     AC_DEFINE(DEBUG, 1, [Define if --enable-debug option is found.])
 38 | ])
 39 | 
 40 | # Checks for programs
 41 | AM_PROG_AR
 42 | AC_PROG_CC
 43 | AM_PROG_CC_C_O
 44 | AC_PROG_INSTALL
 45 | AC_PROG_LN_S
 46 | AM_PATH_PYTHON([2.7])
 47 | 
 48 | if test x"${NVCC}" == x""; then
 49 |     AC_PATH_PROG(NVCC, nvcc, no)
 50 |     if test x"${NVCC}" == x"no"; then
 51 |         AC_MSG_ERROR([Cannot find nvcc.])
 52 |     fi
 53 | fi
 54 | 
 55 | if test x"${NVIDIA_LIBCUDART}" == x""; then
 56 |     AC_PATH_PROG(NVIDIA_LIBCUDART, [libcudart.so], no, [$LD_LIBRARY_PATH$PATH_SEPARATOR$LIBRARY_PATH])
 57 |     if test x"${NVIDIA_LIBCUDART}" == x"no"; then
 58 |         AC_MSG_ERROR([Cannot find CUDA's libcudart.so.])
 59 |     fi
 60 | fi
 61 | 
 62 | AC_PATH_PROG(RCUDA_RCUDACOMMIB, rCUDAcommIB.so, no, [$with_rcuda/lib$PATH_SEPARATOR$PATH])
 63 | if test x"${RCUDA_RCUDACOMMIB}" == x"no"; then
 64 |     AC_MSG_ERROR([Cannot find rCUDA. Make sure rCUDA is installed on your system.])
 65 | fi
 66 | 
 67 | AC_PATH_PROG(RCUDA_RCUDACOMMTCP, rCUDAcommTCP.so, no, [$with_rcuda/lib$PATH_SEPARATOR$PATH])
 68 | if test x"${RCUDA_RCUDACOMMTCP}" == x"no"; then
 69 |     AC_MSG_ERROR([Cannot find rCUDA. Make sure rCUDA is installed on your system.])
 70 | fi
 71 | 
 72 | AC_PATH_PROG(RCUDA_LIBCUDART, libcudart.so, no, [$with_rcuda/lib$PATH_SEPARATOR$PATH])
 73 | if test x"${RCUDA_LIBCUDART}" == x"no"; then
 74 |     AC_MSG_ERROR([Cannot find rCUDA. Make sure rCUDA is installed on your system.])
 75 | fi
 76 | 
 77 | # Checks for modules
 78 | PKG_CHECK_MODULES([CHECK], [check >= 0.9.4])
 79 | PKG_CHECK_MODULES([DEPS], [glib-2.0 >= 2.24.1])
 80 | LT_INIT
 81 | 
 82 | # Checks for libraries.
 83 | # FIXME: Replace `main' with a function in `-lcuda':
 84 | AC_CHECK_LIB([cuda], [cuCtxCreate])
 85 | # FIXME: Replace `main' with a function in `-lcudart':
 86 | AC_CHECK_LIB([cudart], [cudaMemcpy])
 87 | # FIXME: Replace `main' with a function in `-ldl':
 88 | AC_CHECK_LIB([dl], [dlsym])
 89 | # FIXME: Replace `main' with a function in `-lpthread':
 90 | AC_CHECK_LIB([pthread], [pthread_mutex_init])
 91 | 
 92 | # Checks for header files.
 93 | AC_CHECK_HEADERS([fcntl.h stdlib.h string.h sys/time.h unistd.h])
 94 | 
 95 | # Checks for typedefs, structures, and compiler characteristics.
 96 | AC_C_INLINE
 97 | AC_TYPE_PID_T
 98 | AC_TYPE_SIZE_T
 99 | AC_TYPE_SSIZE_T
100 | 
101 | # Checks for library functions.
102 | AC_FUNC_FORK
103 | AC_FUNC_MALLOC
104 | AC_CHECK_FUNCS([dup2 gettimeofday mempcpy mkfifo strtol])
105 | 
106 | AC_OUTPUT
107 | 


--------------------------------------------------------------------------------
/notes/func-list.txt:
--------------------------------------------------------------------------------
 1 | extern __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void);
 2 | extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func);
 3 | extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol(const void *symbol, const void *src, size_t count, size_t offset __dv(0), enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice));
 4 | extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind);
 5 | extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, unsigned int flags);
 6 | extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, size_t count);
 7 | extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr);
 8 | extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, size_t size, size_t offset);
 9 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMalloc(void **devPtr, size_t size);
10 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFree(void *devPtr);
11 | extern __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), cudaStream_t stream __dv(0));
12 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetLastError(void);
13 | extern __host__ cudaError_t CUDARTAPI cudaBindTexture(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX));
14 | extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(int x, int y, int z, int w, enum cudaChannelFormatKind f);
15 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device);
16 | extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream);
17 | extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, size_t *total);
18 | extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device);
19 | extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags( unsigned int flags );
20 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDevice(int *device);
21 | extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count);
22 | 
23 | /* Maybe we don't need these, hopefully */
24 | CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
25 | CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
26 | CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
27 | CUresult CUDAAPI cuDeviceGetCount(int *count);
28 | CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
29 | 


--------------------------------------------------------------------------------
/results/memcpybw-mhelper.out:
--------------------------------------------------------------------------------
  1 | prog lib count size_per_call time
  2 | cudamemcpy mrcuda 0 1024 23.707000
  3 | cudamemcpy mrcuda 0 2048 23.586000
  4 | cudamemcpy mrcuda 0 4096 24.085000
  5 | cudamemcpy mrcuda 0 8192 27.142000
  6 | cudamemcpy mrcuda 0 16384 32.360000
  7 | cudamemcpy mrcuda 0 32768 43.379000
  8 | cudamemcpy mrcuda 0 65536 65.852000
  9 | cudamemcpy mrcuda 0 131072 107.484000
 10 | cudamemcpy mrcuda 0 262144 190.487000
 11 | cudamemcpy mrcuda 0 524288 360.121000
 12 | cudamemcpy mrcuda 0 1048576 740.892000
 13 | cudamemcpy mrcuda 0 2097152 1502.484000
 14 | cudamemcpy mrcuda 0 4194304 3162.210000
 15 | cudamemcpy mrcuda 0 8388608 6589.956000
 16 | cudamemcpy mrcuda 0 16777216 14099.772000
 17 | cudamemcpy mrcuda 0 33554432 28670.813000
 18 | cudamemcpy mrcuda 0 67108864 57351.585000
 19 | cudamemcpy mrcuda 0 134217728 114402.483000
 20 | cudamemcpy mrcuda 0 268435456 228174.487000
 21 | cudamemcpy mrcuda 0 536870912 456561.788000
 22 | cudamemcpy mrcuda 1 1024 25.535000
 23 | cudamemcpy mrcuda 1 2048 24.536000
 24 | cudamemcpy mrcuda 1 4096 24.919000
 25 | cudamemcpy mrcuda 1 8192 28.170000
 26 | cudamemcpy mrcuda 1 16384 33.451000
 27 | cudamemcpy mrcuda 1 32768 44.960000
 28 | cudamemcpy mrcuda 1 65536 67.520000
 29 | cudamemcpy mrcuda 1 131072 109.729000
 30 | cudamemcpy mrcuda 1 262144 192.256000
 31 | cudamemcpy mrcuda 1 524288 363.553000
 32 | cudamemcpy mrcuda 1 1048576 774.701000
 33 | cudamemcpy mrcuda 1 2097152 1615.745000
 34 | cudamemcpy mrcuda 1 4194304 3289.496000
 35 | cudamemcpy mrcuda 1 8388608 6818.549000
 36 | cudamemcpy mrcuda 1 16777216 14308.595000
 37 | cudamemcpy mrcuda 1 33554432 28948.875000
 38 | cudamemcpy mrcuda 1 67108864 57716.634000
 39 | cudamemcpy mrcuda 1 134217728 114734.186000
 40 | cudamemcpy mrcuda 1 268435456 228719.460000
 41 | cudamemcpy mrcuda 1 536870912 457770.822000
 42 | cudamemcpy mrcuda 2 1024 24.210000
 43 | cudamemcpy mrcuda 2 2048 23.541000
 44 | cudamemcpy mrcuda 2 4096 23.846000
 45 | cudamemcpy mrcuda 2 8192 27.095000
 46 | cudamemcpy mrcuda 2 16384 32.419000
 47 | cudamemcpy mrcuda 2 32768 43.485000
 48 | cudamemcpy mrcuda 2 65536 65.377000
 49 | cudamemcpy mrcuda 2 131072 107.146000
 50 | cudamemcpy mrcuda 2 262144 190.121000
 51 | cudamemcpy mrcuda 2 524288 360.972000
 52 | cudamemcpy mrcuda 2 1048576 769.155000
 53 | cudamemcpy mrcuda 2 2097152 1610.051000
 54 | cudamemcpy mrcuda 2 4194304 3281.864000
 55 | cudamemcpy mrcuda 2 8388608 6782.289000
 56 | cudamemcpy mrcuda 2 16777216 14296.611000
 57 | cudamemcpy mrcuda 2 33554432 28939.121000
 58 | cudamemcpy mrcuda 2 67108864 57711.919000
 59 | cudamemcpy mrcuda 2 134217728 115116.285000
 60 | cudamemcpy mrcuda 2 268435456 229418.731000
 61 | cudamemcpy mrcuda 2 536870912 458720.893000
 62 | cudamemcpy mrcuda 3 1024 24.432000
 63 | cudamemcpy mrcuda 3 2048 23.433000
 64 | cudamemcpy mrcuda 3 4096 23.725000
 65 | cudamemcpy mrcuda 3 8192 26.918000
 66 | cudamemcpy mrcuda 3 16384 32.127000
 67 | cudamemcpy mrcuda 3 32768 43.121000
 68 | cudamemcpy mrcuda 3 65536 65.396000
 69 | cudamemcpy mrcuda 3 131072 107.548000
 70 | cudamemcpy mrcuda 3 262144 190.862000
 71 | cudamemcpy mrcuda 3 524288 363.774000
 72 | cudamemcpy mrcuda 3 1048576 733.667000
 73 | cudamemcpy mrcuda 3 2097152 1561.821000
 74 | cudamemcpy mrcuda 3 4194304 3264.504000
 75 | cudamemcpy mrcuda 3 8388608 6770.502000
 76 | cudamemcpy mrcuda 3 16777216 14221.611000
 77 | cudamemcpy mrcuda 3 33554432 28949.865000
 78 | cudamemcpy mrcuda 3 67108864 57812.058000
 79 | cudamemcpy mrcuda 3 134217728 115184.516000
 80 | cudamemcpy mrcuda 3 268435456 229561.263000
 81 | cudamemcpy mrcuda 3 536870912 458857.855000
 82 | cudamemcpy mrcuda 4 1024 23.803000
 83 | cudamemcpy mrcuda 4 2048 23.921000
 84 | cudamemcpy mrcuda 4 4096 24.169000
 85 | cudamemcpy mrcuda 4 8192 27.698000
 86 | cudamemcpy mrcuda 4 16384 32.877000
 87 | cudamemcpy mrcuda 4 32768 43.996000
 88 | cudamemcpy mrcuda 4 65536 66.135000
 89 | cudamemcpy mrcuda 4 131072 107.902000
 90 | cudamemcpy mrcuda 4 262144 190.761000
 91 | cudamemcpy mrcuda 4 524288 357.221000
 92 | cudamemcpy mrcuda 4 1048576 734.587000
 93 | cudamemcpy mrcuda 4 2097152 1501.452000
 94 | cudamemcpy mrcuda 4 4194304 3157.658000
 95 | cudamemcpy mrcuda 4 8388608 6724.800000
 96 | cudamemcpy mrcuda 4 16777216 14084.432000
 97 | cudamemcpy mrcuda 4 33554432 28745.811000
 98 | cudamemcpy mrcuda 4 67108864 57456.041000
 99 | cudamemcpy mrcuda 4 134217728 114529.454000
100 | cudamemcpy mrcuda 4 268435456 228555.257000
101 | cudamemcpy mrcuda 4 536870912 456908.974000
102 | cudamemcpy mrcuda 5 1024 24.740000
103 | cudamemcpy mrcuda 5 2048 24.142000
104 | cudamemcpy mrcuda 5 4096 24.591000
105 | cudamemcpy mrcuda 5 8192 27.863000
106 | cudamemcpy mrcuda 5 16384 33.229000
107 | cudamemcpy mrcuda 5 32768 44.054000
108 | cudamemcpy mrcuda 5 65536 66.328000
109 | cudamemcpy mrcuda 5 131072 108.365000
110 | cudamemcpy mrcuda 5 262144 190.897000
111 | cudamemcpy mrcuda 5 524288 362.404000
112 | cudamemcpy mrcuda 5 1048576 768.185000
113 | cudamemcpy mrcuda 5 2097152 1609.308000
114 | cudamemcpy mrcuda 5 4194304 3284.896000
115 | cudamemcpy mrcuda 5 8388608 6900.502000
116 | cudamemcpy mrcuda 5 16777216 14266.649000
117 | cudamemcpy mrcuda 5 33554432 28960.732000
118 | cudamemcpy mrcuda 5 67108864 57796.792000
119 | cudamemcpy mrcuda 5 134217728 115037.879000
120 | cudamemcpy mrcuda 5 268435456 229401.095000
121 | cudamemcpy mrcuda 5 536870912 458578.661000
122 | cudamemcpy mrcuda 6 1024 23.720000
123 | cudamemcpy mrcuda 6 2048 23.777000
124 | cudamemcpy mrcuda 6 4096 24.075000
125 | cudamemcpy mrcuda 6 8192 27.386000
126 | cudamemcpy mrcuda 6 16384 32.648000
127 | cudamemcpy mrcuda 6 32768 43.408000
128 | cudamemcpy mrcuda 6 65536 65.562000
129 | cudamemcpy mrcuda 6 131072 107.481000
130 | cudamemcpy mrcuda 6 262144 190.448000
131 | cudamemcpy mrcuda 6 524288 361.721000
132 | cudamemcpy mrcuda 6 1048576 775.303000
133 | cudamemcpy mrcuda 6 2097152 1602.592000
134 | cudamemcpy mrcuda 6 4194304 3262.345000
135 | cudamemcpy mrcuda 6 8388608 6864.071000
136 | cudamemcpy mrcuda 6 16777216 14261.377000
137 | cudamemcpy mrcuda 6 33554432 28879.013000
138 | cudamemcpy mrcuda 6 67108864 57695.091000
139 | cudamemcpy mrcuda 6 134217728 114821.480000
140 | cudamemcpy mrcuda 6 268435456 228831.121000
141 | cudamemcpy mrcuda 6 536870912 457399.061000
142 | cudamemcpy mrcuda 7 1024 24.467000
143 | cudamemcpy mrcuda 7 2048 23.916000
144 | cudamemcpy mrcuda 7 4096 24.277000
145 | cudamemcpy mrcuda 7 8192 27.448000
146 | cudamemcpy mrcuda 7 16384 32.568000
147 | cudamemcpy mrcuda 7 32768 43.604000
148 | cudamemcpy mrcuda 7 65536 65.713000
149 | cudamemcpy mrcuda 7 131072 108.453000
150 | cudamemcpy mrcuda 7 262144 191.636000
151 | cudamemcpy mrcuda 7 524288 361.887000
152 | cudamemcpy mrcuda 7 1048576 774.704000
153 | cudamemcpy mrcuda 7 2097152 1610.828000
154 | cudamemcpy mrcuda 7 4194304 3275.267000
155 | cudamemcpy mrcuda 7 8388608 6902.061000
156 | cudamemcpy mrcuda 7 16777216 14316.970000
157 | cudamemcpy mrcuda 7 33554432 29001.885000
158 | cudamemcpy mrcuda 7 67108864 57918.016000
159 | cudamemcpy mrcuda 7 134217728 115225.240000
160 | cudamemcpy mrcuda 7 268435456 229559.663000
161 | cudamemcpy mrcuda 7 536870912 458412.680000
162 | cudamemcpy mrcuda 8 1024 24.201000
163 | cudamemcpy mrcuda 8 2048 23.573000
164 | cudamemcpy mrcuda 8 4096 24.008000
165 | cudamemcpy mrcuda 8 8192 27.317000
166 | cudamemcpy mrcuda 8 16384 32.523000
167 | cudamemcpy mrcuda 8 32768 43.500000
168 | cudamemcpy mrcuda 8 65536 65.584000
169 | cudamemcpy mrcuda 8 131072 107.949000
170 | cudamemcpy mrcuda 8 262144 190.804000
171 | cudamemcpy mrcuda 8 524288 364.756000
172 | cudamemcpy mrcuda 8 1048576 724.120000
173 | cudamemcpy mrcuda 8 2097152 1550.997000
174 | cudamemcpy mrcuda 8 4194304 3218.520000
175 | cudamemcpy mrcuda 8 8388608 6666.086000
176 | cudamemcpy mrcuda 8 16777216 14107.568000
177 | cudamemcpy mrcuda 8 33554432 28795.421000
178 | cudamemcpy mrcuda 8 67108864 57638.160000
179 | cudamemcpy mrcuda 8 134217728 114926.156000
180 | cudamemcpy mrcuda 8 268435456 229095.012000
181 | cudamemcpy mrcuda 8 536870912 457751.274000
182 | cudamemcpy mrcuda 9 1024 23.594000
183 | cudamemcpy mrcuda 9 2048 24.050000
184 | cudamemcpy mrcuda 9 4096 24.403000
185 | cudamemcpy mrcuda 9 8192 27.678000
186 | cudamemcpy mrcuda 9 16384 32.980000
187 | cudamemcpy mrcuda 9 32768 43.657000
188 | cudamemcpy mrcuda 9 65536 65.738000
189 | cudamemcpy mrcuda 9 131072 107.550000
190 | cudamemcpy mrcuda 9 262144 190.718000
191 | cudamemcpy mrcuda 9 524288 363.023000
192 | cudamemcpy mrcuda 9 1048576 729.539000
193 | cudamemcpy mrcuda 9 2097152 1505.266000
194 | cudamemcpy mrcuda 9 4194304 3158.483000
195 | cudamemcpy mrcuda 9 8388608 6669.139000
196 | cudamemcpy mrcuda 9 16777216 14043.886000
197 | cudamemcpy mrcuda 9 33554432 28737.694000
198 | cudamemcpy mrcuda 9 67108864 57545.285000
199 | cudamemcpy mrcuda 9 134217728 114763.498000
200 | cudamemcpy mrcuda 9 268435456 229028.840000
201 | cudamemcpy mrcuda 9 536870912 457445.702000
202 | cudamemcpy native 0 1024 4.898000
203 | cudamemcpy native 0 2048 5.050000
204 | cudamemcpy native 0 4096 5.473000
205 | cudamemcpy native 0 8192 6.790000
206 | cudamemcpy native 0 16384 9.148000
207 | cudamemcpy native 0 32768 13.953000
208 | cudamemcpy native 0 65536 23.594000
209 | cudamemcpy native 0 131072 36.605000
210 | cudamemcpy native 0 262144 66.875000
211 | cudamemcpy native 0 524288 127.410000
212 | cudamemcpy native 0 1048576 249.071000
213 | cudamemcpy native 0 2097152 423.865000
214 | cudamemcpy native 0 4194304 773.866000
215 | cudamemcpy native 0 8388608 1464.670000
216 | cudamemcpy native 0 16777216 2860.749000
217 | cudamemcpy native 0 33554432 5636.800000
218 | cudamemcpy native 0 67108864 11195.433000
219 | cudamemcpy native 0 134217728 22313.544000
220 | cudamemcpy native 0 268435456 44545.160000
221 | cudamemcpy native 0 536870912 88965.398000
222 | cudamemcpy native 1 1024 4.922000
223 | cudamemcpy native 1 2048 5.040000
224 | cudamemcpy native 1 4096 5.501000
225 | cudamemcpy native 1 8192 6.835000
226 | cudamemcpy native 1 16384 9.161000
227 | cudamemcpy native 1 32768 13.996000
228 | cudamemcpy native 1 65536 23.676000
229 | cudamemcpy native 1 131072 36.652000
230 | cudamemcpy native 1 262144 66.976000
231 | cudamemcpy native 1 524288 127.428000
232 | cudamemcpy native 1 1048576 248.992000
233 | cudamemcpy native 1 2097152 423.326000
234 | cudamemcpy native 1 4194304 772.537000
235 | cudamemcpy native 1 8388608 1464.817000
236 | cudamemcpy native 1 16777216 2855.708000
237 | cudamemcpy native 1 33554432 5633.196000
238 | cudamemcpy native 1 67108864 11194.047000
239 | cudamemcpy native 1 134217728 22306.524000
240 | cudamemcpy native 1 268435456 44531.106000
241 | cudamemcpy native 1 536870912 88942.431000
242 | cudamemcpy native 2 1024 4.927000
243 | cudamemcpy native 2 2048 5.030000
244 | cudamemcpy native 2 4096 5.484000
245 | cudamemcpy native 2 8192 6.796000
246 | cudamemcpy native 2 16384 9.124000
247 | cudamemcpy native 2 32768 13.966000
248 | cudamemcpy native 2 65536 23.586000
249 | cudamemcpy native 2 131072 36.658000
250 | cudamemcpy native 2 262144 66.871000
251 | cudamemcpy native 2 524288 127.386000
252 | cudamemcpy native 2 1048576 276.266000
253 | cudamemcpy native 2 2097152 449.664000
254 | cudamemcpy native 2 4194304 800.574000
255 | cudamemcpy native 2 8388608 1490.734000
256 | cudamemcpy native 2 16777216 2890.811000
257 | cudamemcpy native 2 33554432 5670.958000
258 | cudamemcpy native 2 67108864 11224.960000
259 | cudamemcpy native 2 134217728 22320.847000
260 | cudamemcpy native 2 268435456 44527.648000
261 | cudamemcpy native 2 536870912 88956.920000
262 | cudamemcpy native 3 1024 4.914000
263 | cudamemcpy native 3 2048 5.064000
264 | cudamemcpy native 3 4096 5.478000
265 | cudamemcpy native 3 8192 6.788000
266 | cudamemcpy native 3 16384 9.145000
267 | cudamemcpy native 3 32768 13.995000
268 | cudamemcpy native 3 65536 23.598000
269 | cudamemcpy native 3 131072 36.605000
270 | cudamemcpy native 3 262144 66.904000
271 | cudamemcpy native 3 524288 127.529000
272 | cudamemcpy native 3 1048576 249.258000
273 | cudamemcpy native 3 2097152 423.666000
274 | cudamemcpy native 3 4194304 772.672000
275 | cudamemcpy native 3 8388608 1464.225000
276 | cudamemcpy native 3 16777216 2858.230000
277 | cudamemcpy native 3 33554432 5633.703000
278 | cudamemcpy native 3 67108864 11185.736000
279 | cudamemcpy native 3 134217728 22286.092000
280 | cudamemcpy native 3 268435456 44494.399000
281 | cudamemcpy native 3 536870912 88906.943000
282 | cudamemcpy native 4 1024 4.929000
283 | cudamemcpy native 4 2048 5.058000
284 | cudamemcpy native 4 4096 5.505000
285 | cudamemcpy native 4 8192 6.804000
286 | cudamemcpy native 4 16384 9.168000
287 | cudamemcpy native 4 32768 13.978000
288 | cudamemcpy native 4 65536 23.640000
289 | cudamemcpy native 4 131072 36.665000
290 | cudamemcpy native 4 262144 66.956000
291 | cudamemcpy native 4 524288 127.496000
292 | cudamemcpy native 4 1048576 249.023000
293 | cudamemcpy native 4 2097152 423.554000
294 | cudamemcpy native 4 4194304 772.551000
295 | cudamemcpy native 4 8388608 1463.784000
296 | cudamemcpy native 4 16777216 2858.779000
297 | cudamemcpy native 4 33554432 5634.908000
298 | cudamemcpy native 4 67108864 11197.449000
299 | cudamemcpy native 4 134217728 22302.144000
300 | cudamemcpy native 4 268435456 44528.614000
301 | cudamemcpy native 4 536870912 88931.180000
302 | cudamemcpy native 5 1024 4.916000
303 | cudamemcpy native 5 2048 5.056000
304 | cudamemcpy native 5 4096 5.495000
305 | cudamemcpy native 5 8192 6.817000
306 | cudamemcpy native 5 16384 9.193000
307 | cudamemcpy native 5 32768 14.084000
308 | cudamemcpy native 5 65536 23.629000
309 | cudamemcpy native 5 131072 36.595000
310 | cudamemcpy native 5 262144 66.844000
311 | cudamemcpy native 5 524288 127.363000
312 | cudamemcpy native 5 1048576 248.870000
313 | cudamemcpy native 5 2097152 423.485000
314 | cudamemcpy native 5 4194304 772.666000
315 | cudamemcpy native 5 8388608 1464.365000
316 | cudamemcpy native 5 16777216 2859.676000
317 | cudamemcpy native 5 33554432 5636.493000
318 | cudamemcpy native 5 67108864 11201.132000
319 | cudamemcpy native 5 134217728 22306.450000
320 | cudamemcpy native 5 268435456 44522.188000
321 | cudamemcpy native 5 536870912 88964.001000
322 | cudamemcpy native 6 1024 4.880000
323 | cudamemcpy native 6 2048 5.054000
324 | cudamemcpy native 6 4096 5.494000
325 | cudamemcpy native 6 8192 6.780000
326 | cudamemcpy native 6 16384 9.165000
327 | cudamemcpy native 6 32768 13.966000
328 | cudamemcpy native 6 65536 23.590000
329 | cudamemcpy native 6 131072 36.656000
330 | cudamemcpy native 6 262144 67.015000
331 | cudamemcpy native 6 524288 127.506000
332 | cudamemcpy native 6 1048576 249.662000
333 | cudamemcpy native 6 2097152 424.632000
334 | cudamemcpy native 6 4194304 773.746000
335 | cudamemcpy native 6 8388608 1468.640000
336 | cudamemcpy native 6 16777216 2864.502000
337 | cudamemcpy native 6 33554432 5649.572000
338 | cudamemcpy native 6 67108864 11222.734000
339 | cudamemcpy native 6 134217728 22363.116000
340 | cudamemcpy native 6 268435456 44651.063000
341 | cudamemcpy native 6 536870912 89210.271000
342 | cudamemcpy native 7 1024 4.906000
343 | cudamemcpy native 7 2048 5.061000
344 | cudamemcpy native 7 4096 5.507000
345 | cudamemcpy native 7 8192 6.801000
346 | cudamemcpy native 7 16384 9.163000
347 | cudamemcpy native 7 32768 14.023000
348 | cudamemcpy native 7 65536 23.634000
349 | cudamemcpy native 7 131072 36.718000
350 | cudamemcpy native 7 262144 67.052000
351 | cudamemcpy native 7 524288 128.877000
352 | cudamemcpy native 7 1048576 249.558000
353 | cudamemcpy native 7 2097152 424.701000
354 | cudamemcpy native 7 4194304 773.827000
355 | cudamemcpy native 7 8388608 1468.707000
356 | cudamemcpy native 7 16777216 2867.657000
357 | cudamemcpy native 7 33554432 5652.540000
358 | cudamemcpy native 7 67108864 11232.904000
359 | cudamemcpy native 7 134217728 22372.873000
360 | cudamemcpy native 7 268435456 44661.230000
361 | cudamemcpy native 7 536870912 89251.043000
362 | cudamemcpy native 8 1024 4.858000
363 | cudamemcpy native 8 2048 5.027000
364 | cudamemcpy native 8 4096 5.465000
365 | cudamemcpy native 8 8192 6.807000
366 | cudamemcpy native 8 16384 9.141000
367 | cudamemcpy native 8 32768 13.981000
368 | cudamemcpy native 8 65536 23.640000
369 | cudamemcpy native 8 131072 36.645000
370 | cudamemcpy native 8 262144 67.007000
371 | cudamemcpy native 8 524288 127.678000
372 | cudamemcpy native 8 1048576 249.723000
373 | cudamemcpy native 8 2097152 425.068000
374 | cudamemcpy native 8 4194304 775.541000
375 | cudamemcpy native 8 8388608 1469.898000
376 | cudamemcpy native 8 16777216 2869.008000
377 | cudamemcpy native 8 33554432 5658.383000
378 | cudamemcpy native 8 67108864 11239.155000
379 | cudamemcpy native 8 134217728 22381.782000
380 | cudamemcpy native 8 268435456 44677.469000
381 | cudamemcpy native 8 536870912 89271.732000
382 | cudamemcpy native 9 1024 4.917000
383 | cudamemcpy native 9 2048 5.055000
384 | cudamemcpy native 9 4096 5.491000
385 | cudamemcpy native 9 8192 6.795000
386 | cudamemcpy native 9 16384 9.172000
387 | cudamemcpy native 9 32768 13.993000
388 | cudamemcpy native 9 65536 23.643000
389 | cudamemcpy native 9 131072 36.683000
390 | cudamemcpy native 9 262144 66.824000
391 | cudamemcpy native 9 524288 127.519000
392 | cudamemcpy native 9 1048576 276.583000
393 | cudamemcpy native 9 2097152 450.542000
394 | cudamemcpy native 9 4194304 799.578000
395 | cudamemcpy native 9 8388608 1491.421000
396 | cudamemcpy native 9 16777216 2892.257000
397 | cudamemcpy native 9 33554432 5671.093000
398 | cudamemcpy native 9 67108864 11226.631000
399 | cudamemcpy native 9 134217728 22329.188000
400 | cudamemcpy native 9 268435456 44524.088000
401 | cudamemcpy native 9 536870912 88925.813000
402 | 


--------------------------------------------------------------------------------
/results/nullker-mhelper.out:
--------------------------------------------------------------------------------
  1 | prog lib count num_calls time
  2 | nullker mrcuda 0 1024 13.909000
  3 | nullker mrcuda 0 2048 27.694000
  4 | nullker mrcuda 0 4096 55.480000
  5 | nullker mrcuda 0 8192 110.551000
  6 | nullker mrcuda 0 16384 220.698000
  7 | nullker mrcuda 0 32768 376.599000
  8 | nullker mrcuda 0 65536 745.303000
  9 | nullker mrcuda 0 131072 1487.305000
 10 | nullker mrcuda 0 262144 2981.731000
 11 | nullker mrcuda 0 524288 5951.194000
 12 | nullker mrcuda 0 1048576 11905.031000
 13 | nullker mrcuda 0 2097152 23794.678000
 14 | nullker mrcuda 0 4194304 47580.513000
 15 | nullker mrcuda 0 8388608 95185.737000
 16 | nullker mrcuda 0 16777216 190206.546000
 17 | nullker mrcuda 1 1024 13.948000
 18 | nullker mrcuda 1 2048 27.847000
 19 | nullker mrcuda 1 4096 55.602000
 20 | nullker mrcuda 1 8192 110.650000
 21 | nullker mrcuda 1 16384 221.341000
 22 | nullker mrcuda 1 32768 383.100000
 23 | nullker mrcuda 1 65536 746.129000
 24 | nullker mrcuda 1 131072 1496.601000
 25 | nullker mrcuda 1 262144 2995.116000
 26 | nullker mrcuda 1 524288 5985.191000
 27 | nullker mrcuda 1 1048576 11979.065000
 28 | nullker mrcuda 1 2097152 23947.264000
 29 | nullker mrcuda 1 4194304 47903.340000
 30 | nullker mrcuda 1 8388608 95821.928000
 31 | nullker mrcuda 1 16777216 191631.296000
 32 | nullker mrcuda 2 1024 11.762000
 33 | nullker mrcuda 2 2048 23.572000
 34 | nullker mrcuda 2 4096 46.782000
 35 | nullker mrcuda 2 8192 92.995000
 36 | nullker mrcuda 2 16384 186.496000
 37 | nullker mrcuda 2 32768 373.267000
 38 | nullker mrcuda 2 65536 745.067000
 39 | nullker mrcuda 2 131072 1491.059000
 40 | nullker mrcuda 2 262144 2975.860000
 41 | nullker mrcuda 2 524288 5957.743000
 42 | nullker mrcuda 2 1048576 11907.501000
 43 | nullker mrcuda 2 2097152 23853.835000
 44 | nullker mrcuda 2 4194304 47667.329000
 45 | nullker mrcuda 2 8388608 95264.386000
 46 | nullker mrcuda 2 16777216 190606.974000
 47 | nullker mrcuda 3 1024 13.931000
 48 | nullker mrcuda 3 2048 27.930000
 49 | nullker mrcuda 3 4096 55.193000
 50 | nullker mrcuda 3 8192 109.578000
 51 | nullker mrcuda 3 16384 220.805000
 52 | nullker mrcuda 3 32768 376.383000
 53 | nullker mrcuda 3 65536 744.637000
 54 | nullker mrcuda 3 131072 1473.593000
 55 | nullker mrcuda 3 262144 2953.483000
 56 | nullker mrcuda 3 524288 5922.429000
 57 | nullker mrcuda 3 1048576 11851.660000
 58 | nullker mrcuda 3 2097152 23698.863000
 59 | nullker mrcuda 3 4194304 47307.894000
 60 | nullker mrcuda 3 8388608 94766.222000
 61 | nullker mrcuda 3 16777216 189753.937000
 62 | nullker mrcuda 4 1024 12.148000
 63 | nullker mrcuda 4 2048 24.513000
 64 | nullker mrcuda 4 4096 48.559000
 65 | nullker mrcuda 4 8192 96.675000
 66 | nullker mrcuda 4 16384 194.782000
 67 | nullker mrcuda 4 32768 387.978000
 68 | nullker mrcuda 4 65536 779.653000
 69 | nullker mrcuda 4 131072 1559.794000
 70 | nullker mrcuda 4 262144 3126.936000
 71 | nullker mrcuda 4 524288 6238.395000
 72 | nullker mrcuda 4 1048576 12470.579000
 73 | nullker mrcuda 4 2097152 24921.762000
 74 | nullker mrcuda 4 4194304 49788.988000
 75 | nullker mrcuda 4 8388608 99457.578000
 76 | nullker mrcuda 4 16777216 199119.432000
 77 | nullker mrcuda 5 1024 14.360000
 78 | nullker mrcuda 5 2048 28.614000
 79 | nullker mrcuda 5 4096 57.200000
 80 | nullker mrcuda 5 8192 113.692000
 81 | nullker mrcuda 5 16384 227.503000
 82 | nullker mrcuda 5 32768 388.961000
 83 | nullker mrcuda 5 65536 767.622000
 84 | nullker mrcuda 5 131072 1537.665000
 85 | nullker mrcuda 5 262144 3072.827000
 86 | nullker mrcuda 5 524288 6149.966000
 87 | nullker mrcuda 5 1048576 12304.688000
 88 | nullker mrcuda 5 2097152 24605.251000
 89 | nullker mrcuda 5 4194304 49200.814000
 90 | nullker mrcuda 5 8388608 98456.404000
 91 | nullker mrcuda 5 16777216 196701.645000
 92 | nullker mrcuda 6 1024 14.162000
 93 | nullker mrcuda 6 2048 28.289000
 94 | nullker mrcuda 6 4096 56.480000
 95 | nullker mrcuda 6 8192 112.021000
 96 | nullker mrcuda 6 16384 223.856000
 97 | nullker mrcuda 6 32768 391.024000
 98 | nullker mrcuda 6 65536 755.072000
 99 | nullker mrcuda 6 131072 1515.169000
100 | nullker mrcuda 6 262144 3039.992000
101 | nullker mrcuda 6 524288 6077.132000
102 | nullker mrcuda 6 1048576 12119.706000
103 | nullker mrcuda 6 2097152 24327.747000
104 | nullker mrcuda 6 4194304 48537.433000
105 | nullker mrcuda 6 8388608 97030.690000
106 | nullker mrcuda 6 16777216 194138.853000
107 | nullker mrcuda 7 1024 13.810000
108 | nullker mrcuda 7 2048 27.852000
109 | nullker mrcuda 7 4096 55.083000
110 | nullker mrcuda 7 8192 109.188000
111 | nullker mrcuda 7 16384 218.314000
112 | nullker mrcuda 7 32768 376.721000
113 | nullker mrcuda 7 65536 735.194000
114 | nullker mrcuda 7 131072 1481.617000
115 | nullker mrcuda 7 262144 2954.298000
116 | nullker mrcuda 7 524288 5911.131000
117 | nullker mrcuda 7 1048576 11806.652000
118 | nullker mrcuda 7 2097152 23656.850000
119 | nullker mrcuda 7 4194304 47241.286000
120 | nullker mrcuda 7 8388608 94611.828000
121 | nullker mrcuda 7 16777216 189050.351000
122 | nullker mrcuda 8 1024 14.095000
123 | nullker mrcuda 8 2048 28.081000
124 | nullker mrcuda 8 4096 55.970000
125 | nullker mrcuda 8 8192 110.973000
126 | nullker mrcuda 8 16384 222.415000
127 | nullker mrcuda 8 32768 390.757000
128 | nullker mrcuda 8 65536 751.369000
129 | nullker mrcuda 8 131072 1505.198000
130 | nullker mrcuda 8 262144 3009.886000
131 | nullker mrcuda 8 524288 6017.065000
132 | nullker mrcuda 8 1048576 12057.644000
133 | nullker mrcuda 8 2097152 24091.687000
134 | nullker mrcuda 8 4194304 48175.926000
135 | nullker mrcuda 8 8388608 96237.943000
136 | nullker mrcuda 8 16777216 192701.657000
137 | nullker mrcuda 9 1024 14.060000
138 | nullker mrcuda 9 2048 28.105000
139 | nullker mrcuda 9 4096 56.126000
140 | nullker mrcuda 9 8192 111.257000
141 | nullker mrcuda 9 16384 222.588000
142 | nullker mrcuda 9 32768 388.535000
143 | nullker mrcuda 9 65536 749.658000
144 | nullker mrcuda 9 131072 1509.148000
145 | nullker mrcuda 9 262144 3006.658000
146 | nullker mrcuda 9 524288 6021.239000
147 | nullker mrcuda 9 1048576 12045.826000
148 | nullker mrcuda 9 2097152 24086.604000
149 | nullker mrcuda 9 4194304 48086.814000
150 | nullker mrcuda 9 8388608 96193.359000
151 | nullker mrcuda 9 16777216 192125.235000
152 | nullker native 0 1024 1.693000
153 | nullker native 0 2048 3.328000
154 | nullker native 0 4096 6.596000
155 | nullker native 0 8192 12.316000
156 | nullker native 0 16384 24.535000
157 | nullker native 0 32768 49.045000
158 | nullker native 0 65536 98.111000
159 | nullker native 0 131072 196.670000
160 | nullker native 0 262144 392.933000
161 | nullker native 0 524288 787.145000
162 | nullker native 0 1048576 1578.461000
163 | nullker native 0 2097152 3150.883000
164 | nullker native 0 4194304 6305.144000
165 | nullker native 0 8388608 12608.160000
166 | nullker native 0 16777216 25167.428000
167 | nullker native 1 1024 1.930000
168 | nullker native 1 2048 3.870000
169 | nullker native 1 4096 7.645000
170 | nullker native 1 8192 14.576000
171 | nullker native 1 16384 29.061000
172 | nullker native 1 32768 59.444000
173 | nullker native 1 65536 116.105000
174 | nullker native 1 131072 232.417000
175 | nullker native 1 262144 395.501000
176 | nullker native 1 524288 782.965000
177 | nullker native 1 1048576 1572.530000
178 | nullker native 1 2097152 3140.093000
179 | nullker native 1 4194304 6273.480000
180 | nullker native 1 8388608 12540.560000
181 | nullker native 1 16777216 25076.293000
182 | nullker native 2 1024 1.931000
183 | nullker native 2 2048 3.850000
184 | nullker native 2 4096 7.639000
185 | nullker native 2 8192 14.556000
186 | nullker native 2 16384 29.103000
187 | nullker native 2 32768 58.037000
188 | nullker native 2 65536 116.313000
189 | nullker native 2 131072 229.299000
190 | nullker native 2 262144 392.810000
191 | nullker native 2 524288 784.319000
192 | nullker native 2 1048576 1571.662000
193 | nullker native 2 2097152 3142.692000
194 | nullker native 2 4194304 6281.323000
195 | nullker native 2 8388608 12563.783000
196 | nullker native 2 16777216 25143.079000
197 | nullker native 3 1024 1.925000
198 | nullker native 3 2048 3.851000
199 | nullker native 3 4096 7.658000
200 | nullker native 3 8192 14.506000
201 | nullker native 3 16384 29.193000
202 | nullker native 3 32768 58.076000
203 | nullker native 3 65536 116.383000
204 | nullker native 3 131072 230.686000
205 | nullker native 3 262144 391.538000
206 | nullker native 3 524288 781.478000
207 | nullker native 3 1048576 1567.546000
208 | nullker native 3 2097152 3139.788000
209 | nullker native 3 4194304 6269.944000
210 | nullker native 3 8388608 12546.773000
211 | nullker native 3 16777216 25069.748000
212 | nullker native 4 1024 1.924000
213 | nullker native 4 2048 3.862000
214 | nullker native 4 4096 7.633000
215 | nullker native 4 8192 14.547000
216 | nullker native 4 16384 29.111000
217 | nullker native 4 32768 58.003000
218 | nullker native 4 65536 116.376000
219 | nullker native 4 131072 232.829000
220 | nullker native 4 262144 398.024000
221 | nullker native 4 524288 784.508000
222 | nullker native 4 1048576 1573.480000
223 | nullker native 4 2097152 3139.734000
224 | nullker native 4 4194304 6275.518000
225 | nullker native 4 8388608 12546.614000
226 | nullker native 4 16777216 25070.691000
227 | nullker native 5 1024 1.670000
228 | nullker native 5 2048 3.293000
229 | nullker native 5 4096 6.543000
230 | nullker native 5 8192 12.294000
231 | nullker native 5 16384 24.577000
232 | nullker native 5 32768 48.953000
233 | nullker native 5 65536 97.914000
234 | nullker native 5 131072 195.743000
235 | nullker native 5 262144 392.773000
236 | nullker native 5 524288 783.770000
237 | nullker native 5 1048576 1574.795000
238 | nullker native 5 2097152 3143.471000
239 | nullker native 5 4194304 6282.858000
240 | nullker native 5 8388608 12580.392000
241 | nullker native 5 16777216 25153.583000
242 | nullker native 6 1024 1.916000
243 | nullker native 6 2048 3.872000
244 | nullker native 6 4096 7.634000
245 | nullker native 6 8192 14.544000
246 | nullker native 6 16384 29.210000
247 | nullker native 6 32768 58.203000
248 | nullker native 6 65536 116.366000
249 | nullker native 6 131072 232.485000
250 | nullker native 6 262144 393.469000
251 | nullker native 6 524288 783.498000
252 | nullker native 6 1048576 1571.326000
253 | nullker native 6 2097152 3140.647000
254 | nullker native 6 4194304 6264.444000
255 | nullker native 6 8388608 12527.807000
256 | nullker native 6 16777216 25067.070000
257 | nullker native 7 1024 1.926000
258 | nullker native 7 2048 3.839000
259 | nullker native 7 4096 7.660000
260 | nullker native 7 8192 14.561000
261 | nullker native 7 16384 29.069000
262 | nullker native 7 32768 58.140000
263 | nullker native 7 65536 116.144000
264 | nullker native 7 131072 228.217000
265 | nullker native 7 262144 392.149000
266 | nullker native 7 524288 783.238000
267 | nullker native 7 1048576 1572.047000
268 | nullker native 7 2097152 3140.495000
269 | nullker native 7 4194304 6267.781000
270 | nullker native 7 8388608 12546.230000
271 | nullker native 7 16777216 25100.963000
272 | nullker native 8 1024 1.688000
273 | nullker native 8 2048 3.300000
274 | nullker native 8 4096 6.539000
275 | nullker native 8 8192 12.336000
276 | nullker native 8 16384 24.511000
277 | nullker native 8 32768 48.958000
278 | nullker native 8 65536 97.993000
279 | nullker native 8 131072 195.946000
280 | nullker native 8 262144 392.690000
281 | nullker native 8 524288 784.694000
282 | nullker native 8 1048576 1573.828000
283 | nullker native 8 2097152 3146.828000
284 | nullker native 8 4194304 6291.588000
285 | nullker native 8 8388608 12585.252000
286 | nullker native 8 16777216 25207.118000
287 | nullker native 9 1024 1.916000
288 | nullker native 9 2048 3.869000
289 | nullker native 9 4096 7.611000
290 | nullker native 9 8192 14.613000
291 | nullker native 9 16384 29.131000
292 | nullker native 9 32768 58.367000
293 | nullker native 9 65536 116.631000
294 | nullker native 9 131072 233.111000
295 | nullker native 9 262144 395.504000
296 | nullker native 9 524288 785.364000
297 | nullker native 9 1048576 1573.954000
298 | nullker native 9 2097152 3150.368000
299 | nullker native 9 4194304 6289.309000
300 | nullker native 9 8388608 12580.401000
301 | nullker native 9 16777216 25135.924000
302 | 


--------------------------------------------------------------------------------
/scripts/Makefile.am:
--------------------------------------------------------------------------------
 1 | bin_PROGRAMS = mrcudaexec
 2 | mrcudaexec_SOURCES = mrcudaexec.py.template
 3 | 
 4 | mrcudaexec$(EXEEXT): mrcudaexec.py.template
 5 | 	cp $< mrcudaexec$(EXEEXT)
 6 | 	${SED} -i -- 's/{{ RCUDA_LIBCUDART }}/$(shell echo "${RCUDA_LIBCUDART}" | ${SED} -e 's/\//\\\//g')/g' mrcudaexec$(EXEEXT)
 7 | 	${SED} -i -- 's/{{ NVIDIA_LIBCUDART }}/$(shell echo "${NVIDIA_LIBCUDART}" | ${SED} -e 's/\//\\\//g')/g' mrcudaexec$(EXEEXT)
 8 | 	${SED} -i -- 's/{{ MRCUDA_LIBPATH }}/$(shell echo "${libdir}" | ${SED} -e 's/\//\\\//g')/g' mrcudaexec$(EXEEXT)
 9 | 
10 | 


--------------------------------------------------------------------------------
/scripts/lib64/libcudart.so:
--------------------------------------------------------------------------------
1 | ../../build/src/.libs/libcudart.so


--------------------------------------------------------------------------------
/scripts/lib64/libcudart.so.6.5:
--------------------------------------------------------------------------------
1 | libcudart.so


--------------------------------------------------------------------------------
/scripts/lib64/rCUDAcommIB.so:
--------------------------------------------------------------------------------
1 | /home/pak/Projects/rCUDAv15.07-CUDA7.0/lib/rCUDAcommIB.so


--------------------------------------------------------------------------------
/scripts/lib64/rCUDAcommTCP.so:
--------------------------------------------------------------------------------
1 | /home/pak/Projects/rCUDAv15.07-CUDA7.0/lib/rCUDAcommTCP.so


--------------------------------------------------------------------------------
/scripts/mrcudaexec.py.template:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import subprocess
  6 | from optparse import OptionParser
  7 | 
  8 | framework_directory = os.path.dirname(os.path.abspath(__file__))
  9 | framework_lib_directory = '{{ MRCUDA_LIBPATH }}'
 10 | 
 11 | def parse_args():
 12 |     parser = OptionParser(usage = '%prog -s SERVER [options] -- PROGRAM')
 13 |     parser.add_option('-t', '--network-type', dest = 'network_type',
 14 |         choices = ['IB', 'TCP',],
 15 |         default = 'TCP',
 16 |         help = 'type of network for rCUDA (IB, TCP) [default = TCP].'
 17 |     )
 18 |     parser.add_option('-n', '--number-of-devices', type = 'int',
 19 |         default = 1,
 20 |         dest = 'number_of_devices',
 21 |         help = 'number of GPU devices to be used [default = 1].'
 22 |     )
 23 |     parser.add_option('-s', '--server',
 24 |         dest = 'server_address',
 25 |         help = 'rCUDA server address.',
 26 |     )
 27 |     parser.add_option('-p', '--port', type = 'int',
 28 |         default = 8308,
 29 |         dest = 'port',
 30 |         help = 'Port number [default = 8308].',
 31 |     )
 32 |     parser.add_option('-f', '--server-file',
 33 |         dest = 'server_file',
 34 |         help = 'rCUDA server file.'
 35 |     )
 36 |     parser.add_option('--rcuda-libcudart',
 37 |         dest = 'rcuda_libcudart',
 38 |         default = '{{ RCUDA_LIBCUDART }}',
 39 |         help = 'rCUDA\'s libcudart.so path [default = \'{{ RCUDA_LIBCUDART }}\']'
 40 |     )
 41 |     parser.add_option('--nvidia-libcudart',
 42 |         dest = 'nvidia_libcudart',
 43 |         default = '{{ NVIDIA_LIBCUDART }}',
 44 |         help = 'NVIDIA\'s libcudart.so path [default = \'{{ NVIDIA_LIBCUDART }}\']'
 45 |     )
 46 |     parser.add_option('--switch-threshold', type = 'int',
 47 |         dest = 'switch_threshold',
 48 |         default = 0,
 49 |         help = 'Switching threshold value (positive integer) [default = 0]'
 50 |     )
 51 |     parser.add_option('--sock-path',
 52 |         dest = 'sock_path',
 53 |         default = '/tmp/mrcuda.sock',
 54 |         help = 'Switching socket path [default = /tmp/mrcuda.sock]'
 55 |     )
 56 |     parser.add_option('--mhelper-path',
 57 |         dest = 'mhelper_path',
 58 |         default = os.path.join(framework_directory, 'mhelper'),
 59 |         help = 'mhelper\'s path [default = %s]' % (os.path.join(framework_directory, 'mhelper'),)
 60 |     )
 61 | 
 62 |     options, args = parser.parse_args()
 63 |     if not options.server_address and not options.server_file:
 64 |         parser.error('either -s or -f option is required.')
 65 |     elif options.server_address and options.server_file:
 66 |         parser.error('-s and -f options cannot be used at the same time.')
 67 |     if options.switch_threshold != 'RCUDA' and options.switch_threshold != 'NVIDIA':
 68 |         try:
 69 |             int(options.switch_threshold)
 70 |         except ValueError:
 71 |             parser.error('Only a positive integer, "RCUDA", or "NVIDIA" are allowed as a value of --switch-threshold.')
 72 |     if len(args) == 0:
 73 |         parser.error('Please specify PROGRAM to execute.')
 74 |     return options, args
 75 | 
 76 | def main(options, args):
 77 |     ld_lib_path = framework_lib_directory
 78 | 
 79 |     program_args = args
 80 | 
 81 |     environment = os.environ.copy()
 82 |     if 'LD_LIBRARY_PATH' in environment:
 83 |         ld_lib_path = ld_lib_path + ':' + environment.get('LD_LIBRARY_PATH')
 84 | 
 85 |     environment['LD_LIBRARY_PATH'] = ld_lib_path
 86 |     environment['RCUDAPROTO'] = options.network_type
 87 |     environment['RCUDA_DEVICE_COUNT'] = str(options.number_of_devices)
 88 |     environment['MRCUDA_NVIDIA_LIB_PATH'] = options.nvidia_libcudart
 89 |     environment['MRCUDA_RCUDA_LIB_PATH'] = options.rcuda_libcudart
 90 |     environment['MRCUDA_SOCK_PATH'] = options.sock_path
 91 |     environment['MHELPER_PATH'] = options.mhelper_path
 92 | 
 93 |     i = 0
 94 |     if options.server_address:
 95 |         while i < options.number_of_devices:
 96 |             environment['RCUDA_DEVICE_' + str(i)] = options.server_address + '@' + str(options.port) + ':' + str(i)
 97 |             environment['MRCUDA_SWITCH_THRESHOLD_' + str(i)] = str(options.switch_threshold)
 98 |             i += 1
 99 |     else:
100 |         f = open(options.server_file, 'r')
101 |         server_lists = f.readlines()
102 |         f.close()
103 | 
104 |         server_lists = [s.strip() for s in server_lists if len(s.strip()) > 0]
105 |         if len(server_lists) == 0:
106 |             raise Exception('Server file does not contain any server information.')
107 | 
108 |         j = 0
109 |         while i < options.number_of_devices:
110 |             server_info = server_lists[j].split('|')
111 |             if len(server_info) != 2:
112 |                 raise Exception('Server file is not well-formed.')
113 |             environment['RCUDA_DEVICE_' + str(i)] = server_info[0]
114 |             environment['MRCUDA_SWITCH_THRESHOLD_' + str(i)] = server_info[1]
115 |             j += 1
116 |             if j >= len(server_lists):
117 |                 j = 0
118 |             i += 1
119 | 
120 |     p = subprocess.Popen(
121 |         program_args,
122 |         env = environment
123 |     )
124 |     p.wait()
125 | 
126 | if __name__ == '__main__':
127 |     options, args = parse_args()
128 |     main(options, args)
129 | 
130 | 


--------------------------------------------------------------------------------
/scripts/plotters/overhead.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import matplotlib.lines as mlines
  3 | import matplotlib.markers as mmarkers
  4 | import matplotlib.font_manager
  5 | from matplotlib import rcParams
  6 | 
  7 | rcParams['mathtext.fontset'] = 'custom'
  8 | 
  9 | import numpy as np
 10 | 
 11 | import csv
 12 | import argparse
 13 | import math
 14 | 
 15 | COLOR = ['b', 'g', 'r', 'c', 'm', 'y',]
 16 | 
 17 | def parseargs():
 18 |     """
 19 |     Manage the program arguments.
 20 |     """
 21 |     parser = argparse.ArgumentParser(
 22 |         description = 'mrCUDA overhead benchmark result plotter'
 23 |     )
 24 |     parser.add_argument('type',
 25 |         choices = ('memsync', 'memsync-bw', 'mhelper-nullker', 'mhelper-memcpybw', 'record-replay',),
 26 |         help = 'Overhead type'
 27 |     )
 28 |     parser.add_argument('resultfile', type = argparse.FileType('r'),
 29 |         help = 'Result file (csv)'
 30 |     )
 31 |     return parser.parse_args()
 32 | 
 33 | def read_memsync_input(input_file):
 34 |     # All time is in ms.
 35 |     # All sizes are in B.
 36 |     reader = csv.DictReader(input_file, delimiter = ' ')
 37 |     result = list()
 38 |     for row in reader:
 39 |         row['total_size'] = int(row['total_size'])
 40 |         row['num_regions'] = int(row['num_regions'])
 41 |         # Filter out some results to reduce size
 42 |         if math.log(row['num_regions'], 2) % 2 == 1:
 43 |             continue
 44 |         row['memsync_time'] = float(row['memsync_time'])
 45 |         row['rcuda_time'] = float(row['rcuda_time'])
 46 |         row['local_time'] = float(row['local_time'])
 47 |         row['nvidia_time'] = float(row['nvidia_time'])
 48 |         row['other_time'] = float(row['other_time'])
 49 |         row['size_per_region'] = float(row['total_size']) / float(row['num_regions'])
 50 |         row['bw'] = row['total_size'] / row['nvidia_time'] * (10 ** -3) # MB / s
 51 |         result.append(row)
 52 |     return result
 53 | 
 54 | def plot_memsync(input_data):
 55 |     properties = {
 56 |         'bw_coef': 0.04721 * (10 ** 6), # 1 / s
 57 |         'bw_max': 4778.505 * (10 ** 6), # B / s
 58 |         'memsync_coef': 5.686 * (10 ** -11), # s / B
 59 |         'memsync_const': 0, # s
 60 |     }
 61 | 
 62 |     group_dict = dict()
 63 |     predicted_dict = dict()
 64 |     for data in input_data:
 65 |         if data['num_regions'] not in group_dict:
 66 |             group_dict[data['num_regions']] = [list(), list(),]
 67 |         group_data = group_dict[data['num_regions']]
 68 |         group_data[0].append(data['size_per_region'])
 69 |         group_data[1].append(data['local_time'] / 1000) 
 70 | 
 71 |         if data['num_regions'] not in predicted_dict:
 72 |             predicted_dict[data['num_regions']] = dict()
 73 |         if data['size_per_region'] not in predicted_dict[data['num_regions']]:
 74 |             predicted_dict[data['num_regions']][data['size_per_region']] = data['num_regions'] * (properties['memsync_coef'] * data['size_per_region'] + properties['memsync_const'] + data['size_per_region'] / min(properties['bw_max'], properties['bw_coef'] * data['size_per_region']))
 75 | 
 76 |     legend_list = list()
 77 |     i = 0
 78 |     for num_regions, group_data in sorted(group_dict.items(), key = lambda item: item[0]):
 79 |         plt.scatter(group_data[0], group_data[1], 
 80 |             c = COLOR[i % len(COLOR)],
 81 |             marker = 'o' if i < len(COLOR) else '+',
 82 |             s = 40
 83 |         )
 84 |         x, y = zip(*sorted(predicted_dict[num_regions].items(), key = lambda item: item[0]))
 85 |         p = plt.plot(x, y, COLOR[i % len(COLOR)], linewidth = 4)
 86 |         legend_list.append((p[0], '$\mathbf{2^{%d}}$ regions' % (math.log(num_regions, 2),),))
 87 |         i += 1
 88 | 
 89 |     p = mlines.Line2D([], [], color = 'black', linewidth = 4)
 90 |     legend_list.append((p, 'Predicted',))
 91 |     p = mlines.Line2D([], [], color = 'black', marker = 'o', markersize = 16, linestyle = 'None')
 92 |     legend_list.append((p, 'Measured',))
 93 | 
 94 |     legend_list.reverse()
 95 | 
 96 |     plt.legend(zip(*legend_list)[0], zip(*legend_list)[1],
 97 |         loc = 'upper left',
 98 |         prop = matplotlib.font_manager.FontProperties(size = 30, weight = 'bold')
 99 |     )
100 |     plt.xscale('log', basex = 2)
101 |     plt.yscale('log', basey = 10)
102 |     plt.xlim(xmin = 0)
103 |     plt.ylim(ymin = 0)
104 | 
105 |     plt.xlabel('$\mathbf{data\_size_i}$ (B)', size = 40, weight = 'bold')
106 |     plt.ylabel('Time (s)', size = 40, weight = 'bold')
107 | 
108 |     plt.xticks(size = 35, weight = 'bold')
109 |     plt.yticks(size = 35, weight = 'bold')
110 | 
111 |     plt.show()
112 | 
113 | def plot_memsync_bw(input_data):
114 |     properties = {
115 |         'bw_coef': 0.04721 * (10 ** 6), # 1 / s
116 |         'bw_max': 4778.505 * (10 ** 6), # B / s
117 |         'memsync_coef': 5.686 * (10 ** -11), # s / B
118 |         'memsync_const': 0, # s
119 |     }
120 | 
121 |     measured_data = [(row['size_per_region'], row['bw'],) for row in input_data]
122 |     predicted_data = [(size_per_region, min(properties['bw_max'], properties['bw_coef'] * size_per_region) * (10 ** -6),) for size_per_region in sorted(set(zip(*measured_data)[0]))]
123 | 
124 |     legend_list = list()
125 |     p = plt.scatter(
126 |         zip(*measured_data)[0],
127 |         zip(*measured_data)[1],
128 |         c = COLOR[0],
129 |         marker = 'o',
130 |         s = 40
131 |     )
132 |     legend_list.append((p, 'Measured',))
133 |     x, y = zip(*predicted_data)
134 |     plt.plot(x, y, COLOR[0], linewidth = 4)
135 |     p = mlines.Line2D([], [], color = COLOR[0], linewidth = 4)
136 |     legend_list.append((p, 'Predicted',))
137 | 
138 |     plt.legend(zip(*legend_list)[0], zip(*legend_list)[1],
139 |         loc = 'upper left',
140 |         prop = matplotlib.font_manager.FontProperties(size = 30, weight = 'bold')
141 |     )
142 |     plt.xscale('log', basex = 2)
143 |     plt.yscale('log', basey = 10)
144 |     plt.xlim(xmin = 0)
145 |     plt.ylim(ymin = 0)
146 | 
147 |     plt.xlabel('Size per region (B)', size = 30, weight = 'bold')
148 |     plt.ylabel('Bandwidth (MB / s)', size = 30, weight = 'bold')
149 | 
150 |     plt.xticks(size = 25, weight = 'bold')
151 |     plt.yticks(size = 25, weight = 'bold')
152 | 
153 |     plt.show()
154 | 
155 | def read_mhelper_input(input_file):
156 |     # All time is in ms.
157 |     # All sizes are in B.
158 |     reader = csv.DictReader(input_file, delimiter = ' ')
159 |     result = list()
160 |     for row in reader:
161 |         row['count'] = int(row['count'])
162 |         row['time'] = float(row['time'])
163 |         if 'num_calls' in row:
164 |             row['num_calls'] = int(row['num_calls'])
165 |         else:
166 |             row['size_per_call'] = int(row['size_per_call'])
167 |         result.append(row)
168 |     return result
169 | 
170 | def plot_mhelper_nullker(input_data):
171 |     properties = {
172 |         'coefd': 6.87138 * (10 ** -10), #s
173 |         'coefc': 9.98263 * (10 ** -6), # s
174 |         'const': 0.00293373, # s
175 |     }
176 | 
177 |     native_data = dict()
178 |     mrcuda_data = dict()
179 |     for data in input_data:
180 |         if data['lib'] == 'native':
181 |             data_dict = native_data
182 |         else:
183 |             data_dict = mrcuda_data
184 |         if data['num_calls'] not in data_dict:
185 |             data_dict[data['num_calls']] = list()
186 |         data_dict[data['num_calls']].append(data['time'])
187 | 
188 |     x_values = list()
189 |     y_values = list()
190 | 
191 |     for num_calls in native_data.iterkeys():
192 |         avg_time = np.average(native_data[num_calls])
193 |         for time in mrcuda_data[num_calls]:
194 |             x_values.append(num_calls)
195 |             y_values.append((time - avg_time) * (10 ** -3)) # seconds
196 | 
197 |     legend_list = list()
198 | 
199 |     p = plt.scatter(
200 |         x_values,
201 |         y_values,
202 |         c = COLOR[0],
203 |         marker = 'o',
204 |         s = 40
205 |     )
206 |     legend_list.append((p, 'Measured',))
207 | 
208 |     x_values = sorted(set(x_values))
209 |     y_values = [properties['coefc'] * x + properties['const'] for x in x_values]
210 | 
211 |     plt.plot(x_values, y_values, COLOR[0], linewidth = 4)
212 |     p = mlines.Line2D([], [], color = COLOR[0], linewidth = 4)
213 |     legend_list.append((p, 'Predicted',))
214 | 
215 |     plt.legend(zip(*legend_list)[0], zip(*legend_list)[1],
216 |         loc = 'upper left',
217 |         prop = matplotlib.font_manager.FontProperties(size = 30, weight = 'bold')
218 |     )
219 |     plt.xscale('log', basex = 2)
220 |     plt.yscale('log', basey = 10)
221 |     plt.xlim(xmin = 0)
222 |     plt.ylim(ymin = 0)
223 | 
224 |     plt.xlabel('Number of calls', size = 30, weight = 'bold')
225 |     plt.ylabel('Time (s)', size = 30, weight = 'bold')
226 | 
227 |     plt.xticks(size = 25, weight = 'bold')
228 |     plt.yticks(size = 25, weight = 'bold')
229 | 
230 |     plt.show()
231 | 
232 | def plot_mhelper_memcpybw(input_data):
233 |     properties = {
234 |         'coefd': 6.87138 * (10 ** -10), #s
235 |         'coefc': 9.98263 * (10 ** -6), # s
236 |         'const': 0.00293373, # s
237 |         'num_calls': 1000,
238 |     }
239 | 
240 |     native_data = dict()
241 |     mrcuda_data = dict()
242 |     for data in input_data:
243 |         if data['lib'] == 'native':
244 |             data_dict = native_data
245 |         else:
246 |             data_dict = mrcuda_data
247 |         if data['size_per_call'] not in data_dict:
248 |             data_dict[data['size_per_call']] = list()
249 |         data_dict[data['size_per_call']].append(data['time'])
250 | 
251 |     x_values = list()
252 |     y_values = list()
253 | 
254 |     for size_per_call in native_data.iterkeys():
255 |         avg_time = np.average(native_data[size_per_call])
256 |         for time in mrcuda_data[size_per_call]:
257 |             x_values.append(size_per_call)
258 |             y_values.append((time - avg_time) * (10 ** -3)) # seconds
259 | 
260 |     legend_list = list()
261 | 
262 |     p = plt.scatter(
263 |         x_values,
264 |         y_values,
265 |         c = COLOR[0],
266 |         marker = 'o',
267 |         s = 40
268 |     )
269 |     legend_list.append((p, 'Measured',))
270 | 
271 |     x_values = sorted(set(x_values))
272 |     y_values = [properties['coefd'] * x * properties['num_calls'] + properties['coefc'] * properties['num_calls'] + properties['const'] for x in x_values]
273 | 
274 |     plt.plot(x_values, y_values, COLOR[0], linewidth = 4)
275 |     p = mlines.Line2D([], [], color = COLOR[0], linewidth = 4)
276 |     legend_list.append((p, 'Predicted',))
277 | 
278 |     plt.legend(zip(*legend_list)[0], zip(*legend_list)[1],
279 |         loc = 'upper left',
280 |         prop = matplotlib.font_manager.FontProperties(size = 40, weight = 'bold')
281 |     )
282 |     plt.xscale('log', basex = 2)
283 |     plt.yscale('log', basey = 10)
284 |     plt.xlim(xmin = 0)
285 |     plt.ylim(ymin = 0)
286 | 
287 |     plt.xlabel('Size per calls (B)', size = 40, weight = 'bold')
288 |     plt.ylabel('Time (s)', size = 40, weight = 'bold')
289 | 
290 |     plt.xticks(size = 35, weight = 'bold')
291 |     plt.yticks(size = 35, weight = 'bold')
292 | 
293 |     plt.show()
294 | 
295 | def read_record_replay_input(input_file):
296 |     # All time is in s.
297 |     reader = csv.DictReader(input_file, delimiter = ',')
298 |     result = list()
299 |     for row in reader:
300 |         if row['mrcuda_switch num_replay']:
301 |             row['mrcuda_record time'] = float(row['mrcuda_record time'])
302 |             row['mrcuda_switch time'] = float(row['mrcuda_switch time'])
303 |             row['mrcuda_sync_mem time'] = float(row['mrcuda_sync_mem time'])
304 |             row['mrcuda_replay time'] = row['mrcuda_switch time'] - row['mrcuda_sync_mem time']
305 |             row['mrcuda_switch num_replay'] = int(row['mrcuda_switch num_replay'])
306 |             result.append(row)
307 |     return result
308 | 
309 | def plot_record_replay(input_data):
310 |     properties = {
311 |         'record_coef': 2.825 * (10 ** -7), # s
312 |         'record_const': 0.3437 * (10 ** -3), # s
313 |         'replay_coef': 1.031 * (10 ** -6), # s
314 |         'replay_const': 1.2437, # s
315 |     }
316 | 
317 |     fig, ax1 = plt.subplots()
318 |     ax2 = ax1.twinx()
319 | 
320 |     legend_list = list()
321 | 
322 |     x_values = [row['mrcuda_switch num_replay'] for row in input_data]
323 | 
324 |     p = ax1.scatter(
325 |         x_values,
326 |         [row['mrcuda_record time'] for row in input_data],
327 |         c = COLOR[0],
328 |         marker = 'o',
329 |         s = 40
330 |     )
331 |     legend_list.append((p, 'Record Overhead (Measured)',))
332 | 
333 |     p = ax2.scatter(
334 |         x_values,
335 |         [row['mrcuda_replay time'] for row in input_data],
336 |         c = COLOR[1],
337 |         marker = 'o',
338 |         s = 40
339 |     )
340 |     legend_list.append((p, 'Replay Overhead (Measured)',))
341 | 
342 |     x_values = sorted(set(x_values))
343 | 
344 |     ax1.plot(
345 |         x_values, 
346 |         [properties['record_coef'] * x + properties['record_const'] for x in x_values], 
347 |         COLOR[0], 
348 |         linewidth = 4
349 |     )
350 |     p = mlines.Line2D([], [], color = COLOR[0], linewidth = 4)
351 |     legend_list.append((p, 'Record Overhead (Predicted)',))
352 | 
353 |     ax2.plot(
354 |         x_values, 
355 |         [properties['replay_coef'] * x + properties['replay_const'] for x in x_values], 
356 |         COLOR[1], 
357 |         linewidth = 4
358 |     )
359 |     p = mlines.Line2D([], [], color = COLOR[1], linewidth = 4)
360 |     legend_list.append((p, 'Replay Overhead (Predicted)',))
361 | 
362 |     plt.legend(zip(*legend_list)[0], zip(*legend_list)[1],
363 |         loc = 'lower right',
364 |         prop = matplotlib.font_manager.FontProperties(size = 30, weight = 'bold')
365 |     )
366 |     #plt.xscale('log', basex = 2)
367 |     #plt.yscale('log', basey = 10)
368 |     ax1.set_xlim(xmin = 0)
369 |     ax2.set_xlim(xmin = 0)
370 |     ax1.set_ylim(ymin = 0)
371 |     ax2.set_ylim(ymin = 0)
372 | 
373 |     ax1.set_xlabel('num_record (x10,000)', size = 30, weight = 'bold')
374 |     ax1.set_ylabel('Record Time (ms)', size = 30, weight = 'bold')
375 |     ax2.set_ylabel('Replay Time (s)', size = 30, weight = 'bold')
376 | 
377 |     ax1.set_xticklabels(['%d' % (int(label) / 10000,) for label in ax1.get_xticks().tolist()])
378 | 
379 |     for label in ax1.get_xticklabels():
380 |         label.set_fontsize(25)
381 |         label.set_fontweight('bold')
382 | 
383 |     ax1.set_yticklabels(['%d' % (float(label) * 1000,) for label in ax1.get_yticks().tolist()])
384 | 
385 |     for label in ax1.get_yticklabels():
386 |         label.set_fontsize(25)
387 |         label.set_fontweight('bold')
388 |     for label in ax2.get_yticklabels():
389 |         label.set_fontsize(25)
390 |         label.set_fontweight('bold')
391 | 
392 |     plt.show()
393 | 
394 | def main():
395 |     """
396 |     Main function.
397 |     """
398 |     args = parseargs()
399 | 
400 |     if args.type == 'memsync':
401 |         input_data = read_memsync_input(args.resultfile)
402 |         plot_memsync(input_data)
403 |     elif args.type == 'memsync-bw':
404 |         input_data = read_memsync_input(args.resultfile)
405 |         plot_memsync_bw(input_data)
406 |     elif args.type == 'mhelper-nullker':
407 |         input_data = read_mhelper_input(args.resultfile)
408 |         plot_mhelper_nullker(input_data)
409 |     elif args.type == 'mhelper-memcpybw':
410 |         input_data = read_mhelper_input(args.resultfile)
411 |         plot_mhelper_memcpybw(input_data)
412 |     elif args.type == 'record-replay':
413 |         input_data = read_record_replay_input(args.resultfile)
414 |         plot_record_replay(input_data)
415 | 
416 | if __name__ == "__main__":
417 |     main()
418 | 
419 | 


--------------------------------------------------------------------------------
/src/Makefile.am:
--------------------------------------------------------------------------------
 1 | bin_PROGRAMS = mhelper
 2 | mhelper_SOURCES = mhelper.c intercomm_mem.c
 3 | mhelper_LDFLAGS = -lcuda -lcudart $(DEPS_LIBS)
 4 | mhelper_CPPFLAGS = -I/usr/local/cuda/include $(DEPS_CFLAGS)
 5 | 
 6 | lib_LTLIBRARIES = libcudart.la
 7 | libcudart_la_SOURCES = comm.c interface.c mrcuda.c record.c intercomm.c intercomm_mem.c intercomm_interface.c
 8 | libcudart_la_LDFLAGS = -avoid-version -shared -ldl $(DEPS_LIBS)
 9 | libcudart_la_CPPFLAGS = -I/usr/local/cuda/include $(DEPS_CFLAGS)
10 | 
11 | lib_LIBRARIES = libcomm.a
12 | libcomm_a_SOURCES = comm.c comm.h common.h
13 | libcomm_a_CPPFLAGS = -lpthread -pthread $(DEPS_CFLAGS)
14 | 
15 | install-exec-hook:
16 | 	${LN_S} ${RCUDA_RCUDACOMMIB} ${libdir}
17 | 	${LN_S} ${RCUDA_RCUDACOMMTCP} ${libdir}
18 | 	${LN_S} ${libdir}/libcudart.so ${libdir}/libcudart.so.7.0
19 | 
20 | 


--------------------------------------------------------------------------------
/src/comm.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | #include <sys/types.h>
  5 | #include <sys/stat.h>
  6 | #include <fcntl.h>
  7 | #include <unistd.h>
  8 | #include <pthread.h>
  9 | 
 10 | #include "comm.h"
 11 | 
 12 | #define LISTEN_BACKLOG 1
 13 | 
 14 | typedef struct __MRCUDAComm
 15 | {
 16 | 	pthread_t listeningThread;
 17 | 
 18 | 	char *path;
 19 | 	void (*callback)(void);
 20 | 
 21 | 	int fd;
 22 | } __MRCUDAComm;
 23 | 
 24 | static __MRCUDAComm __mrcudaCommObj;
 25 | 
 26 | 
 27 | /**
 28 |  * Terminate the socket.
 29 |  */
 30 | static void __mrcuda_comm_fini()
 31 | {
 32 | 	DPRINTF("ENTER __mrcuda_comm_fini.\n");
 33 | 	close(__mrcudaCommObj.fd);
 34 | 	unlink(__mrcudaCommObj.path);
 35 | 	free(__mrcudaCommObj.path);
 36 | 	DPRINTF("EXIT __mrcuda_comm_fini.\n");
 37 | }
 38 | 
 39 | /**
 40 |  * This function creates a FIFO file specified by the path.
 41 |  * If it fails to do so for any reasons, it returns the error number; otherwise, return 0
 42 |  * @param path path of the FIFO file to be created.
 43 |  * @return 0 if success, otherwise the error number.
 44 |  */
 45 | static int __mrcuda_comm_init(char *path)
 46 | {
 47 | 	DPRINTF("ENTER __mrcuda_comm_init.\n");
 48 | 
 49 | 	DPRINTF("__mrcuda_comm_init allocates __mrcudaCommObj.path\n");
 50 | 	if((__mrcudaCommObj.path = (char *)malloc(strlen(path) + 1)) == NULL)
 51 | 		goto __mrcuda_comm_init_err_1;
 52 | 
 53 | 	DPRINTF("__mrcuda_comm_init strcpy path.\n");
 54 | 	strcpy(__mrcudaCommObj.path, path);
 55 | 
 56 | 	DPRINTF("__mrcuda_comm_init mkfifo.\n");
 57 | 	if(mkfifo(__mrcudaCommObj.path, 0666) == -1)
 58 | 		goto __mrcuda_comm_init_err_2;
 59 | 
 60 | 	DPRINTF("EXIT SUCCESS __mrcuda_comm_init.\n");
 61 | 	return 0;
 62 | 
 63 | __mrcuda_comm_init_err_2:
 64 | 	free(__mrcudaCommObj.path);
 65 | __mrcuda_comm_init_err_1:
 66 | 	DPRINTF("EXIT FAILURE __mrcuda_comm_init.\n");
 67 | 	return -1;
 68 | }
 69 | 
 70 | 
 71 | /**
 72 |  * This is the main loop for repeatedly listening to a signal.
 73 |  * If it receives a correct signal, it terminates the socket and calls the callback.
 74 |  * This function should be called from a different thread since it blocks the execution.
 75 |  */
 76 | static void *__mrcuda_comm_listening_main_loop(void *arg)
 77 | {
 78 | 	DPRINTF("ENTER __mrcuda_comm_listening_main_loop.\n");
 79 | 
 80 | 	#define BUF_SIZE 1
 81 | 
 82 | 	char buf[BUF_SIZE];
 83 | 	ssize_t readSize;
 84 | 
 85 | 	DPRINTF("__mrcuda_comm_init open file.\n");
 86 | 	if((__mrcudaCommObj.fd = open(__mrcudaCommObj.path, O_RDONLY)) == -1)
 87 | 		goto __mrcuda_comm_listening_main_loop_err_1;
 88 | 
 89 | 	while(1)
 90 | 	{
 91 | 		DPRINTF("__mrcuda_comm_listening_main_loop is waiting.\n");
 92 | 		if((readSize = read(__mrcudaCommObj.fd, buf, BUF_SIZE)) == -1)
 93 | 			goto __mrcuda_comm_listening_main_loop_err_1;
 94 | 		DPRINTF("__mrcuda_comm_listening_main_loop received a signal.\n");
 95 | 		if(strncmp(buf, "1", BUF_SIZE) == 0)
 96 | 		{
 97 | 			DPRINTF("__mrcuda_comm_listening_main_loop calls the callback.\n");
 98 | 			__mrcudaCommObj.callback();
 99 | 			break;
100 | 		}
101 | 	}
102 | 
103 | __mrcuda_comm_listening_main_loop_err_1:
104 | 	__mrcuda_comm_fini();
105 | 
106 | 	DPRINTF("EXIT __mrcuda_comm_listening_main_loop.\n");
107 | 
108 | 	#undef BUF_SIZE
109 | }
110 | 
111 | /**
112 |  * This function starts listening to a signal that tells the system to switch to native CUDA.
113 |  * After it receives the signal, this function calls the callback and terminates the socket.
114 |  * This function executes the listening process in a different thread; thus, it returns almost immediately.
115 |  * Note: if the signal is not well form, this function will simply skips that signal and not calls the callback.
116 |  * @param path path for creating a new UNIX socket for listening to the signal.
117 |  * @param callback the function that will be called after received a signal.
118 |  * @return 0 if success, the error number otherwise.
119 |  */
120 | int mrcuda_comm_listen_for_signal(char *path, void (*callback)(void))
121 | {
122 | 	DPRINTF("ENTER mrcuda_comm_listen_for_signal.\n");
123 |     int ret = 0;
124 |     if((ret = __mrcuda_comm_init(path)) != 0)
125 |         return ret;
126 | 	__mrcudaCommObj.callback = callback;
127 | 
128 | 	DPRINTF("mrcuda_comm_listen_for_signal creates a thread.\n");
129 |     if((ret = pthread_create(&(__mrcudaCommObj.listeningThread), NULL, &__mrcuda_comm_listening_main_loop, NULL)) != 0)
130 |         __mrcuda_comm_fini();
131 | 	
132 | 
133 | 	DPRINTF("EXIT mrcuda_comm_listen_for_signal.\n");
134 |     return ret;
135 | }
136 | 
137 | 


--------------------------------------------------------------------------------
/src/comm.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MRCUDA_COMM__HEADER__
 2 | #define __MRCUDA_COMM__HEADER__
 3 | 
 4 | #include "common.h"
 5 | 
 6 | /**
 7 |  * This function starts listening to a signal that tells the system to switch to native CUDA.
 8 |  * After it receives the signal, this function calls the callback and terminates the socket.
 9 |  * This function executes the listening process in a different thread; thus, it returns almost immediately.
10 |  * Note: if the signal is not well form, this function will simply skips that signal and not calls the callback.
11 |  * @param path path for creating a new UNIX socket for listening to the signal.
12 |  * @param callback the function that will be called after received a signal.
13 |  * @return 0 if success, the error number otherwise.
14 |  */
15 | int mrcuda_comm_listen_for_signal(char *path, void (*callback)(void));
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/src/common.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MRCUDA_COMMON__HEADER__
 2 | #define __MRCUDA_COMMON__HEADER__
 3 | 
 4 | #include <config.h>
 5 | 
 6 | #include <stdio.h>
 7 | #include <glib.h>
 8 | #include <sys/time.h>
 9 | 
10 | #if DEBUG
11 |     #define DPRINTF(fmt, ...) \
12 |         do {fprintf(stderr, "FILE: " __FILE__ ", LINE: %d, " fmt, __LINE__, ##__VA_ARGS__);} while(0)
13 | #else
14 |     #define DPRINTF(fmt, ...) \
15 |         do {;;} while(0)
16 | #endif
17 | 
18 | #define REPORT_ERROR_AND_EXIT(...) \
19 |     do { \
20 |         perror("FATAL ERROR"); \
21 |         fprintf(stderr, __VA_ARGS__); \
22 |         exit(EXIT_FAILURE); \
23 |     } while(0)
24 | 
25 | #define STARTTIMMER() \
26 |     struct timeval t1, t2; \
27 |     gettimeofday(&t1, NULL);
28 | 
29 | #define ENDTIMMER(acctime) \
30 |     gettimeofday(&t2, NULL); \
31 |     acctime += (t2.tv_sec - t1.tv_sec) * 1000.0 + (t2.tv_usec - t1.tv_usec) / 1000.0;
32 | 
33 | #endif
34 | 
35 | 


--------------------------------------------------------------------------------
/src/intercomm.c:
--------------------------------------------------------------------------------
  1 | #include <unistd.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <signal.h>
  5 | #include <cuda_runtime.h>
  6 | 
  7 | #include "intercomm.h"
  8 | #include "datatypes.h"
  9 | 
 10 | /**
 11 |  * Create a helper process and assign the mrcudaGPU to it.
 12 |  * @param mrcudaGPU the GPU information to assign to the created process.
 13 |  * @param helperProgPath the path to the helper application.
 14 |  * @param gpuID the ID of the GPU the helper application will use.
 15 |  * @return a ptr to the created process on success; NULL otherwise.
 16 |  */
 17 | MHelperProcess_t *mhelper_create(MRCUDAGPU_t *mrcudaGPU, const char *helperProgPath, int gpuID)
 18 | {
 19 |     int rPipePair[2], wPipePair[2];
 20 |     MHelperProcess_t *mhelperProcess;
 21 |     pid_t pid;
 22 |     char gpuIDStr[15];
 23 | 
 24 |     if (pipe(rPipePair) != 0)
 25 |         goto __mhelper_create_err_0;
 26 |     if (pipe(wPipePair) != 0)
 27 |         goto __mhelper_create_err_1;
 28 |     if ((mhelperProcess = malloc(sizeof(MHelperProcess_t))) == NULL)
 29 |         goto __mhelper_create_err_2;
 30 |     pid = fork();
 31 |     if (pid == 0) {   // child process
 32 |         close(wPipePair[1]);
 33 |         close(rPipePair[0]);
 34 |         dup2(wPipePair[0], fileno(stdin));
 35 |         dup2(rPipePair[1], fileno(stdout));
 36 |         sprintf(gpuIDStr, "%d", gpuID);
 37 |         execl(helperProgPath, helperProgPath, gpuIDStr, "\0");
 38 |         perror("Helper Program Exec");
 39 |         _exit(EXIT_FAILURE);
 40 |     }
 41 |     else if (pid < 0)   // error; cannot fork
 42 |         goto __mhelper_create_err_3;
 43 |     else {  // parent process
 44 |         close(wPipePair[0]);
 45 |         close(rPipePair[1]);
 46 |         mhelperProcess->readPipe = rPipePair[0];
 47 |         mhelperProcess->writePipe = wPipePair[1];
 48 |         mhelperProcess->pid = pid;
 49 |         if (mhelper_int_init(&(mhelperProcess->handle), mhelperProcess) != 0)
 50 |             goto __mhelper_create_err_3;
 51 |         mrcudaGPU->mhelperProcess = mhelperProcess;
 52 |         return mhelperProcess;
 53 |     }
 54 | 
 55 | __mhelper_create_err_3:
 56 |     free(mhelperProcess);
 57 | __mhelper_create_err_2:
 58 |     close(wPipePair[0]);
 59 |     close(wPipePair[1]);
 60 | __mhelper_create_err_1:
 61 |     close(rPipePair[0]);
 62 |     close(rPipePair[1]);
 63 | __mhelper_create_err_0:
 64 |     return NULL;
 65 | }
 66 | 
 67 | /**
 68 |  * Destroy the helper process.
 69 |  * @param process the process to be destroyed.
 70 |  * @return 0 on success; another number otherwise.
 71 |  */
 72 | int mhelper_destroy(MHelperProcess_t *process)
 73 | {
 74 |     int ret = kill(process->pid, SIGQUIT);
 75 |     if (ret == 0)
 76 |         free(process);
 77 |     return ret;
 78 | }
 79 | 
 80 | /**
 81 |  * Ask the process to execute the command.
 82 |  * @param process the process that will execute the specified command.
 83 |  * @param command the command to be executed on the process.
 84 |  * @return the result of the execution.
 85 |  */
 86 | MHelperResult_t mhelper_call(MHelperProcess_t *process, MHelperCommand_t command)
 87 | {
 88 |     ssize_t n;
 89 |     size_t remainingSize = sizeof(MHelperCommand_t);
 90 |     char *buf = (char *)&command;
 91 |     MHelperResult_t result;
 92 | 
 93 |     while (remainingSize > 0) {
 94 |         n = write(process->writePipe, buf, remainingSize);
 95 |         if (n < 0)
 96 |             goto __mhelper_call_err_0;
 97 |         remainingSize -= n;
 98 |         buf += n;
 99 |     }
100 | 
101 |     remainingSize = sizeof(MHelperResult_t);
102 |     buf = (char *)&result;
103 |     while (remainingSize > 0) {
104 |         n = read(process->readPipe, buf, remainingSize);
105 |         if (n < 0)
106 |             goto __mhelper_call_err_0;
107 |         remainingSize -= n;
108 |         buf += n;
109 |     }
110 |     if (result.id != command.id || result.type != command.type)
111 |         goto __mhelper_call_err_0;
112 |     return result;
113 | 
114 | __mhelper_call_err_0:
115 |     result.id = command.id;
116 |     result.type = command.type;
117 |     result.internalError = -1;
118 |     result.cudaError = cudaSuccess;
119 |     return result;
120 | }
121 | 
122 | /**
123 |  * Generate a unique ID for a command to be used with the specified mrcudaGPU.
124 |  * @param mrcudaGPU a ptr to a MRCUDAGPU_t
125 |  * @return a unique ID
126 |  */
127 | int mhelper_generate_command_id(MRCUDAGPU_t *mrcudaGPU)
128 | {
129 |     return (rand() << 4) | mrcudaGPU->virtualNumber;
130 | }
131 | 
132 | 


--------------------------------------------------------------------------------
/src/intercomm.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MRCUDA_INTERCOMM__HEADER__
 2 | #define __MRCUDA_INTERCOMM__HEADER__
 3 | 
 4 | #include "datatypes.h"
 5 | #include "intercomm_mem.h"
 6 | 
 7 | /**
 8 |  * Create a helper process and assign the mrcudaGPU to it.
 9 |  * @param mrcudaGPU the GPU information to assign to the created process.
10 |  * @param helperProgPath the path to the helper application.
11 |  * @param gpuID the ID of the GPU the helper application will use.
12 |  * @return a ptr to the created process on success; NULL otherwise.
13 |  */
14 | MHelperProcess_t *mhelper_create(MRCUDAGPU_t *mrcudaGPU, const char *helperProgPath, int gpuID);
15 | 
16 | /**
17 |  * Destroy the helper process.
18 |  * @param process the process to be destroyed.
19 |  * @return 0 on success; another number otherwise.
20 |  */
21 | int mhelper_destroy(MHelperProcess_t *process);
22 | 
23 | /**
24 |  * Ask the process to execute the command.
25 |  * @param process the process that will execute the specified command.
26 |  * @param command the command to be executed on the process.
27 |  * @return the result of the execution.
28 |  */
29 | MHelperResult_t mhelper_call(MHelperProcess_t *process, MHelperCommand_t command);
30 | 
31 | /**
32 |  * Generate a unique ID for a command to be used with the specified mrcudaGPU.
33 |  * @param mrcudaGPU a ptr to a MRCUDAGPU_t
34 |  * @return a unique ID
35 |  */
36 | int mhelper_generate_command_id(MRCUDAGPU_t *mrcudaGPU);
37 | 
38 | #endif /* __MRCUDA_INTERCOMM__HEADER__ */
39 | 
40 | 


--------------------------------------------------------------------------------
/src/intercomm_interface.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MRUCDA_INTERCOMM_INTERFACE__HEADER__
 2 | #define __MRCUDA_INTERCOMM_INTERFACE__HEADER__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | #include "datatypes.h"
 7 | 
 8 | /**
 9 |  * Initialize a handler with a helper process.
10 |  * @param handler output of initialized handler.
11 |  * @param process a ptr to a helper process.
12 |  * @return 0 on success; -1 otherwise.
13 |  */
14 | int mhelper_int_init(MRCUDASym_t **handler, MHelperProcess_t *process);
15 | 
16 | 
17 | /* Interfaces */
18 | 
19 | /**
20 |  * Create a context on the helper process.
21 |  * @param mrcudaGPU a ptr to a MRCUDAGPU_t a context will be created on.
22 |  * @return 0 on success; -1 otherwise.
23 |  */
24 | int mhelper_int_cuCtxCreate_internal(MRCUDAGPU_t *mrcudaGPU);
25 | 
26 | void **mhelper_int_cudaRegisterFatBinary(void *fatCubin);
27 | void **mhelper_int_cudaRegisterFatBinary_internal(MRCUDAGPU_t *mrcudaGPU, void *fatCubin);
28 | 
29 | void mhelper_int_cudaUnregisterFatBinary(void **fatCubinHandle);
30 | void mhelper_int_cudaUnregisterFatBinary_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle);
31 | 
32 | void mhelper_int_cudaRegisterVar(void **fatCubinHandle, char *hostVar, char *deviceAddress, const char *deviceName, int ext, int size, int constant, int global);
33 | void mhelper_int_cudaRegisterVar_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle, char *hostVar, char *deviceAddress, const char *deviceName, int ext, int size, int constant, int global);
34 | 
35 | void mhelper_int_cudaRegisterTexture(void **fatCubinHandle, const struct textureReference *hostVar, const void **deviceAddress, const char *deviceName, int dim, int norm, int ext);
36 | void mhelper_int_cudaRegisterTexture_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle, const struct textureReference *hostVar, const void **deviceAddress, const char *deviceName, int dim, int norm, int ext);
37 | 
38 | void mhelper_int_cudaRegisterFunction(void **fatCubinHandle, const char *hostFun, char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize);
39 | void mhelper_int_cudaRegisterFunction_internal(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle, const char *hostFun, char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize);
40 | 
41 | cudaError_t mhelper_int_cudaLaunch(const void *func);
42 | cudaError_t mhelper_int_cudaLaunch_internal(MRCUDAGPU_t *mrcudaGPU, const void *func);
43 | 
44 | cudaError_t mhelper_int_cudaHostAlloc(void **pHost,  size_t size,  unsigned int flags);
45 | 
46 | cudaError_t mhelper_int_cudaDeviceReset(void);
47 | cudaError_t mhelper_int_cudaDeviceReset_internal(MRCUDAGPU_t *mrcudaGPU);
48 | 
49 | cudaError_t mhelper_int_cudaDeviceSynchronize(void);
50 | cudaError_t mhelper_int_cudaDeviceSynchronize_internal(MRCUDAGPU_t *mrcudaGPU);
51 | 
52 | cudaError_t mhelper_int_cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device);
53 | cudaError_t mhelper_int_cudaGetDeviceProperties_internal(MRCUDAGPU_t *mrcudaGPU, struct cudaDeviceProp *prop, int device);
54 | 
55 | cudaError_t mhelper_int_cudaMalloc(void **devPtr, size_t size);
56 | cudaError_t mhelper_int_cudaMalloc_internal(MRCUDAGPU_t *mrcudaGPU, void **devPtr, size_t size);
57 | 
58 | cudaError_t mhelper_int_cudaFreeHost(void *ptr);
59 | 
60 | cudaError_t mhelper_int_cudaFree(void *devPtr);
61 | cudaError_t mhelper_int_cudaFree_internal(MRCUDAGPU_t *mrcudaGPU, void *devPtr);
62 | 
63 | cudaError_t mhelper_int_cudaMemcpyToSymbolAsync(const void *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream);
64 | cudaError_t mhelper_int_cudaMemcpyToSymbolAsync_internal(MRCUDAGPU_t *mrcudaGPU, const void *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream);
65 | 
66 | cudaError_t mhelper_int_cudaMemcpyFromSymbolAsync(void *dst, const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream);
67 | cudaError_t mhelper_int_cudaMemcpyFromSymbolAsync_internal(MRCUDAGPU_t *mrcudaGPU, void *dst, const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream);
68 | 
69 | cudaError_t mhelper_int_cudaSetupArgument(const void *arg, size_t size, size_t offset);
70 | cudaError_t mhelper_int_cudaSetupArgument_internal(MRCUDAGPU_t *mrcudaGPU, const void *arg, size_t size, size_t offset);
71 | 
72 | cudaError_t mhelper_int_cudaStreamSynchronize(cudaStream_t stream);
73 | cudaError_t mhelper_int_cudaStreamSynchronize_internal(MRCUDAGPU_t *mrcudaGPU, cudaStream_t stream);
74 | 
75 | cudaError_t mhelper_int_cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream);
76 | cudaError_t mhelper_int_cudaConfigureCall_internal(MRCUDAGPU_t *mrcudaGPU, dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream);
77 | 
78 | cudaError_t mhelper_int_cudaGetLastError(void);
79 | cudaError_t mhelper_int_cudaGetLastError_internal(MRCUDAGPU_t *mrcudaGPU);
80 | 
81 | cudaError_t mhelper_int_cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind);
82 | cudaError_t mhelper_int_cudaMemcpy_internal(MRCUDAGPU_t *mrcudaGPU, void *dst, const void *src, size_t count, enum cudaMemcpyKind kind);
83 | 
84 | cudaError_t mhelper_int_cudaSetDevice(int device);
85 | cudaError_t mhelper_int_cudaSetDevice_internal(MRCUDAGPU_t *mrcudaGPU, int device);
86 | 
87 | cudaError_t mhelper_int_cudaStreamCreate(cudaStream_t *pStream);
88 | cudaError_t mhelper_int_cudaStreamCreate_internal(MRCUDAGPU_t *mrcudaGPU, cudaStream_t *pStream);
89 | 
90 | #endif /* __MRCUDA_INTERCOMM_INTERFACE__HEADER__ */
91 | 
92 | 


--------------------------------------------------------------------------------
/src/intercomm_mem.c:
--------------------------------------------------------------------------------
  1 | #include <sys/shm.h>
  2 | #include <sys/ipc.h>
  3 | #include <sys/types.h>
  4 | #include <stdlib.h>
  5 | #include <stdio.h>
  6 | 
  7 | #include "common.h"
  8 | #include "datatypes.h"
  9 | #include "intercomm_mem.h"
 10 | 
 11 | #define DEV_RANDOM "/dev/urandom"
 12 | 
 13 | static int initRand = 0;
 14 | 
 15 | /**
 16 |  * Generate a key to be associated with a shared memory region.
 17 |  * @return a key.
 18 |  */
 19 | static key_t generate_key()
 20 | {
 21 |     FILE *f;
 22 |     unsigned int seed;
 23 | 	size_t remainingSize = sizeof(unsigned int);
 24 | 	size_t readSize = 0;
 25 | 
 26 |     if (!initRand) {
 27 |         f = fopen(DEV_RANDOM, "r");
 28 | 		while (remainingSize > 0) {
 29 | 			if ((readSize = fread(&seed, remainingSize, 1, f)) == 0)
 30 | 				REPORT_ERROR_AND_EXIT("Cannot read from " DEV_RANDOM ".\n");
 31 | 			remainingSize -= readSize;
 32 | 		}
 33 |         fclose(f);
 34 |         srand(seed);
 35 |         initRand = !initRand;
 36 |     }
 37 |     return (key_t)rand();
 38 | }
 39 | 
 40 | /**
 41 |  * Malloc memory on shared-memory region.
 42 |  * @param size the size of memory to be allocated.
 43 |  * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise.
 44 |  */
 45 | MRCUDASharedMemLocalInfo_t *mhelper_mem_malloc(size_t size)
 46 | {
 47 |     MRCUDASharedMemLocalInfo_t *sharedMemInfo = calloc(1, sizeof(MRCUDASharedMemLocalInfo_t));
 48 |     if (sharedMemInfo == NULL)
 49 |         goto __mhelper_mem_malloc_err_0;
 50 |     sharedMemInfo->sharedMem.key = generate_key();
 51 |     if ((sharedMemInfo->shmid = shmget(sharedMemInfo->sharedMem.key, size, IPC_CREAT | IPC_EXCL | 0600)) <= 0)
 52 |         goto __mhelper_mem_malloc_err_1;
 53 |     if ((sharedMemInfo->startAddr = shmat(sharedMemInfo->shmid, NULL, 0)) == NULL)
 54 |         goto __mhelper_mem_malloc_err_2;
 55 |     sharedMemInfo->sharedMem.size = size;
 56 |     return sharedMemInfo;
 57 | 
 58 | __mhelper_mem_malloc_err_2:
 59 |     shmctl(sharedMemInfo->shmid, IPC_RMID, NULL);
 60 | __mhelper_mem_malloc_err_1:
 61 |     free(sharedMemInfo);
 62 | __mhelper_mem_malloc_err_0:
 63 |     return NULL;
 64 | }
 65 | 
 66 | /**
 67 |  * Detach and destroy the shared region specified by the sharedMemInfo.
 68 |  * @param sharedMemInfo the information of the shared region.
 69 |  * @return 0 on success; other number otherwise.
 70 |  */
 71 | int mhelper_mem_free(MRCUDASharedMemLocalInfo_t *sharedMemInfo)
 72 | {
 73 |     int ret = shmctl(sharedMemInfo->shmid, IPC_RMID, NULL);
 74 |     if (ret == 0)
 75 |         free(sharedMemInfo);
 76 |     return ret;
 77 | }
 78 | 
 79 | /**
 80 |  * Get the memory region associated with the specified sharedMem.
 81 |  * @param sharedMem the minimum information of the shared region.
 82 |  * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise.
 83 |  */
 84 | MRCUDASharedMemLocalInfo_t *mhelper_mem_get(MRCUDASharedMem_t sharedMem)
 85 | {
 86 |     MRCUDASharedMemLocalInfo_t *sharedMemInfo = calloc(1, sizeof(MRCUDASharedMemLocalInfo_t));
 87 |     if (sharedMemInfo == NULL)
 88 |         goto __mhelper_mem_get_err_0;
 89 |     if ((sharedMemInfo->shmid = shmget(sharedMem.key, sharedMem.size, 0666)) <= 0)
 90 |         goto __mhelper_mem_get_err_1;
 91 |     if ((sharedMemInfo->startAddr = shmat(sharedMemInfo->shmid, NULL, 0)) == NULL)
 92 |         goto __mhelper_mem_get_err_1;
 93 |     sharedMemInfo->sharedMem = sharedMem;
 94 |     return sharedMemInfo;
 95 | 
 96 | __mhelper_mem_get_err_1:
 97 |     free(sharedMemInfo);
 98 | __mhelper_mem_get_err_0:
 99 |     return NULL;
100 | }
101 | 
102 | /**
103 |  * Detach the shared region specified by the sharedMemInfo.
104 |  * @param sharedMemInfo the information of the shared region.
105 |  * @return 0 on success; another number otherwise.
106 |  */
107 | int mhelper_mem_detach(MRCUDASharedMemLocalInfo_t *sharedMemInfo)
108 | {
109 |     return shmdt(sharedMemInfo->startAddr);
110 | }
111 | 
112 | 


--------------------------------------------------------------------------------
/src/intercomm_mem.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MRCUDA_INTERCOMM_MEM__HEADER__
 2 | #define __MRCUDA_INTERCOMM_MEM__HEADER__
 3 | 
 4 | #include <sys/shm.h>
 5 | #include <sys/ipc.h>
 6 | #include <sys/types.h>
 7 | 
 8 | #include "datatypes.h"
 9 | 
10 | /**
11 |  * Malloc memory on shared-memory region.
12 |  * @param size the size of memory to be allocated.
13 |  * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise.
14 |  */
15 | MRCUDASharedMemLocalInfo_t *mhelper_mem_malloc(size_t size);
16 | 
17 | /**
18 |  * Detach and destroy the shared region specified by the sharedMemInfo.
19 |  * @param sharedMemInfo the information of the shared region.
20 |  * @return 0 on success; other number otherwise.
21 |  */
22 | int mhelper_mem_free(MRCUDASharedMemLocalInfo_t *sharedMemInfo);
23 | 
24 | /**
25 |  * Get the memory region associated with the specified sharedMem.
26 |  * @param sharedMem the minimum information of the shared region.
27 |  * @return a ptr to a MRCUDASharedMemLocalInfo_t on success. NULL otherwise.
28 |  */
29 | MRCUDASharedMemLocalInfo_t *mhelper_mem_get(MRCUDASharedMem_t sharedMem);
30 | 
31 | /**
32 |  * Detach the shared region specified by the sharedMemInfo.
33 |  * @param sharedMemInfo the information of the shared region.
34 |  * @return 0 on success; another number otherwise.
35 |  */
36 | int mhelper_mem_detach(MRCUDASharedMemLocalInfo_t *sharedMemInfo);
37 | 
38 | #endif /* __MRCUDA_INTERCOMM_MEM__HEADER__ */
39 | 
40 | 


--------------------------------------------------------------------------------
/src/mrcuda.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MRCUDA__HEADER__
 2 | #define __MRCUDA__HEADER__
 3 | 
 4 | #include <cuda_runtime.h>
 5 | #include <glib.h>
 6 | #include "common.h"
 7 | #include "datatypes.h"
 8 | 
 9 | extern MRCUDASym_t *mrcudaSymNvidia;
10 | extern MRCUDASym_t *mrcudaSymRCUDA;
11 | 
12 | extern int mrcudaNumGPUs;
13 | extern MRCUDAGPU_t *mrcudaGPUList;
14 | 
15 | extern GHashTable *mrcudaGPUThreadMap;
16 | 
17 | extern MRCUDAState_e mrcudaState;
18 | 
19 | /**
20 |  * Get the GPU assigned to the calling thread.
21 |  * @return a pointer to the assigned GPU.
22 |  */
23 | MRCUDAGPU_t *mrcuda_get_current_gpu();
24 | 
25 | /**
26 |  * Set the GPU assigned to the calling thread.
27 |  * @param device virtual device ID.
28 |  */
29 | void mrcuda_set_current_gpu(int device);
30 | 
31 | 
32 | /**
33 |  * Initialize mrCUDA.
34 |  * Print error and terminate the program if an error occurs.
35 |  */
36 | void mrcuda_init();
37 | 
38 | /**
39 |  * Finalize mrCUDA.
40 |  */
41 | int mrcuda_fini();
42 | 
43 | /**
44 |  * Switch the specified mrcudaGPU from rCUDA to native.
45 |  * @param mrcudaGPU a ptr to the mrcudaGPU to be switched.
46 |  * @param toGPUNumber the native GPU number to be moved to.
47 |  */
48 | void mrcuda_switch(MRCUDAGPU_t *mrcudaGPU, int toGPUNumber);
49 | 
50 | /**
51 |  * Create a barrier such that subsequent calls are blocked until the barrier is released.
52 |  * @param mrcudaGPU a ptr to the GPU a barrier will be created on.
53 |  */
54 | void mrcuda_function_call_lock(MRCUDAGPU_t *mrcudaGPU);
55 | 
56 | /**
57 |  * Release the barrier; thus, allow subsequent calls to be processed normally.
58 |  * @param mrcudaGPU a ptr to the GPU the barrier will be released.
59 |  */
60 | void mrcuda_function_call_release(MRCUDAGPU_t *mrcudaGPU);
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/src/record.h:
--------------------------------------------------------------------------------
  1 | #ifndef __MRCUDA_RECORD__HEADER__
  2 | #define __MRCUDA_RECORD__HEADER__
  3 | 
  4 | #include <cuda_runtime.h>
  5 | #include <glib.h>
  6 | 
  7 | #include "common.h"
  8 | #include "datatypes.h"
  9 | 
 10 | extern double recordAccTime;
 11 | extern double memsyncAccTime;
 12 | extern double memsyncrCUDAAccTime;
 13 | extern double memsyncNvidiaAccTime;
 14 | extern int memsyncNumCalls;
 15 | extern double memsyncSize;
 16 | 
 17 | extern MRecordGPU_t *mrecordGPUList;
 18 | 
 19 | /**
 20 |  * Initialize the record/replay module.
 21 |  * Exit and report error if found.
 22 |  */
 23 | void mrcuda_record_init();
 24 | 
 25 | /**
 26 |  * Finalize the record/replay module.
 27 |  */
 28 | void mrcuda_record_fini();
 29 | 
 30 | /**
 31 |  * Record a cudaRegisterFatBinary call.
 32 |  */
 33 | void mrcuda_record_cudaRegisterFatBinary(MRCUDAGPU_t *mrcudaGPU, void* fatCubin, void **fatCubinHandle);
 34 | 
 35 | /**
 36 |  * Record a cudaRegisterFunction call.
 37 |  */
 38 | void mrcuda_record_cudaRegisterFunction(
 39 |     MRCUDAGPU_t *mrcudaGPU,
 40 |     void **fatCubinHandle, 
 41 |     const char *hostFun, 
 42 |     char *deviceFun, 
 43 |     const char *deviceName, 
 44 |     int thread_limit, 
 45 |     uint3 *tid, 
 46 |     uint3 *bid, 
 47 |     dim3 *bDim, 
 48 |     dim3 *gDim, 
 49 |     int *wSize
 50 | );
 51 | 
 52 | /**
 53 |  * Record a cudaRegisterVar call.
 54 |  */
 55 | void mrcuda_record_cudaRegisterVar(
 56 |     MRCUDAGPU_t *mrcudaGPU, 
 57 |     void **fatCubinHandle, 
 58 |     char *hostVar, 
 59 |     char *deviceAddress,
 60 |     const char *deviceName,
 61 |     int ext,
 62 |     int size,
 63 |     int constant,
 64 |     int global
 65 | );
 66 | 
 67 | /**
 68 |  * Record a cudaRegisterTexture call.
 69 |  */
 70 | void mrcuda_record_cudaRegisterTexture(
 71 |     MRCUDAGPU_t *mrcudaGPU,
 72 |     void **fatCubinHandle,
 73 |     const struct textureReference *hostVar,
 74 |     const void **deviceAddress,
 75 |     const char *deviceName,
 76 |     int dim,
 77 |     int norm,
 78 |     int ext
 79 | );
 80 | 
 81 | /**
 82 |  * Record a cudaUnregisterFatBinary call.
 83 |  */
 84 | void mrcuda_record_cudaUnregisterFatBinary(MRCUDAGPU_t *mrcudaGPU, void **fatCubinHandle);
 85 | 
 86 | /**
 87 |  * Record a cudaMalloc call.
 88 |  */
 89 | void mrcuda_record_cudaMalloc(MRCUDAGPU_t *mrcudaGPU, void **devPtr, size_t size);
 90 | 
 91 | /**
 92 |  * Record a cudaFree call.
 93 |  */
 94 | void mrcuda_record_cudaFree(MRCUDAGPU_t *mrcudaGPU, void *devPtr);
 95 | 
 96 | /**
 97 |  * Record a cudaBindTexture call.
 98 |  */
 99 | void mrcuda_record_cudaBindTexture(
100 |     MRCUDAGPU_t *mrcudaGPU, 
101 |     size_t *offset, 
102 |     const struct textureReference *texref, 
103 |     const void *devPtr, 
104 |     const struct cudaChannelFormatDesc *desc, 
105 |     size_t size
106 | );
107 | 
108 | /**
109 |  * Record a cudaStreamCreate call.
110 |  */
111 | void mrcuda_record_cudaStreamCreate(MRCUDAGPU_t *mrcudaGPU, cudaStream_t *pStream);
112 | 
113 | /**
114 |  * Record a cudaHostAlloc call.
115 |  * The dual function of this call is mrcuda_replay_cudaFreeHost.
116 |  */
117 | void mrcuda_record_cudaHostAlloc(MRCUDAGPU_t *mrcudaGPU, void **pHost, size_t size, unsigned int flags);
118 | 
119 | /**
120 |  * Record a cudaSetDeviceFlags call.
121 |  */
122 | void mrcuda_record_cudaSetDeviceFlags(MRCUDAGPU_t *mrcudaGPU, unsigned int flags);
123 | 
124 | 
125 | /**
126 |  * Replay a cudaRegisterFatBinary call.
127 |  */
128 | void mrcuda_replay_cudaRegisterFatBinary(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
129 | 
130 | /**
131 |  * Replay a cudaRegisterFunction call.
132 |  */
133 | void mrcuda_replay_cudaRegisterFunction(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
134 | 
135 | /**
136 |  * Replay a cudaRegisterVar call.
137 |  */
138 | void mrcuda_replay_cudaRegisterVar(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
139 | 
140 | /**
141 |  * Replay a cudaRegisterTexture call.
142 |  */
143 | void mrcuda_replay_cudaRegisterTexture(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
144 | 
145 | /**
146 |  * Replay a cudaUnregisterFatBinary call.
147 |  */
148 | void mrcuda_replay_cudaUnregisterFatBinary(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
149 | 
150 | /**
151 |  * Replay a cudaMalloc call.
152 |  */
153 | void mrcuda_replay_cudaMalloc(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
154 | 
155 | /**
156 |  * Replay a cudaFree call.
157 |  */
158 | void mrcuda_replay_cudaFree(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
159 | 
160 | /**
161 |  * Replay a cudaBindTexture call.
162 |  */
163 | void mrcuda_replay_cudaBindTexture(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
164 | 
165 | /**
166 |  * Replay a cudaStreamCreate call.
167 |  */
168 | void mrcuda_replay_cudaStreamCreate(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
169 | 
170 | /**
171 |  * Replay a cudaFreeHost call.
172 |  * This function looks for the library used for allocating the ptr.
173 |  * The dual function of this call is mrcuda_record_cudaHostAlloc.
174 |  */
175 | MRCUDASym_t *mrcuda_replay_cudaFreeHost(MRCUDAGPU_t *mrcudaGPU, void *ptr);
176 | 
177 | /**
178 |  * Replay a cudaSetDeviceFlags call.
179 |  */
180 | void mrcuda_replay_cudaSetDeviceFlags(MRCUDAGPU_t *mrcudaGPU, MRecord_t *record);
181 | 
182 | /**
183 |  * Download the content of active memory regions to the native device.
184 |  * Exit and report error if an error is found.
185 |  * @param mrcudaGPU a ptr to a MRCUDAGPU_t that the sync mem will be performed on.
186 |  */
187 | void mrcuda_sync_mem(MRCUDAGPU_t *mrcudaGPU);
188 | 
189 | /**
190 |  * Simulate cuda streams on the native CUDA so that the number of streams are equaled to that of rCUDA.
191 |  * @param mrcudaGPU a ptr to a MRCUDAGPU_t that the simulate stream will be performed on.
192 |  */
193 | void mrcuda_simulate_stream(MRCUDAGPU_t *mrcudaGPU);
194 | 
195 | /**
196 |  * Simulate cuCtxCreate on the specified gpuID.
197 |  * If mrcudaGPU->status == MRCUDA_GPU_STATUS_HELPER, ask the helper to handle the command.
198 |  * @param mrcudaGPU a ptr to a MRCUDAGPU_t.
199 |  * @param gpuID the ID of the GPU a context will be created on.
200 |  * @return 0 on success; -1 otherwise.
201 |  */
202 | int mrcuda_simulate_cuCtxCreate(MRCUDAGPU_t *mrcudaGPU, int gpuID);
203 | 
204 | #endif
205 | 


--------------------------------------------------------------------------------
/tests/Makefile.am:
--------------------------------------------------------------------------------
1 | TESTS = check_comm
2 | check_PROGRAMS = check_comm
3 | check_comm_SOURCES = check_comm.c $(top_builddir)/src/comm.h
4 | check_comm_CFLAGS = @CHECK_CFLAGS@ -pthread 
5 | check_comm_LDADD = $(top_builddir)/src/libcomm.a @CHECK_LIBS@
6 | check_comm_LDFLAGS = -pthread -lpthread
7 | 
8 | 


--------------------------------------------------------------------------------
/tests/check_comm.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <unistd.h>
 4 | #include <sys/types.h>
 5 | #include <sys/stat.h>
 6 | #include <fcntl.h>
 7 | 
 8 | #include <check.h>
 9 | 
10 | #include "../src/comm.h"
11 | 
12 | static int __getSignalFlag = 0;
13 | 
14 | void process_signal(void)
15 | {
16 |     __getSignalFlag = 1;
17 | }
18 | 
19 | START_TEST(test_mrcuda_comm_listen_for_signal)
20 | {
21 |     char *path = "/tmp/mrcuda.pipe";
22 | 	int fd;
23 |     int ret;
24 | 
25 | 	unlink(path);
26 |     ret = mrcuda_comm_listen_for_signal(path, &process_signal);
27 |     ck_assert(ret == 0);
28 | 
29 |     fd = open(path, O_WRONLY);
30 |     write(fd, "1", sizeof("1"));
31 |     close(fd);
32 | 
33 |     while(!__getSignalFlag)
34 |         sleep(1);
35 | }
36 | END_TEST
37 | 
38 | Suite *comm_suit(void)
39 | {
40 | 	Suite *s;
41 | 	TCase *tc_core;
42 | 
43 | 	s = suite_create("Comm");
44 | 
45 | 	tc_core = tcase_create("Core");
46 | 
47 | 	tcase_add_test(tc_core, test_mrcuda_comm_listen_for_signal);
48 | 	suite_add_tcase(s, tc_core);
49 | 
50 | 	return s;
51 | }
52 | 
53 | int main(void)
54 | {
55 | 	int number_failed;
56 | 	Suite *s;
57 | 	SRunner *sr;
58 | 
59 | 	s = comm_suit();
60 | 	sr = srunner_create(s);
61 | 
62 | 	srunner_run_all(sr, CK_NORMAL);
63 | 	number_failed = srunner_ntests_failed(sr);
64 | 	srunner_free(sr);
65 | 
66 | 	return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
67 | }
68 | 


--------------------------------------------------------------------------------
/tests/check_record.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | #include "../src/record.h"
 4 | 
 5 | START_TEST(test_mrcuda_record_cudaRegisterFatBinary)
 6 | {
 7 |     void *fatCubin = malloc(sizeof(void *) * 10);
 8 |     mrcuda_record_cudaRegisterFatBinary(fatCubin);
 9 |     ck_assert(mrcudaRecordHeadPtr != NULL);
10 |     ck_assert(mrcudaRecordTailPtr != NULL);
11 |     ck_assert(strcmp(mrcudaRecordTailPtr->functionName, "cudaRegisterFatBinary") == 0);
12 |     ck_assert(mrcudaRecordTailPtr->replayFunction == &mrcuda_replay_cudaRegisterFatBinary);
13 |     ck_assert(mrcudaRecordTailPtr->data.cudaRegisterFatBinary.fatCubin == fatCubin);
14 |     free(fatCubin);
15 | }
16 | END_TEST
17 | 
18 | Suite *comm_suit(void)
19 | {
20 | 	Suite *s;
21 | 	TCase *tc_core;
22 | 
23 | 	s = suite_create("Record");
24 | 
25 | 	tc_core = tcase_create("Core");
26 | 
27 | 	tcase_add_test(tc_core, test_mrcuda_record_cudaRegisterFatBinary);
28 | 	suite_add_tcase(s, tc_core);
29 | 
30 | 	return s;
31 | }
32 | 
33 | int main(void)
34 | {
35 | 	int number_failed;
36 | 	Suite *s;
37 | 	SRunner *sr;
38 | 
39 | 	s = comm_suit();
40 | 	sr = srunner_create(s);
41 | 
42 | 	srunner_run_all(sr, CK_NORMAL);
43 | 	number_failed = srunner_ntests_failed(sr);
44 | 	srunner_free(sr);
45 | 
46 | 	return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
47 | }
48 | 


--------------------------------------------------------------------------------
/tests/progs/benchmark.memcpybw.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #k=0
 4 | #while [ $k -lt 15 ]
 5 | #do
 6 | #    num=`echo "2^$k" | bc`
 7 | #    j=0
 8 | #    while [ $j -lt 10 ]
 9 | #    do
10 | #        memsize=`echo "2^(20+$j)" | bc`
11 | #        i=0
12 | #        while [ $i -lt  10 ]
13 | #        do
14 | #            echo "mrcuda $memsize $num"
15 | #            taskset 1 ~/src/mrCUDA/scripts/mrCUDAExec -t IB -s rc015 --switch-threshold=1 -- ./memcpybw $memsize $num
16 | #            i=`expr $i + 1`
17 | #            sleep 1
18 | #        done
19 | #        j=`expr $j + 1`
20 | #    done
21 | #    k=`expr $k + 1`
22 | #done
23 | 
24 | j=0
25 | while [ $j -lt 20 ]
26 | do
27 |     memsize=`echo "2^($j)" | bc`
28 |     i=0
29 |     while [ $i -lt  10 ]
30 |     do
31 |         echo "mrcuda $memsize 1"
32 |         taskset 1 ~/src/mrCUDA/scripts/mrCUDAExec -t IB -s rc015 --switch-threshold=1 -- ./memcpybw $memsize 1
33 |         i=`expr $i + 1`
34 |         sleep 1
35 |     done
36 |     j=`expr $j + 1`
37 | done
38 | 


--------------------------------------------------------------------------------
/tests/progs/benchmark.nullker.cudamemcpy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #i=0
 4 | #while [ $i -lt  10 ]
 5 | #do
 6 | #    echo "nullker mrcuda $i"
 7 | #    taskset 1 ~/src/mrCUDA/scripts/mrCUDAExec -t IB -f ~/src/mrCUDA/scripts/sf.in -n 2 -- ./nullker
 8 | #    i=`expr $i + 1`
 9 | #done
10 | #
11 | #sleep 1
12 | #
13 | #i=0
14 | #while [ $i -lt  10 ]
15 | #do
16 | #    echo "nullker native $i"
17 | #    taskset 1 ./nullker
18 | #    i=`expr $i + 1`
19 | #done
20 | #
21 | #sleep 1
22 | 
23 | i=8
24 | while [ $i -lt  10 ]
25 | do
26 |     echo "cudamemcpy mrcuda $i"
27 |     taskset 1 ~/src/mrCUDA/scripts/mrCUDAExec -t IB -f ~/src/mrCUDA/scripts/sf.in -n 2 -- ./cudamemcpy
28 |     i=`expr $i + 1`
29 | done
30 | 
31 | sleep 1
32 | 
33 | i=0
34 | while [ $i -lt  10 ]
35 | do
36 |     echo "cudamemcpy native $i"
37 |     taskset 1 ./cudamemcpy
38 |     i=`expr $i + 1`
39 | done
40 | 
41 | 


--------------------------------------------------------------------------------
/tests/progs/cudamemcpy.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <sys/time.h>
 3 | #include <cuda_runtime.h>
 4 | 
 5 | #define MEMSIZE (1 << 30)
 6 | 
 7 | #define CUDA_SAFE_CALL(func) \
 8 |     { \
 9 |         if ((func) != cudaSuccess ) { \
10 |             fprintf(stderr, "ERROR\n"); \
11 |             exit(EXIT_FAILURE); \
12 |         } \
13 |     }
14 | 
15 | static inline double get_elapsed_time(struct timeval *begin, struct timeval *end)
16 | {
17 |     return (end->tv_sec - begin->tv_sec) * 1000
18 |             + (end->tv_usec - begin->tv_usec) / 1000.0;
19 | }
20 |  
21 | __global__ 
22 | void null() 
23 | {
24 | }
25 | 
26 | int main()
27 | {
28 |     int i = 0;
29 |     struct timeval t1, t2;
30 | 	dim3 dimBlock( 1, 1 );
31 | 	dim3 dimGrid( 1, 1 );
32 |     char *pDev0, *pDev1, *pHost;
33 | 
34 |     /* Initialize phase to force migration */
35 |     if ((pHost = (char *)malloc(sizeof(char) * MEMSIZE)) == NULL) {
36 |         perror("MALLOC ERROR:");
37 |         exit(EXIT_FAILURE);
38 |     }
39 | 
40 |     CUDA_SAFE_CALL(cudaSetDevice(0));
41 |     CUDA_SAFE_CALL(cudaMalloc(&pDev0, sizeof(char) * MEMSIZE));
42 |     CUDA_SAFE_CALL(cudaMemcpy(pDev0, pHost, sizeof(char) * MEMSIZE, cudaMemcpyHostToDevice));
43 |     while (i < 2000) {
44 |         null<<<dimBlock, dimBlock>>>();
45 |         i++;
46 |     }
47 |     CUDA_SAFE_CALL(cudaDeviceSynchronize());
48 |     CUDA_SAFE_CALL(cudaMemcpy(pDev0, pHost, sizeof(char) * MEMSIZE, cudaMemcpyHostToDevice));
49 | 
50 |     CUDA_SAFE_CALL(cudaSetDevice(1));
51 |     CUDA_SAFE_CALL(cudaMalloc(&pDev1, sizeof(char) * MEMSIZE));
52 |     CUDA_SAFE_CALL(cudaMemcpy(pDev1, pHost, sizeof(char) * MEMSIZE, cudaMemcpyHostToDevice));
53 |     i = 0;
54 |     while (i < 2000) {
55 |         null<<<dimBlock, dimBlock>>>();
56 |         i++;
57 |     }
58 |     CUDA_SAFE_CALL(cudaDeviceSynchronize());
59 |     CUDA_SAFE_CALL(cudaMemcpy(pDev1, pHost, sizeof(char) * MEMSIZE, cudaMemcpyHostToDevice));
60 | 
61 |     //CUDA_SAFE_CALL(cudaSetDevice(0));
62 |     /* mhelper benchmark phase */
63 |     for (int iter = 0; iter < 20; iter++) {
64 |         int size = sizeof(char) * (1 << (10 + iter));
65 |         gettimeofday(&t1, NULL);
66 |         for (int j = 0; j < 1000; j++)
67 |             CUDA_SAFE_CALL(cudaMemcpy(pDev1, pHost, size, cudaMemcpyHostToDevice));
68 |         gettimeofday(&t2, NULL);
69 |         printf("%d %f\n", size, get_elapsed_time(&t1, &t2));
70 |     }
71 | 
72 | 	return EXIT_SUCCESS;
73 | }
74 | 
75 | 


--------------------------------------------------------------------------------
/tests/progs/gpuaddr.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <cuda_runtime.h>
 3 | 
 4 | #define CUDA_SAFE_CALL(x) \
 5 | { \
 6 |     if ((x) != cudaSuccess) { \
 7 |         fprintf(stderr, "Error!");   \
 8 |         exit(EXIT_FAILURE); \
 9 |     } \
10 | }
11 | 
12 | int main()
13 | {
14 |     float *a;
15 |     CUDA_SAFE_CALL(cudaMalloc(&a, sizeof(float)));
16 |     printf("a is %p\n", a);
17 |     getchar();
18 |     return 0;
19 | }
20 | 
21 | 


--------------------------------------------------------------------------------
/tests/progs/hello.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 |  
 3 | const int N = 16; 
 4 | const int blocksize = 16; 
 5 |  
 6 | __global__ 
 7 | void hello(char *a, int *b) 
 8 | {
 9 | 	a[threadIdx.x] += b[threadIdx.x];
10 | }
11 |  
12 | int main()
13 | {
14 | 	char a[N] = "Hello \0\0\0\0\0\0";
15 | 	int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
16 |  
17 | 	char *ad;
18 | 	int *bd;
19 | 	const int csize = N*sizeof(char);
20 | 	const int isize = N*sizeof(int);
21 |  
22 | 	printf("%s", a);
23 |  
24 | 	cudaMalloc( (void**)&ad, csize ); 
25 | 	cudaMalloc( (void**)&bd, isize ); 
26 | 	cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice ); 
27 | 	cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice ); 
28 | 	
29 | 	dim3 dimBlock( blocksize, 1 );
30 | 	dim3 dimGrid( 1, 1 );
31 | 	hello<<<dimGrid, dimBlock>>>(ad, bd);
32 | 	cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost ); 
33 | 	cudaFree( ad );
34 | 	cudaFree( bd );
35 | 	
36 | 	printf("%s\n", a);
37 | 	return EXIT_SUCCESS;
38 | }
39 | 


--------------------------------------------------------------------------------
/tests/progs/hellomul.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <cuda_runtime.h>
 3 | 
 4 | #define CUDA_SAFE_CALL(func) \
 5 |     { \
 6 |         if ((func) != cudaSuccess ) { \
 7 |             fprintf(stderr, "ERROR\n"); \
 8 |             exit(EXIT_FAILURE); \
 9 |         } \
10 |     }
11 |  
12 | const int N = 16; 
13 | const int blocksize = 16; 
14 |  
15 | __global__ 
16 | void hello(char *a, int *b) 
17 | {
18 | 	a[threadIdx.x] += b[threadIdx.x];
19 | }
20 | 
21 | __global__ 
22 | void null() 
23 | {
24 | }
25 | 
26 |  
27 | int main()
28 | {
29 |     int i = 0;
30 | 	char a[N] = "Hello \0\0\0\0\0\0";
31 | 	int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
32 |  
33 | 	char *ad;
34 | 	int *bd;
35 | 	const int csize = N*sizeof(char);
36 | 	const int isize = N*sizeof(int);
37 | 	dim3 dimBlock( blocksize, 1 );
38 | 	dim3 dimGrid( 1, 1 );
39 |  
40 | 	printf("%s", a);
41 |  
42 |     CUDA_SAFE_CALL(cudaSetDevice(1));
43 | 	CUDA_SAFE_CALL(cudaMalloc( (void**)&ad, csize )); 
44 | 	CUDA_SAFE_CALL(cudaMalloc( (void**)&bd, isize )); 
45 | 	CUDA_SAFE_CALL(cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice )); 
46 | 	CUDA_SAFE_CALL(cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice )); 
47 | 
48 |     CUDA_SAFE_CALL(cudaSetDevice(0));
49 |     while (i < 20) {
50 |         null<<<dimBlock, dimBlock>>>();
51 |         i++;
52 |     }
53 |     CUDA_SAFE_CALL(cudaDeviceSynchronize());
54 | 
55 |     CUDA_SAFE_CALL(cudaSetDevice(1));
56 |     i = 0;
57 |     while (i < 20) {
58 |         null<<<dimBlock, dimBlock>>>();
59 |         i++;
60 |     }
61 |     CUDA_SAFE_CALL(cudaDeviceSynchronize());
62 | 	
63 | 	hello<<<dimGrid, dimBlock>>>(ad, bd);
64 | 	CUDA_SAFE_CALL(cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost )); 
65 | 	
66 | 	printf("%s\n", a);
67 | 	return EXIT_SUCCESS;
68 | }
69 | 
70 | 


--------------------------------------------------------------------------------
/tests/progs/matmul_mul.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <cuda.h>
  4 | #include <cuda_runtime.h>
  5 | #include <sys/time.h>
  6 | 
  7 | #define BS (16)
  8 | #define L (16)
  9 | #define M (16)
 10 | #define N (16)
 11 | 
 12 | __global__ void matmul(float *A, float *B, float *C,
 13 |                        int l, int m, int n)
 14 | {
 15 |     int i, j, k;
 16 |     float sum;
 17 | 
 18 |     i = blockIdx.y * blockDim.y + threadIdx.y;
 19 |     j = blockIdx.x * blockDim.x + threadIdx.x;
 20 | 
 21 |     sum = 0.0;
 22 |     for (k = 0; k < m; k++) {
 23 |         sum += A[i * m + k] * B[k * n + j];
 24 |     }
 25 |     C[i*n+j] = sum;
 26 | }
 27 | 
 28 | __global__ void thread_matrix(float *A,
 29 |                        int l, int n)
 30 | {
 31 |     int i, j;
 32 | 
 33 |     i = blockIdx.y * blockDim.y + threadIdx.y;
 34 |     j = blockIdx.x * blockDim.x + threadIdx.x;
 35 | 
 36 |     A[i * n + j] = i * n + j;
 37 | }
 38 | 
 39 | void matmul_cpu(float *A, float *B, float *C,
 40 |                        int l, int m, int n)
 41 | {
 42 |     int i, j, k;
 43 |     for (i = 0; i < l; i++) {
 44 |         for (j = 0; j < n; j++) {
 45 |             float sum = 0.0;
 46 |             for (k = 0; k < m; k++) {
 47 |                 sum += A[i * m + k] * B[k * n + j];
 48 |             }
 49 |             C[i*n+j] = sum;
 50 |         }
 51 |     }
 52 | }
 53 | 
 54 | void print_matrix(float *A, int l, int n)
 55 | {
 56 |     int i, j;
 57 |     for (i = 0; i < l; i++) {
 58 |         for (j = 0; j < n; j++) {
 59 |             printf("%f ", A[i * n + j]);
 60 |         }
 61 |         printf("\n");
 62 |     }
 63 | }
 64 | 
 65 | int compare_matrix(float *A, float *B, int l, int n)
 66 | {
 67 |     int i, j;
 68 |     int ret = 0;
 69 |     for (i = 0; i < l; i++) {
 70 |         for (j = 0; j < n; j++) {
 71 |             if(A[i * n + j] != B[i * n + j])
 72 |                 ret = -1;
 73 |         }
 74 |     }
 75 |     return ret;
 76 | }
 77 | 
 78 | void alloc_matrix(float **m_h, float **m_d, int h, int w)
 79 | {
 80 |     *m_h = (float *)malloc(sizeof(float) * h * w);
 81 |     cudaMalloc((void **)m_d, sizeof(float) * h * w);
 82 | }
 83 | 
 84 | void init_matrix(float *m, int h, int w)
 85 | {
 86 |     int i, j;
 87 |     for (i = 0; i < h; i++)
 88 |         for (j = 0; j < w; j++)
 89 |             m[i * w + j] = (float)(random() % 100);
 90 | }
 91 | 
 92 | int check_error(const char *err_msg)
 93 | {
 94 |     cudaError_t err = cudaGetLastError();
 95 |     if (err != cudaSuccess) {
 96 |         fprintf(stderr, "CUDA error: %s: %s.\n",
 97 |                 err_msg, cudaGetErrorString(err));
 98 |         return 1;
 99 |     }
100 |     return 0;
101 | }
102 | 
103 | double get_elapsed_time(struct timeval *begin, struct timeval *end)
104 | {
105 |     return (end->tv_sec - begin->tv_sec) * 1000
106 |             + (end->tv_usec - begin->tv_usec) / 1000.0;
107 | }
108 | 
109 | int main(int argc, char *argv[])
110 | {
111 |     float *Ad1, *Bd1, *Cd1;
112 |     float *Ah1, *Bh1, *Ch1;
113 |     float *Ad2, *Bd2, *Cd2;
114 |     float *Ah2, *Bh2, *Ch2;
115 |     struct timeval t1, t2;
116 |     float *C_cpu;
117 | 
118 |     int num_device = 0;
119 | 
120 |     if (cudaGetDeviceCount(&num_device) != cudaSuccess || num_device < 2) {
121 |         fprintf(stderr, "This program needs at least 2 devices.\n");
122 |         exit(EXIT_FAILURE);
123 |     }
124 | 
125 |     cudaSetDevice(0);
126 | 
127 |     // prepare matrix A
128 |     alloc_matrix(&Ah1, &Ad1, L, M);
129 |     init_matrix(Ah1, L, M);
130 |     cudaMemcpy(Ad1, Ah1, sizeof(float) * L * M,
131 |                cudaMemcpyHostToDevice);
132 |     // do it again for matrix B
133 |     alloc_matrix(&Bh1, &Bd1, M, N);
134 |     init_matrix(Bh1, M, N);
135 |     cudaMemcpy(Bd1, Bh1, sizeof(float) * M * N,
136 |                cudaMemcpyHostToDevice);
137 |     // allocate spaces for matrix C
138 |     alloc_matrix(&Ch1, &Cd1, L, N);
139 | 
140 |     cudaDeviceSynchronize();
141 |     gettimeofday(&t1, NULL);
142 | 
143 |     // launch matmul kernel
144 |     matmul<<<dim3(N / BS, L / BS),
145 |             dim3(BS, BS)>>>(Ad1, Bd1, Cd1, L, M, N);
146 | 
147 |     if (check_error("matmul")) {
148 |         exit(EXIT_FAILURE);
149 |     }
150 | 
151 |     cudaDeviceSynchronize();
152 |     gettimeofday(&t2, NULL);
153 |     printf("Elapsed time: %f msec\n", get_elapsed_time(&t1, &t2));
154 |     
155 |     // obtain the result
156 |     cudaMemcpy(Ch1, Cd1, sizeof(float) * L * N, cudaMemcpyDeviceToHost);
157 |     C_cpu = (float *)malloc(sizeof(float) * L * N);
158 |     matmul_cpu(Ah1, Bh1, C_cpu, L, M, N);
159 |     print_matrix(Ch1, L, N);
160 |     printf("\n");
161 |     print_matrix(C_cpu, L, N);
162 |     printf("\n");
163 | 
164 |     if(compare_matrix(Ch1, C_cpu, L, N) >= 0)
165 |         printf("OK\n");
166 |     else
167 |         printf("ERRRRR\n");
168 | 
169 |     free(C_cpu);
170 | 
171 |     cudaSetDevice(1);
172 | 
173 |     // prepare matrix A
174 |     alloc_matrix(&Ah2, &Ad2, L, M);
175 |     init_matrix(Ah2, L, M);
176 |     cudaMemcpy(Ad2, Ah2, sizeof(float) * L * M,
177 |                cudaMemcpyHostToDevice);
178 |     // do it again for matrix B
179 |     alloc_matrix(&Bh2, &Bd2, M, N);
180 |     init_matrix(Bh2, M, N);
181 |     cudaMemcpy(Bd2, Bh2, sizeof(float) * M * N,
182 |                cudaMemcpyHostToDevice);
183 |     // allocate spaces for matrix C
184 |     alloc_matrix(&Ch2, &Cd2, L, N);
185 | 
186 |     cudaDeviceSynchronize();
187 |     gettimeofday(&t1, NULL);
188 | 
189 |     // launch matmul kernel
190 |     matmul<<<dim3(N / BS, L / BS),
191 |             dim3(BS, BS)>>>(Ad2, Bd2, Cd2, L, M, N);
192 | 
193 |     if (check_error("matmul")) {
194 |         exit(EXIT_FAILURE);
195 |     }
196 | 
197 |     cudaDeviceSynchronize();
198 |     gettimeofday(&t2, NULL);
199 |     printf("Elapsed time: %f msec\n", get_elapsed_time(&t1, &t2));
200 |     
201 |     // obtain the result
202 |     cudaMemcpy(Ch2, Cd2, sizeof(float) * L * N, cudaMemcpyDeviceToHost);
203 |     C_cpu = (float *)malloc(sizeof(float) * L * N);
204 |     matmul_cpu(Ah2, Bh2, C_cpu, L, M, N);
205 |     print_matrix(Ch2, L, N);
206 |     printf("\n");
207 |     print_matrix(C_cpu, L, N);
208 |     printf("\n");
209 | 
210 |     if(compare_matrix(Ch2, C_cpu, L, N) >= 0)
211 |         printf("OK\n");
212 |     else
213 |         printf("ERRRRR\n");
214 | 
215 |     free(C_cpu);
216 | 
217 |     cudaFree(Ad1);
218 |     cudaFree(Bd1);
219 |     cudaFree(Cd1);
220 | 
221 |     cudaFree(Ad2);
222 |     cudaFree(Bd2);
223 |     cudaFree(Cd2);
224 | 
225 |     return 0;
226 | }
227 | 
228 | 
229 | 
230 | 


--------------------------------------------------------------------------------
/tests/progs/matmul_par.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <cuda.h>
  4 | #include <cuda_runtime.h>
  5 | #include <sys/time.h>
  6 | 
  7 | #define BS (16)
  8 | #define L (16)
  9 | #define M (16)
 10 | #define N (16)
 11 | 
 12 | __global__ void matmul(float *A, float *B, float *C,
 13 |                        int l, int m, int n)
 14 | {
 15 |     int i, j, k;
 16 |     float sum;
 17 | 
 18 |     i = blockIdx.y * blockDim.y + threadIdx.y;
 19 |     j = blockIdx.x * blockDim.x + threadIdx.x;
 20 | 
 21 |     sum = 0.0;
 22 |     for (k = 0; k < m; k++) {
 23 |         sum += A[i * m + k] * B[k * n + j];
 24 |     }
 25 |     C[i*n+j] = sum;
 26 | }
 27 | 
 28 | __global__ void thread_matrix(float *A,
 29 |                        int l, int n)
 30 | {
 31 |     int i, j;
 32 | 
 33 |     i = blockIdx.y * blockDim.y + threadIdx.y;
 34 |     j = blockIdx.x * blockDim.x + threadIdx.x;
 35 | 
 36 |     A[i * n + j] = i * n + j;
 37 | }
 38 | 
 39 | void matmul_cpu(float *A, float *B, float *C,
 40 |                        int l, int m, int n)
 41 | {
 42 |     int i, j, k;
 43 |     for (i = 0; i < l; i++) {
 44 |         for (j = 0; j < n; j++) {
 45 |             float sum = 0.0;
 46 |             for (k = 0; k < m; k++) {
 47 |                 sum += A[i * m + k] * B[k * n + j];
 48 |             }
 49 |             C[i*n+j] = sum;
 50 |         }
 51 |     }
 52 | }
 53 | 
 54 | void print_matrix(float *A, int l, int n)
 55 | {
 56 |     int i, j;
 57 |     for (i = 0; i < l; i++) {
 58 |         for (j = 0; j < n; j++) {
 59 |             printf("%f ", A[i * n + j]);
 60 |         }
 61 |         printf("\n");
 62 |     }
 63 | }
 64 | 
 65 | int compare_matrix(float *A, float *B, int l, int n)
 66 | {
 67 |     int i, j;
 68 |     int ret = 0;
 69 |     for (i = 0; i < l; i++) {
 70 |         for (j = 0; j < n; j++) {
 71 |             if(A[i * n + j] != B[i * n + j])
 72 |                 ret = -1;
 73 |         }
 74 |     }
 75 |     return ret;
 76 | }
 77 | 
 78 | void alloc_matrix(float **m_h, float **m_d, int h, int w)
 79 | {
 80 |     *m_h = (float *)malloc(sizeof(float) * h * w);
 81 |     cudaMalloc((void **)m_d, sizeof(float) * h * w);
 82 | }
 83 | 
 84 | void init_matrix(float *m, int h, int w)
 85 | {
 86 |     int i, j;
 87 |     for (i = 0; i < h; i++)
 88 |         for (j = 0; j < w; j++)
 89 |             m[i * w + j] = (float)(random() % 100);
 90 | }
 91 | 
 92 | int check_error(const char *err_msg)
 93 | {
 94 |     cudaError_t err = cudaGetLastError();
 95 |     if (err != cudaSuccess) {
 96 |         fprintf(stderr, "CUDA error: %s: %s.\n",
 97 |                 err_msg, cudaGetErrorString(err));
 98 |         return 1;
 99 |     }
100 |     return 0;
101 | }
102 | 
103 | double get_elapsed_time(struct timeval *begin, struct timeval *end)
104 | {
105 |     return (end->tv_sec - begin->tv_sec) * 1000
106 |             + (end->tv_usec - begin->tv_usec) / 1000.0;
107 | }
108 | 
109 | int main(int argc, char *argv[])
110 | {
111 |     float *Ad, *Bd, *Cd;
112 |     float *Ah, *Bh, *Ch;
113 |     struct timeval t1, t2;
114 | 
115 |     // prepare matrix A
116 |     alloc_matrix(&Ah, &Ad, L, M);
117 |     init_matrix(Ah, L, M);
118 |     cudaMemcpy(Ad, Ah, sizeof(float) * L * M,
119 |                cudaMemcpyHostToDevice);
120 |     // do it again for matrix B
121 |     alloc_matrix(&Bh, &Bd, M, N);
122 |     init_matrix(Bh, M, N);
123 |     cudaMemcpy(Bd, Bh, sizeof(float) * M * N,
124 |                cudaMemcpyHostToDevice);
125 |     // allocate spaces for matrix C
126 |     alloc_matrix(&Ch, &Cd, L, N);
127 | 
128 |     cudaDeviceSynchronize();
129 |     gettimeofday(&t1, NULL);
130 | 
131 |     // launch matmul kernel
132 |     matmul<<<dim3(N / BS, L / BS),
133 |             dim3(BS, BS)>>>(Ad, Bd, Cd, L, M, N);
134 | 
135 |     if (check_error("matmul")) {
136 |         exit(EXIT_FAILURE);
137 |     }
138 | 
139 |     cudaDeviceSynchronize();
140 |     gettimeofday(&t2, NULL);
141 |     printf("Elapsed time: %f msec\n", get_elapsed_time(&t1, &t2));
142 |     
143 |     // obtain the result
144 |     cudaMemcpy(Ch, Cd, sizeof(float) * L * N, cudaMemcpyDeviceToHost);
145 |     float *C_cpu = (float *)malloc(sizeof(float) * L * N);
146 |     matmul_cpu(Ah, Bh, C_cpu, L, M, N);
147 |     print_matrix(Ch, L, N);
148 |     printf("\n");
149 |     print_matrix(C_cpu, L, N);
150 |     printf("\n");
151 | 
152 |     if(compare_matrix(Ch, C_cpu, L, N) >= 0)
153 |         printf("OK\n");
154 |     else
155 |         printf("ERRRRR\n");
156 | 
157 |     /* Switch to native */
158 |     /*cudaMalloc(NULL, 0);
159 |     printf("Switched to native.....\n");
160 |     printf("Press enter to continue...\n");
161 |     getchar();*/
162 | 
163 |     /*thread_matrix<<<dim3(N / BS, L / BS),
164 |             dim3(BS, BS)>>>(Cd, L, N);
165 |     cudaMemcpy(Ch, Cd, sizeof(float) * L * N, cudaMemcpyDeviceToHost);
166 |     print_matrix(Ch, L, N);
167 |     printf("\n");*/
168 | 
169 |     int i;
170 |     for(i = 0; i < 10; i++)
171 |     {
172 |         if(i == 3)
173 |         {
174 |             cudaMalloc(NULL, 0);
175 |             printf("Switched to native.....\n");
176 |             printf("Press enter to continue...\n");
177 |             getchar();
178 |         }
179 |         matmul<<<dim3(N / BS, L / BS),
180 |                 dim3(BS, BS)>>>(Ad, Bd, Cd, L, M, N);
181 |         cudaMemcpy(Ch, Cd, sizeof(float) * L * N, cudaMemcpyDeviceToHost);
182 |         print_matrix(Ch, L, N);
183 |         printf("\n");
184 |         print_matrix(C_cpu, L, N);
185 |         printf("\n");
186 |         if(compare_matrix(Ch, C_cpu, L, N) >= 0)
187 |             printf("OK\n");
188 |         else
189 |             printf("ERRRRR\n");
190 |     }
191 | 
192 |     free(C_cpu);
193 | 
194 |     cudaFree(Ad);
195 |     cudaFree(Bd);
196 |     cudaFree(Cd);
197 | 
198 |     return 0;
199 | }
200 | 
201 | 
202 | 
203 | 


--------------------------------------------------------------------------------
/tests/progs/memcpybw.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <sys/time.h>
 4 | #include <cuda_runtime.h>
 5 | 
 6 | #define CUDA_SAFE_CALL(func) \
 7 |     { \
 8 |         if ((func) != cudaSuccess ) { \
 9 |             fprintf(stderr, "ERROR\n"); \
10 |             exit(EXIT_FAILURE); \
11 |         } \
12 |     }
13 | 
14 | static inline double get_elapsed_time(struct timeval *begin, struct timeval *end)
15 | {
16 |     return (end->tv_sec - begin->tv_sec) * 1000
17 |             + (end->tv_usec - begin->tv_usec) / 1000.0;
18 | }
19 | 
20 | __global__ 
21 | void null() 
22 | {
23 | }
24 | 
25 | int main(int argc, char *argv[])
26 | {
27 |     int i = 0;
28 |     struct timeval t1, t2;
29 | 	dim3 dimBlock( 1, 1 );
30 | 	dim3 dimGrid( 1, 1 );
31 |     char *pDev, *pHost;
32 |     char *endpoint;
33 |     size_t memsize;
34 |     int num;
35 |     size_t secSize;
36 | 
37 |     if (argc < 3) {
38 |         fprintf(stderr, "prog memsize num\n");
39 |         exit(EXIT_FAILURE);
40 |     }
41 | 
42 |     memsize = strtol(argv[1], &endpoint, 10);
43 |     if (*endpoint != '\0') {
44 |         fprintf(stderr, "memsize has to be long int.\n");
45 |         exit(EXIT_FAILURE);
46 |     }
47 | 
48 |     num = (int)strtol(argv[2], &endpoint, 10);
49 |     if (*endpoint != '\0') {
50 |         fprintf(stderr, "num has to be int.\n");
51 |         exit(EXIT_FAILURE);
52 |     }
53 | 
54 |     secSize = memsize / num;
55 | 
56 |     /* Initialize phase to force migration */
57 |     if ((pHost = (char *)malloc(sizeof(char) * memsize)) == NULL) {
58 |         perror("MALLOC ERROR:");
59 |         exit(EXIT_FAILURE);
60 |     }
61 | 
62 |     CUDA_SAFE_CALL(cudaMalloc(&pDev, sizeof(char) * secSize));
63 |     CUDA_SAFE_CALL(cudaMemcpy(pDev, pHost, sizeof(char) * secSize, cudaMemcpyHostToDevice));
64 |     CUDA_SAFE_CALL(cudaFree(pDev));
65 |     gettimeofday(&t1, NULL);
66 |     for (i = 0; i < num; i++) {
67 |         CUDA_SAFE_CALL(cudaMalloc(&pDev, sizeof(char) * secSize));
68 |         CUDA_SAFE_CALL(cudaMemcpy(pDev, pHost, sizeof(char) * secSize, cudaMemcpyHostToDevice));
69 |     }
70 |     gettimeofday(&t2, NULL);
71 |     printf("Elapsed Time: %f\n", get_elapsed_time(&t1, &t2));
72 |     while (i < 2000) {
73 |         null<<<dimBlock, dimBlock>>>();
74 |         i++;
75 |     }
76 |     CUDA_SAFE_CALL(cudaDeviceSynchronize());
77 | 	return EXIT_SUCCESS;
78 | }
79 | 
80 | 


--------------------------------------------------------------------------------
/tests/progs/multigpuaddr.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <cuda_runtime.h>
 3 | 
 4 | #define CUDA_SAFE_CALL(x) \
 5 | { \
 6 |     if ((x) != cudaSuccess) { \
 7 |         fprintf(stderr, "Error!");   \
 8 |         exit(EXIT_FAILURE); \
 9 |     } \
10 | }
11 | 
12 | int main()
13 | {
14 |     float *a, *b;
15 |     CUDA_SAFE_CALL(cudaSetDevice(0));
16 |     CUDA_SAFE_CALL(cudaMalloc(&a, sizeof(float)));
17 |     CUDA_SAFE_CALL(cudaSetDevice(1));
18 |     CUDA_SAFE_CALL(cudaMalloc(&b, sizeof(float)));
19 |     printf("a on device 0 is %p\n", a);
20 |     printf("b on device 1 is %p\n", b);
21 |     return 0;
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/tests/progs/nullker.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <sys/time.h>
 3 | #include <cuda_runtime.h>
 4 | 
 5 | #define CUDA_SAFE_CALL(func) \
 6 |     { \
 7 |         if ((func) != cudaSuccess ) { \
 8 |             fprintf(stderr, "ERROR\n"); \
 9 |             exit(EXIT_FAILURE); \
10 |         } \
11 |     }
12 | 
13 | static inline double get_elapsed_time(struct timeval *begin, struct timeval *end)
14 | {
15 |     return (end->tv_sec - begin->tv_sec) * 1000
16 |             + (end->tv_usec - begin->tv_usec) / 1000.0;
17 | }
18 |  
19 | __global__ 
20 | void null() 
21 | {
22 | }
23 |  
24 | int main()
25 | {
26 |     int i = 0;
27 |     struct timeval t1, t2;
28 | 	dim3 dimBlock( 1, 1 );
29 | 	dim3 dimGrid( 1, 1 );
30 | 
31 |     /* Initialize phase to force migration */
32 |     CUDA_SAFE_CALL(cudaSetDevice(0));
33 |     while (i < 20) {
34 |         null<<<dimBlock, dimBlock>>>();
35 |         i++;
36 |     }
37 |     CUDA_SAFE_CALL(cudaDeviceSynchronize());
38 |     CUDA_SAFE_CALL(cudaSetDevice(1));
39 |     i = 0;
40 |     while (i < 20) {
41 |         null<<<dimBlock, dimBlock>>>();
42 |         i++;
43 |     }
44 |     CUDA_SAFE_CALL(cudaDeviceSynchronize());
45 | 
46 |     //CUDA_SAFE_CALL(cudaSetDevice(0));
47 |     /* mhelper benchmark phase */
48 |     for (int iter = 0; iter < 15; iter++) {
49 |         int j = (1 << (10 + iter)) - 1;
50 |         i = 0;
51 |         gettimeofday(&t1, NULL);
52 |         while (i < j) {
53 |             null<<<dimBlock, dimBlock>>>();
54 |             i++;
55 |         }
56 |         CUDA_SAFE_CALL(cudaDeviceSynchronize());
57 |         gettimeofday(&t2, NULL);
58 |         printf("%d %f\n", j + 1, get_elapsed_time(&t1, &t2));
59 |     }
60 | 
61 | 	return EXIT_SUCCESS;
62 | }
63 | 


--------------------------------------------------------------------------------
/tests/progs/thread_dev.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <pthread.h>
 3 | #include <sys/types.h>
 4 | #include <sys/syscall.h>
 5 | #include <unistd.h>
 6 | 
 7 | void *thread_main2(void *opaque)
 8 | {
 9 |     int *devMem;
10 |     int device;
11 |     pid_t pid;
12 |     pid = syscall(SYS_gettid);
13 |     printf("Thread 2: thread %d\n", pid);
14 |     cudaGetDevice(&device);
15 |     printf("Thread 2: Device %d\n", device);
16 |     cudaSetDevice(0);
17 |     cudaMalloc(&devMem, sizeof(int) * 16);
18 |     printf("Thread 2: Addr %p\n", devMem);
19 |     cudaGetDevice(&device);
20 |     printf("Thread 2: Device %d\n", device);
21 |     return NULL;
22 | }
23 | 
24 | void *thread_main1(void *opaque)
25 | {
26 |     int *devMem;
27 |     int device;
28 |     pthread_t t;
29 |     pid_t pid;
30 |     pid = syscall(SYS_gettid);
31 |     printf("Thread 1: thread %d\n", pid);
32 |     cudaGetDevice(&device);
33 |     printf("Thread 1: Device %d\n", device);
34 |     cudaSetDevice(1);
35 |     cudaMalloc(&devMem, sizeof(int) * 16);
36 |     printf("Thread 1: Addr %p\n", devMem);
37 |     cudaGetDevice(&device);
38 |     printf("Thread 1: Device %d\n", device);
39 |     pthread_create(&t, NULL, thread_main2, NULL);
40 |     cudaGetDevice(&device);
41 |     printf("Thread 1: Device %d\n", device);
42 |     pthread_join(t, NULL);
43 |     cudaGetDevice(&device);
44 |     printf("Thread 1: Device %d\n", device);
45 |     return NULL;
46 | }
47 |  
48 | int main()
49 | {
50 |     int *devMem;
51 |     int device;
52 |     pthread_t t;
53 |     pid_t pid;
54 |     pid = syscall(SYS_gettid);
55 |     printf("Main: thread %d\n", pid);
56 |     cudaSetDevice(0);
57 |     cudaMalloc(&devMem, sizeof(int) * 32);
58 |     printf("Main: Addr %p\n", devMem);
59 |     pthread_create(&t, NULL, thread_main1, NULL);
60 |     cudaGetDevice(&device);
61 |     printf("Main: Device %d\n", device);
62 |     pthread_join(t, NULL);
63 |     cudaGetDevice(&device);
64 |     printf("Main: Device %d\n", device);
65 |     return 0;
66 | }
67 | 


--------------------------------------------------------------------------------