├── nvtx_pmpi_wrappers
    ├── Makefile
    ├── wrap
    │   ├── examples
    │   │   ├── CMakeLists.txt
    │   │   ├── world48.w
    │   │   └── tutorial.w
    │   ├── FAQ
    │   ├── LICENSE
    │   ├── WrapConfig.cmake
    │   ├── README.md
    │   └── wrap.py
    ├── LICENSE
    ├── README.md
    └── nvtx.w
├── README.md
└── one_hop_profiling
    ├── LICENSE
    ├── README.md
    └── one_hop_profiling.pl


/nvtx_pmpi_wrappers/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds libnvtx_pmpi.so: NVTX-annotated PMPI wrappers generated from nvtx.w.
 2 | # Tools and paths may be overridden on the command line or from the
 3 | # environment, e.g. `make PYTHON=python2 CUDA_HOME=/opt/cuda`.
 4 | MPICC     ?= mpicc
 5 | PYTHON    ?= python2.7
 6 | CUDA_HOME ?= /usr/local/cuda
 7 | 
 8 | .PHONY: all clean
 9 | all: libnvtx_pmpi.so
10 | 
11 | # Link the wrapper object into a shared library against the NVTX library.
12 | libnvtx_pmpi.so: nvtx_pmpi.o
13 | 	$(MPICC) $^ -shared -o $@ -L$(CUDA_HOME)/lib64 -lnvToolsExt
14 | 
15 | # Compile as position-independent code; NVTX headers come from CUDA_HOME.
16 | nvtx_pmpi.o: nvtx_pmpi.c
17 | 	$(MPICC) -I$(CUDA_HOME)/include -DPIC -fPIC -c $< -o $@
18 | 
19 | # Regenerate the wrapper source whenever the template or the generator changes.
20 | nvtx_pmpi.c: nvtx.w wrap/wrap.py
21 | 	$(PYTHON) wrap/wrap.py -f -o $@ $<
22 | 
23 | clean:
24 | 	rm -f *.o libnvtx_pmpi.so nvtx_pmpi.c
25 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Example project: builds the world48 wrapper library generated by wrap.py.
 2 | # cmake_minimum_required() must precede project() so policies are set first.
 3 | cmake_minimum_required(VERSION 2.8)
 4 | project(wrap-examples)
 5 | 
 6 | find_package(MPI REQUIRED)
 7 | include_directories(${MPI_C_INCLUDE_PATH})
 8 | 
 9 | # WrapConfig.cmake aborts unless WRAP already points at wrap.py.
10 | set(WRAP ${PROJECT_SOURCE_DIR}/../wrap.py)
11 | include(${PROJECT_SOURCE_DIR}/../WrapConfig.cmake)
12 | 
13 | # Generate world48.C from world48.w and build it into a library linked with MPI.
14 | add_wrapped_file(world48.C world48.w)
15 | add_library(world48 world48.C)
16 | target_link_libraries(world48 ${MPI_C_LIBRARIES})


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Tools and extensions for CUDA profiling
3 | 
4 | Extension | Extends tool | Description
5 | --------- | ------------ | -----------
6 | **[one-hop profiling](/one_hop_profiling)** | NVIDIA Visual Profiler | Remotely profile a CUDA program when the machine actually running it is not accessible from the machine running the NVIDIA Visual Profiler
7 | **[NVTX MPI Wrappers](/nvtx_pmpi_wrappers)** | nvprof | Inserts NVTX ranges for many common Message Passing Interface (MPI) functions.
8 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/FAQ:
--------------------------------------------------------------------------------
 1 | Q:  I'm compiling a shared library to be used for the NAS Parallel 
 2 | Benchmarks.  What do I do?
 3 | 
 4 | A:  This works:
 5 | 
 6 |         ./wrap.py -f -g -o end2end.c end2end.w
 7 |         mpicc -DPIC -fPIC -I.. -DARCH_SANDY_BRIDGE -DARCH_062D -c end2end.c 
 8 |         mpicc -shared -Wl,-soname,libend2end.so -o ../lib/libend2end.so ../msr_core.o ../msr_rapl.o ../blr_util.o end2end.o 
 9 |         mpicc -L../lib -o harness.end2end harness.c -lend2end
10 | 
11 | Items of interest:
12 | 
13 | 1)  The flags to wrap.py will generate Fortran wrappers as well as re-entry guards.
14 | 2)  DPIC must be used along with fPIC.
15 | 3)  The above relies on LD_LIBRARY_PATH being correct and it probably shouldn't.
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/one_hop_profiling/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 4 |  
 5 | Permission is hereby granted, free of charge, to any person obtaining a
 6 | copy of this software and associated documentation files (the "Software"),
 7 | to deal in the Software without restriction, including without limitation
 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
 9 | and/or sell copies of the Software, and to permit persons to whom the
10 | Software is furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 | DEALINGS IN THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions
 5 | are met:
 6 |  * Redistributions of source code must retain the above copyright
 7 |    notice, this list of conditions and the following disclaimer.
 8 |  * Redistributions in binary form must reproduce the above copyright
 9 |    notice, this list of conditions and the following disclaimer in the
10 |    documentation and/or other materials provided with the distribution.
11 |  * Neither the name of NVIDIA CORPORATION nor the names of its
12 |    contributors may be used to endorse or promote products derived
13 |    from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 | PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 
27 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/examples/world48.w:
--------------------------------------------------------------------------------
 1 | // -*- c++ -*-
 2 | //
 3 | // world48
 4 | // Todd Gamblin, tgamblin@llnl.gov
 5 | //
 6 | // This file is an example of how to use wrap.py to fool an application
 7 | // into thinking it is running on a different communicator from
 8 | // MPI_Comm_world.
 9 | //
10 | // This was originally intended to allow applications on Blue Gene/Q to
11 | // run with 48 MPI processes per node, rather than just the power of 2
12 | // that IBM provides settings for.  The MPI_Init wrapper here will
13 | // split MPI_Comm_world into 2 groups: one for the first 48 out of every
14 | // 64 ranks and one for the last 16.  The last 16 ranks of every 64 just
15 | // call MPI_Finalize and exit normally inside of MPI_Init.  The rest of
16 | // the ranks continue to execute the rest of the application, thinking
17 | // that the world is only 3/4 as big as the real MPI_COMM_WORLD.
18 | //
19 | // To build:
20 | //    wrap.py world48.w > world48.C
21 | //    mpicc -c world48.C
22 | //    ar cr libworld48.a world48.o
23 | //    ranlib libworld48.a
24 | //
25 | // Link your application with libworld48.a, or build it as a shared lib
26 | // and LD_PRELOAD it to try out this tool.
27 | //
28 | #include <mpi.h>
29 | #include <stdlib.h>
30 | // This is a communicator that will contain the first 48 out of
31 | // every 64 ranks in the application.
32 | static MPI_Comm world48;
33 | 
34 | // Redirects a communicator argument in place: anything other than
35 | // MPI_COMM_WORLD is left alone, while MPI_COMM_WORLD is replaced by
36 | // the world48 communicator.
37 | inline void swap_world(MPI_Comm& world) {
38 |    if (world != MPI_COMM_WORLD) return;
39 |    world = world48;
40 | }
41 | 
42 | // MPI_Init wrapper: after the real PMPI_Init, split MPI_COMM_WORLD so that
43 | // world48 holds the kept ranks; the other ranks finalize and exit here.
44 | {{fn func MPI_Init}}{
45 |    // First call PMPI_Init()
46 |    {{callfn}}
47 | 
48 |    int rank;
49 |    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
50 | 
51 |    // keep == 1 for ranks whose position within each group of 64 is below 48.
52 |    int keep = (rank % 64 < 48) ? 1: 0;
53 |    PMPI_Comm_split(MPI_COMM_WORLD,  keep, rank, &world48);
54 | 
55 |    // Dropped ranks (keep == 0) shut MPI down and leave without running the app.
56 |    if (!keep) {
57 |       PMPI_Finalize();
58 |       exit(0);
59 |    }
60 | }{{endfn}}
61 | 
62 | // Generates interceptors for every MPI routine *except* MPI_Init (with
63 | // fnall, the listed functions are excluded).  Each interceptor runs
64 | // swap_world on its MPI_Comm arguments, so that MPI_COMM_WORLD is
65 | // replaced by world48, and then forwards the call.
66 | {{fnall func MPI_Init}}{
67 |    {{apply_to_type MPI_Comm swap_world}}
68 |    {{callfn}}
69 | }{{endfnall}}
70 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/README.md:
--------------------------------------------------------------------------------
 1 | NVIDIA NVTX Wrappers for MPI
 2 | ============================
 3 | License: Copyright 2017 NVIDIA CORPORATION, released under 3-clause BSD
 4 | license.
 5 | This software also uses software that is released under a 3-clause BSD license
 6 | by Lawrence Livermore National Laboratory.
 7 | 
 8 | Summary
 9 | -------
10 | The included sources can be used to generate wrappers for common Message
11 | Passing Interface (MPI) routines using the PMPI interface. The included 
12 | sources will explicitly add a *range* using the NVIDIA Tools Extensions (NVTX)
13 | API. When an MPI program is instrumented with the NVIDIA profilers, a range will
14 | appear in the timeline for each traced MPI call.
15 | 
16 | You can read more about this technique [here](https://devblogs.nvidia.com/parallelforall/gpu-pro-tip-track-mpi-calls-nvidia-visual-profiler/).
17 | 
18 | Prerequisites
19 | -----------
20 | * A working install of MPI
21 | * The NVIDIA CUDA Toolkit
22 | * Python 2.7 (the Makefile invokes `python2.7`)
23 | * make
24 | 
25 | Building
26 | --------
27 | Because each MPI implementation is subtly different, it is necessary to
28 | generate the wrappers for your installed MPI library. These will be generated
29 | from the file `nvtx.w` and the resulting file will be called `nvtx_pmpi.c`
30 | which will be built into a shared object to be used with your program. To
31 | build, simply run `make` in the top level directory.
32 | 
33 |     $ make
34 | 
35 | Extending
36 | ---------
37 | If you would like to extend the library to include additional MPI calls of
38 | interest or change the way the data is represented, make your changes to
39 | `nvtx.w` and then rebuild. The makefile will automatically regenerate the
40 | wrapper source based on your changes. For more information about how to modify
41 | this file, please see `wrap/README.md`.
42 | 
43 | Usage
44 | -----
45 | The shared object file built above must be preloaded, along with the NVIDIA
46 | Tools Extensions library when gathering a performance profile. For example:
47 | 
48 |     $ LD_PRELOAD="<path-to-library>/libnvtx_pmpi.so" nvprof -o timeline.prof ./a.out
49 | 
50 | If the program `a.out` uses any of the wrapped MPI calls then these function
51 | calls will appear as ranges in the NVPROF timeline when it is later loaded into
52 | the NVIDIA Visual Profiler. Any data movement or kernels used by the MPI
53 | function call will appear in the range.
54 | 
55 | Known Limitations
56 | -----------------
57 | * Asynchronous MPI routines are not implemented because any data movement
58 |   incurred as a result of these calls will not occur during the range.
59 | 


--------------------------------------------------------------------------------
/one_hop_profiling/README.md:
--------------------------------------------------------------------------------
 1 | One-hop profiling
 2 | =================
 3 | 
 4 | This is a script that remotely profiles a CUDA program when the machine actually running it is not directly accessible from the machine running the NVIDIA Visual Profiler.
 5 | 
 6 | Such a setup may look like this:
 7 | 
 8 |     .--------------.      .--------------. ssh  .--------------.
 9 |     |              |      |              +----->+              |
10 |     |              | ssh  |              |      |              |
11 |     |     host     +----->+  login node  |      | compute node |
12 |     |              |      |              |      |              |
13 |     |              |      |              +<-----+              |
14 |     '--------------'      '--------------' scp  '--------------'
15 | 
16 | 
17 |  * The **host** machine is the one which is running NVIDIA Visual Profiler. This machine may run Windows, Linux or OSX. It may or may not have an NVIDIA GPU.
18 |  * The **login node** is where this script will run. We just need ssh, scp and perl here; CUDA need not be installed. This needs to be a Linux machine.
19 |  * The **compute node** is where the actual CUDA application will run and be profiled. The profiling data generated will be copied over to the login node so that it can be used by Visual Profiler on the host. This needs to be a Linux machine.
20 | 
21 | Usage instructions:
22 | -------------------
23 | 
24 | **Setting up the login node**
25 | 
26 | 1. Copy or download the [`one_hop_profiling.pl`](/one_hop_profiling/one_hop_profiling.pl) script to the login node.
27 | 2. Give the script execution permissions using the command: `chmod +x one_hop_profiling.pl`
28 | 3. Edit the script and add compute node details. The script's comments document in detail which variables need to be edited.
29 | 4. Install an SSH key to allow the login node to SSH into the compute node without a password. You can find instructions on how to do this [here](https://askubuntu.com/a/46935).
30 | 
31 | **Setting up the compute node**
32 | 
33 | 1. Ensure that the CUDA program you want to profile is present on the compute node.
34 | 2. Ensure that the CUDA toolkit is installed, and nvprof is runnable and in the PATH.
35 | 
36 | **Setting up the host machine**
37 | 
38 | 1. Ensure that the CUDA toolkit is installed on this machine, and that the toolkit version is the same as the one present on the compute node.
39 | 
40 | **Capturing the profile**
41 | 
42 | 1. Run the Visual Profiler on this host machine.
43 | 2. Create a new session (Ctrl + N)
44 | 3. Connect to the login node by adding a remote connection as usual.
45 | 4. Click on `Manage...` Toolkit/Script.
46 | 5. Select the `Custom Script` radio button. Browse and select the profiling script on the login node. Click Finish.
47 | 6. Enter the executable file path on the remote machine in the `File` textbox. You will have to type this in. Remember that NVVP is connected only to the middle machine. It has no idea that the end machine exists, so the browse button will not be able to show you the paths on that machine.
48 | 7. `Next`/`Finish` to run as usual.
49 | 8. A profile will be captured and the timeline will be displayed.
50 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2010, Lawrence Livermore National Security, LLC.  
 2 | Produced at the Lawrence Livermore National Laboratory  
 3 | Written by Todd Gamblin, tgamblin@llnl.gov.
 4 | LLNL-CODE-417602
 5 | All rights reserved.  
 6 | 
 7 | This file is part of Libra. For details, see http://github.com/tgamblin/libra.
 8 | Please also read the LICENSE file for further information.
 9 | 
10 | Redistribution and use in source and binary forms, with or without modification, are
11 | permitted provided that the following conditions are met:
12 | 
13 |  * Redistributions of source code must retain the above copyright notice, this list of
14 |    conditions and the disclaimer below.
15 |  * Redistributions in binary form must reproduce the above copyright notice, this list of
16 |    conditions and the disclaimer (as noted below) in the documentation and/or other materials
17 |    provided with the distribution.
18 |  * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse
19 |    or promote products derived from this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
22 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24 | LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE
25 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
31 |   
32 | Additional BSD Notice  
33 |   
34 | 1. This notice is required to be provided under our contract with the U.S. Department of 
35 |    Energy (DOE).  This work was produced at Lawrence Livermore National Laboratory under 
36 |    Contract No. DE-AC52-07NA27344 with the DOE.  
37 |   
38 | 2. Neither the United States Government nor Lawrence Livermore National Security, LLC nor 
39 |    any of their employees, makes any warranty, express or implied, or assumes any liability 
40 |    or responsibility for the accuracy, completeness, or usefulness of any information, 
41 |    apparatus, product, or process disclosed, or represents that its use would not infringe 
42 |    privately-owned rights.  
43 |   
44 | 3.  Also, reference herein to any specific commercial products, process, or services by trade
45 |     name, trademark, manufacturer or otherwise does not necessarily constitute or imply its 
46 |     endorsement, recommendation, or favoring by the United States Government or Lawrence 
47 |     Livermore National Security, LLC. The views and opinions of authors expressed herein do 
48 |     not necessarily state or reflect those of the United States Government or Lawrence 
49 |     Livermore National Security, LLC, and shall not be used for advertising or product 
50 |     endorsement purposes.  
51 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/nvtx.w:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 2 |  *
 3 |  * Redistribution and use in source and binary forms, with or without
 4 |  * modification, are permitted provided that the following conditions
 5 |  * are met:
 6 |  *  * Redistributions of source code must retain the above copyright
 7 |  *    notice, this list of conditions and the following disclaimer.
 8 |  *  * Redistributions in binary form must reproduce the above copyright
 9 |  *    notice, this list of conditions and the following disclaimer in the
10 |  *    documentation and/or other materials provided with the distribution.
11 |  *  * Neither the name of NVIDIA CORPORATION nor the names of its
12 |  *    contributors may be used to endorse or promote products derived
13 |  *    from this software without specific prior written permission.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #include <nvToolsExt.h>
28 | nvtxDomainHandle_t nvtx_mpi_domain;
29 | 
30 | // One NVTX registered-string handle per wrapped MPI function; each starts at 0 and is filled in by the MPI_Init wrapper below.
31 | {{foreachfn name MPI_Send MPI_Recv MPI_Allreduce MPI_Reduce MPI_Wait MPI_Waitany
32 |   MPI_Waitall MPI_Waitsome MPI_Gather MPI_Gatherv MPI_Scatter MPI_Scatterv
33 |   MPI_Allgather MPI_Allgatherv MPI_Alltoall MPI_Alltoallv MPI_Alltoallw MPI_Bcast
34 |   MPI_Sendrecv MPI_Barrier MPI_Isend MPI_Irecv}}
35 |   nvtxStringHandle_t nvtx_{{name}}_message = 0;
36 | {{endforeachfn}}
37 | 
38 | // Create the NVTX domain and register the strings in both MPI start-up calls: programs that start MPI via MPI_Init_thread never call MPI_Init.
39 | {{fn name MPI_Init MPI_Init_thread}}
40 |   nvtx_mpi_domain = nvtxDomainCreateA("MPI");
41 | 
42 |   // Register string for each MPI function
43 |   {{foreachfn name MPI_Send MPI_Recv MPI_Allreduce MPI_Reduce MPI_Wait MPI_Waitany
44 |   MPI_Waitall MPI_Waitsome MPI_Gather MPI_Gatherv MPI_Scatter MPI_Scatterv
45 |   MPI_Allgather MPI_Allgatherv MPI_Alltoall MPI_Alltoallv MPI_Alltoallw MPI_Bcast
46 |   MPI_Sendrecv MPI_Barrier MPI_Isend MPI_Irecv}}
47 |   nvtx_{{name}}_message = nvtxDomainRegisterStringA(nvtx_mpi_domain, "{{name}}");
48 |   {{endforeachfn}}
49 | 
50 |   {{callfn}}
51 | {{endfn}}
52 | 
53 | // Bracket each selected MPI call with an NVTX range pushed on the MPI domain
54 | {{fn name MPI_Send MPI_Recv MPI_Allreduce MPI_Reduce MPI_Wait MPI_Waitany
55 | MPI_Waitall MPI_Waitsome MPI_Gather MPI_Gatherv MPI_Scatter MPI_Scatterv
56 | MPI_Allgather MPI_Allgatherv MPI_Alltoall MPI_Alltoallv MPI_Alltoallw MPI_Bcast
57 | MPI_Sendrecv MPI_Barrier MPI_Isend MPI_Irecv}}
58 |   nvtxEventAttributes_t nvtxAttr = {0};
59 |   nvtxAttr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
60 |   nvtxAttr.version = NVTX_VERSION;
61 |   nvtxAttr.category = 999;
62 |   nvtxAttr.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
63 |   nvtxAttr.message.registered  = nvtx_{{name}}_message;
64 | 
65 |   nvtxDomainRangePushEx(nvtx_mpi_domain, &nvtxAttr);
66 |   {{callfn}}
67 |   nvtxDomainRangePop(nvtx_mpi_domain);
68 | {{endfn}}
69 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/WrapConfig.cmake:
--------------------------------------------------------------------------------
 1 | #
 2 | # @file WrapConfig.cmake
 3 | #       Contains macros for using the wrap script in a build environment.
 4 | #
 5 | # @author Todd Gamblin
 6 | # @date 19 May 2011
 7 | if(NOT DEFINED WRAP)
 8 |   message(FATAL_ERROR
 9 |     "WRAP variable must be set to location of wrap.py before including WrapConfig.cmake!")
10 | endif()
11 | 
12 | if (NOT Wrap_CONFIG_LOADED)
13 |   set(Wrap_CONFIG_LOADED TRUE)
14 | 
15 |   # This variable allows users to use the wrap.py script directly, if desired.
16 |   set(Wrap_EXECUTABLE ${WRAP})
17 | 
18 |   # add_wrapped_file(file_name wrapper_name [flags...])
19 |   #
20 |   # Adds a build rule that generates <file_name> (in the current binary
21 |   # directory) from <wrapper_name> (in the current source directory) by
22 |   # running wrap.py, and marks <file_name> as GENERATED.
23 |   #
24 |   # Flags for the wrapper generator are taken from, in order of preference:
25 |   #   1. every extra argument passed after <wrapper_name>,
26 |   #   2. the WRAP_FLAGS source-file property of <wrapper_name>,
27 |   #   3. the WRAP_FLAGS CMake variable.
28 |   #
29 |   # Example:
30 |   #   add_wrapped_file(world48.C world48.w -f -g)
31 |   #
32 |   function(add_wrapped_file file_name wrapper_name)
33 |     set(file_path    ${CMAKE_CURRENT_BINARY_DIR}/${file_name})
34 |     set(wrapper_path ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name})
35 | 
36 |     # Play nice with FindPythonInterp -- use the interpreter if it was found,
37 |     # otherwise use the script directly.
38 |     if (PYTHON_EXECUTABLE)
39 |       set(command ${PYTHON_EXECUTABLE})
40 |       set(script_arg ${Wrap_EXECUTABLE})
41 |     else()
42 |       set(command ${Wrap_EXECUTABLE})
43 |       set(script_arg "")
44 |     endif()
45 | 
46 |     # Backward compatibility for old FindMPIs that did not have MPI_C_INCLUDE_PATH
47 |     if (NOT MPI_C_INCLUDE_PATH)
48 |       set(MPI_C_INCLUDE_PATH ${MPI_INCLUDE_PATH})
49 |     endif()
50 |     if (NOT MPI_C_COMPILER)
51 |       set(MPI_C_COMPILER ${MPI_COMPILER})
52 |     endif()
53 | 
54 |     # Build one "-I <dir>" pair per MPI include directory.  Always start from
55 |     # an empty list so a wrap_includes variable in the caller's scope cannot
56 |     # leak into the generated command line.
57 |     set(wrap_includes "")
58 |     foreach(include ${MPI_C_INCLUDE_PATH})
59 |       list(APPEND wrap_includes -I ${include})
60 |     endforeach()
61 | 
62 |     # Play nice with FindMPI: hand wrap.py the MPI compiler when one is
63 |     # known, otherwise fall back to the plain C compiler.
64 |     if (MPI_C_COMPILER)
65 |       set(wrap_compiler -c ${MPI_C_COMPILER})
66 |     else()
67 |       set(wrap_compiler -c ${CMAKE_C_COMPILER})
68 |     endif()
69 | 
70 |     if (NOT "${ARGN}" STREQUAL "")
71 |       # Prefer directly passed in flags.  Keep all of them: taking only the
72 |       # first element would silently drop the rest.
73 |       set(wrap_flags ${ARGN})
74 |     else()
75 |       # Otherwise, look in the source file properties
76 |       get_source_file_property(wrap_flags ${wrapper_name} WRAP_FLAGS)
77 |       if ("${wrap_flags}" STREQUAL "NOTFOUND")
78 |         # If no specific flags, fall back to the WRAP_FLAGS CMake variable
79 |         # (empty when it is not set).
80 |         set(wrap_flags "${WRAP_FLAGS}")
81 |       endif()
82 |     endif()
83 | 
84 |     # Mark target file as generated so the build system knows what to do w/it
85 |     set_source_files_properties(${file_path} PROPERTIES GENERATED TRUE)
86 | 
87 |     # Add a command to automatically wrap files.
88 |     add_custom_command(
89 |       OUTPUT  ${file_path}
90 |       COMMAND ${command}
91 |       ARGS    ${script_arg} ${wrap_compiler} ${wrap_includes} ${wrap_flags} ${wrapper_path} -o ${file_path}
92 |       WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
93 |       DEPENDS ${wrapper_path}
94 |       COMMENT "Generating ${file_name} from ${wrapper_name}"
95 |       VERBATIM)
96 | 
97 |     # Add generated files to list of things to be cleaned for the directory.
98 |     get_directory_property(cleanfiles ADDITIONAL_MAKE_CLEAN_FILES)
99 |     list(APPEND cleanfiles ${file_name})
100 |     set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${cleanfiles}")
101 |   endfunction()
102 | 
103 | endif()
104 | 


--------------------------------------------------------------------------------
/one_hop_profiling/one_hop_profiling.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | 
  3 | use File::Basename;
  4 | use Cwd 'abs_path';
  5 | use strict;
  6 | 
  7 | # ==============================================================================
  8 | # 
  9 | # ONE-HOP PROFILING - v1.0
 10 | # -----------------
 11 | #   https://github.com/NVIDIA/cuda-profiler/tree/master/one_hop_profiling
 12 | #
 13 | # ==============================================================================
 14 | 
 15 | 
 16 | # The following variables pertain to the compute node. Edit them to
 17 | # correctly reflect your setup.
 18 | 
 19 | # User name / IP used to ssh into the compute node.
 20 | # Be sure to escape the "@" sign. E.g.: "user_name\@192.168.1.1"
 21 | my $compute_node_hostname = "";
 22 | 
 23 | # Path on the compute node to the CUDA bin directory. nvprof will be located
 24 | # here. This path is usually "/usr/local/cuda-[version]/bin"
 25 | my $cuda_path = "/usr/local/cuda-9.0/bin";
 26 | 
 27 | # Path on the compute node to the CUDA libraries.
 28 | # This path is usually "/usr/local/cuda-[version]/lib64"
 29 | my $cuda_ld_library_path = "/usr/local/cuda-9.0/lib64";
 30 | 
 31 | # Environment variable(s) to be set on the compute node before running
 32 | # application (optional). E.g. "VARIABLE=value"
 33 | my $env = "";
 34 | 
 35 | # ==============================================================================
 36 | 
 37 | 
 38 | my $cmd;
 39 | if(@ARGV == 1) {
 40 |     # Do not print anything here. This step is required because the NVIDIA
 41 |     # Visual Profiler queries device info as the first step.
 42 |     $cmd = "ssh $compute_node_hostname LD_LIBRARY_PATH=$cuda_ld_library_path:\$LD_LIBRARY_PATH PATH=$cuda_path:\$PATH nvprof $ARGV[0]";
 43 |     system($cmd);
 44 |     exit $? >> 8;
 45 | }
 46 | 
 47 | 
 48 | # The NVIDIA Visual Profiler wants us to generate an nvprof output file on this
 49 | # machine. We modify the '-o' argument value and generate the output file on
 50 | # the compute node, in the same directory that the executable is located. We
 51 | # later copy this file back into the directory on this machine that the Visual
 52 | # Profiler wants it to be in, and then delete the original on the compute node.
 53 | #
 54 | # As a result, the Visual Profiler never knows that we redirected the command
 55 | # to one more remote. As far as it is concerned, the output came from this
 56 | # machine.
 57 | 
 58 | my $i;
 59 | my $nvprof_options = "";
 60 | my $exe_options = "";
 61 | 
 62 | # Everything before "-o" is an nvprof option and is forwarded unchanged.
 63 | for($i = 0; $i < @ARGV; $i++) {
 64 |     last if($ARGV[$i] eq "-o");
 65 |     $nvprof_options = "$nvprof_options $ARGV[$i]";
 66 | }
 67 | 
 68 | # "-o" must be followed by the output file and the executable. Fail with a
 69 | # clear message rather than handing undef to basename()/dirname() below.
 70 | if($i + 2 >= @ARGV) {
 71 |     die "$0: expected '-o <output file> <executable>' in the arguments\n";
 72 | }
 73 | 
 74 | $i++; # Leave -o
 75 | my $output_file_name = basename($ARGV[$i]);
 76 | my $copy_path = dirname($ARGV[$i]);
 77 | $nvprof_options = "$nvprof_options -f -o $output_file_name";
 78 | 
 79 | $i++;
 80 | my $exe_path = dirname($ARGV[$i]);
 81 | my $exe_name = basename($ARGV[$i]);
 82 | 
 83 | # Whatever follows the executable is passed through as its arguments.
 84 | $i++;
 85 | for(; $i < @ARGV; $i++) {
 86 |     $exe_options = "$exe_options $ARGV[$i]";
 87 | }
 88 | 
 89 | my $nvprof_command = "$nvprof_options ./$exe_name $exe_options";
 90 | 
 91 | # Run nvprof on the compute node from the executable's directory.
 92 | $cmd = "ssh $compute_node_hostname \"cd $exe_path;LD_LIBRARY_PATH=$cuda_ld_library_path:\$LD_LIBRARY_PATH PATH=$cuda_path:\$PATH $env nvprof $nvprof_command\"";
 93 | 
 94 | system($cmd);
 95 | if($?) {
 96 |     # "|| 1" keeps the exit status non-zero when the child died from a
 97 |     # signal, in which case the high byte of $? is 0.
 98 |     exit(($? >> 8) || 1);
 99 | }
100 | 
101 | # Replace %p with * to copy all files generated. %p is specified if multiple
102 | # processes are to be profiled, in which case, the %p is replaced by the
103 | # process id of the profiled application.
104 | $output_file_name =~ s/%p/\*/g;
105 | 
106 | # Copy the file from the compute node to this machine (i.e. the login node)
107 | # via scp.
108 | $cmd = "scp $compute_node_hostname:$exe_path/$output_file_name $copy_path";
109 | system($cmd);
110 | if($?) {
111 |     # Keep the remote file: deleting it after a failed copy would lose the
112 |     # only copy of the profile.
113 |     exit(($? >> 8) || 1);
114 | }
115 | 
116 | # Delete the original file on the compute node
117 | $cmd = "ssh $compute_node_hostname rm $exe_path/$output_file_name";
118 | system($cmd);
119 | exit $? >> 8;
120 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/examples/tutorial.w:
--------------------------------------------------------------------------------
  1 | /// -*- c++ -*-
  2 | /// Tutorial wrapper script for wrap.py
  3 | /// by Todd Gamblin tgamblin@llnl.gov
  4 | ///
  5 | /// This shows sample usage of many of the builtin macros in the wrapper generator.
  6 | ///
  7 | /// Run it through wrap.py like this to see sample output:
  8 | ///     wrap.py -o output.txt tutorial.w
  9 | ///
 10 | /// Note that this won't compile; this file is just a simple tutorial with examples.
 11 | ///
 12 | 
 13 | // Say you just want to generate wrappers for some functions.  That's easy with fn and fnall.
 14 | // This simple formulation will generate wrappers for MPI_Send and MPI_Recv:
 15 | {{fn foo MPI_Send MPI_Recv}}
 16 |   {{callfn}}
 17 | {{endfn}}
 18 | 
 19 | // Usually, we add some braces to that so that the editor gets the indentation right.  You
 20 | // don't *need* the braces, but they look nice and help emacs understand where your nested
 21 | // scopes are in C mode.
 22 | {{fn foo MPI_Send MPI_Recv}} {
 23 |   {{callfn}}
 24 | }
 25 | {{endfn}}
 26 | 
 27 | // If you generate this file and look at the output, you'll see full wrapper functions for
 28 | // MPI_Send and MPI_Recv.  The 'callfn' macro tells the wrapper generator to generate a
 29 | // delegating call from MPI_Send (or MPI_Recv) to PMPI_Send (or PMPI_Recv).  That's all
 30 | // it takes!  All the cruft is handled for you by wrap.py.
 31 | 
 32 | // But what's that 'foo' above, you say?  foo is your "loop variable".  It can be used
 33 | // to refer to the name of the function inside the wrapper:
 34 | {{fn foo MPI_Send MPI_Recv}} {
 35 |   // 'foo' here evaluates to just the name of the function.
 36 |   my_global_function_pointer = {{foo}};
 37 | }
 38 | {{endfn}}
 39 | 
 40 | // Usually you'll want to insert your own code in the wrappers.  Say you wanted to time
 41 | // every MPI function.  You could use 'fnall'.  Note that with fnall, the functions you
 42 | // list after the loop variable are *excluded* from generation.  So this will generate
 43 | // wrappers for every MPI function *except* MPI_Send and MPI_Recv:
 44 | {{fnall foo MPI_Send MPI_Recv}} {
 45 |   double start_time = get_time_in_nanoseconds();
 46 |   {{callfn}}
 47 |   double end_time = get_time_in_nanoseconds();
 48 |   printf("{{foo}} took %f nanoseconds to run!\n", (end_time - start_time));
 49 | }
 50 | {{endfnall}}
 51 | 
 52 | // Ok, so now you can make wrappers.  What if you want to iterate over all the MPI
 53 | // calls, but just their names, without generating wrappers?  There are macros for
 54 | // that too.
 55 | 
 56 | // foreachfn iterates over the listed functions, as wrap.py found them in the mpi.h
 57 | // header.  forallfn is like fnall: it iterates over every function *except* the
 58 | // specified ones.
 59 | {{foreachfn foo MPI_Send MPI_Recv}} {
 60 |   // With foreachfn and forallfn, wrappers aren't generated by default.  You have
 61 |   // to put some macros in the nested scope to get something to happen.  Luckily,
 62 |   // in iterative constructs like fn, fnall, foreachfn, and forallfn, the wrapper
 63 |   // generator inserts special variables into the nested scope.  You can get at
 64 |   // them using macros like so:
 65 | 
 66 |   // The return type of the function (this is a simple string):
 67 |   {{ret_type}}
 68 | 
 69 |   // The name of the function (the name comes from the foreachfn "loop" macro above)
 70 |   {{foo}}
 71 | 
 72 |   // A unique number, starting at zero and increasing each time it is evaluated.
 73 |   // This is a holdover from the MPE wrapper generator.
 74 |   {{fn_num}}
 75 | 
 76 |   // You can use regular expression substitutions on variables and print the result.
 77 |   // This, for example, renames MPI_ functions to have NQJ_ prefixes instead.  Here
 78 |   // it prints out either NQJ_Send or NQJ_Recv, depending on which iteration of the
 79 |   // foreachfn loop we're on.
 80 |   {{sub {{foo}} MPI_ NQJ_}}
 81 | 
 82 |   // You can rename things or define new values with def.
 83 |   // Note that def itself doesn't print anything:
 84 |   {{def my_var {{ret_type}}}}
 85 |   {{my_var}}
 86 | 
 87 |   // Suppose you wanted to substitute NQJ_ for MPI_ *once*, then use that value
 88 |   // repeatedly in this scope:
 89 |   {{def nqjfun {{sub {{foo}} MPI_ NQJ_}}}}
 90 |   {{nqjfun}}  {{nqjfun}}  {{nqjfun}}
 91 | 
 92 |   // Not everything in wrap.py is a scalar!  There are also list values.  These
 93 |   // are important for dealing with parameter lists and argument lists.
 94 | 
 95 |   // Formal parameters:
 96 |   {{formals}}
 97 |   {{formals 0}}
 98 |   {{formals 1}}
 99 | 
100 |   // Types of formals:
101 |   {{types}}
102 |   {{types 0}}
103 |   {{types 1}}
104 | 
105 |   // Argument names:
106 |   {{args}}
107 |   {{args 0}}
108 |   {{args 1}}
109 |   // -- or --
110 |   {{0}}
111 |   {{1}}
112 | 
113 |   // Lists, when printed, are printed separated by commas.  This is so that you
114 |   // can easily make lists of parameters or arguments out of them.  You can modify
115 |   // the builtin lists using the 'list' macro, which creates or modifies lists.
116 |   // Here are some examples using list:
117 | 
118 |   // Create a list of your own strings.  This prints out foo, bar, baz.
119 |   {{list foo bar baz}}
120 | 
121 |   // Add newarg to the beginning of the args list and print the result:
122 |   {{list newarg {{args}}}}
123 | 
124 |   // Add newarg to the end of the args list:
125 |   {{list {{args}} newarg}}
126 | 
127 |   // Make a variable for the new list, then print it out:
128 |   {{def new_list {{list {{args}} newarg}} }}
129 |   {{new_list}}
130 | 
131 |   // Get a list of only those formal parameters that have MPI handle types:
132 |   {{filter '^MPI_' {{formals}}}}
133 | 
134 |   // Below are some more complicated (but useful!) expressions.
135 |   // Note that these use macros not fully explained here.  See the documentation
136 |   // for details on what zip does or what sub does when applied to a list.
137 | 
138 |   // replace void with FOO in the first type in the parameter list
139 |   {{sub {{types 0}} void FOO}}
140 | 
141 |   // replace int with FOO in all types in the parameter list
142 |   {{sub {{types}} int FOO}}
143 | 
144 |   // replace void with FOO in all types in the parameter list,
145 |   // and join that with the arg names for a new prototype
146 |   {{ret_type}} {{foo}}({{zip {{sub {{types}} void FOO}} {{args}}}});
147 | 
148 |   // replace every parameter type with the return type
149 |   {{ret_type}} {{foo}}({{zip {{sub {{types}} '.*' {{ret_type}}}} {{args}}}});
150 | 
151 |   // replace any MPI type with MPI_Foo in the parameter list
152 |   {{ret_type}} {{foo}}({{zip {{sub {{types}} 'MPI_.*' MPI_Foo}} {{args}}}});
153 | 
154 | 
155 |   // The apply_to_type macro generates code to apply a callable thing
156 |   // (function, macro, functor) to every parameter of a particular type
157 | 
158 |   // This will generate analyze_comm(comm) calls for each MPI_Comm parameter
159 |   {{apply_to_type MPI_Comm analyze_comm}}
160 | 
161 |   // This will call some_function on every int parameter to the call
162 |   {{apply_to_type int some_function}}
163 | }
164 | {{endforeachfn}}
165 | 
166 | 
167 | 
168 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/README.md:
--------------------------------------------------------------------------------
  1 | wrap.py
  2 | ===========================
  3 | a [PMPI](http://www.open-mpi.org/faq/?category=perftools#PMPI) wrapper generator
  4 | 
  5 | by Todd Gamblin, tgamblin@llnl.gov, https://github.com/tgamblin/wrap
  6 | 
  7 |     Usage: wrap.py [-fgd] [-i pmpi_init] [-c mpicc_name] [-o file] wrapper.w [...]
  8 |      Python script for creating PMPI wrappers. Roughly follows the syntax of
  9 |        the Argonne PMPI wrapper generator, with some enhancements.
 10 |      Options:
 11 |        -d             Just dump function declarations parsed out of mpi.h
 12 |        -f             Generate fortran wrappers in addition to C wrappers.
 13 |        -g             Generate reentry guards around wrapper functions.
 14 |        -c exe         Provide name of MPI compiler (for parsing mpi.h).
 15 |                       Default is 'mpicc'.
 16 |        -s             Skip writing #includes, #defines, and other
 17 |                       front-matter (for non-C output).
 18 |        -i pmpi_init   Specify proper binding for the fortran pmpi_init
 19 |                       function.  Default is 'pmpi_init_'.  Wrappers
 20 |                       compiled for PIC will guess the right binding
 21 |                       automatically (use -DPIC when you compile dynamic
 22 |                       libs).
 23 |        -o file        Send output to a file instead of stdout.
 24 | 
 25 | 
 26 | Thanks to these people for their suggestions and contributions:
 27 | 
 28 | * David Lecomber, Allinea
 29 | * Barry Rountree, LLNL
 30 | 
 31 | Known Bugs:
 32 | 
 33 | * Certain fortran bindings need some bugfixes and may not work.
 34 | 
 35 | Tutorial
 36 | -----------------------------
 37 | For a thorough tutorial, look at `examples/tutorial.w`!  It walks you through
 38 | the process of using `wrap.py`.  It is also legal `wrap.py` code, so you
 39 | can run `wrap.py` on it and see the output to better understand what's
 40 | going on.
 41 | 
 42 | 
 43 | CMake Integration
 44 | -----------------------------
 45 | `wrap.py` includes a `WrapConfig.cmake` file.  You can use this in your CMake project to automatically generate rules to generate wrap.py code.
 46 | 
 47 | Here's an example.  Suppose you put `wrap.py` in a subdirectory of your project called wrap, and your project looks like this:
 48 | 
 49 |     project/
 50 |         CMakeLists.txt
 51 |         wrap/
 52 |             wrap.py
 53 |             WrapConfig.cmake
 54 | In your top-level CMakeLists.txt file, you can now do this:
 55 | 
 56 |     # wrap.py setup -- grab the add_wrapped_file macro.
 57 |     set(WRAP ${PROJECT_SOURCE_DIR}/wrap/wrap.py)
 58 |     include(wrap/WrapConfig.cmake)
 59 | 
 60 | If you have a wrapped source file, you can use the wrapper auto-generation like this:
 61 | 
 62 |     add_wrapped_file(wrappers.C wrappers.w)
 63 |     add_library(tool_library wrappers.C)
 64 | 
 65 | The `add_wrapped_file` function takes care of the dependences and code generation for you.  If you need fortran support, call it like this:
 66 | 
 67 |     add_wrapped_file(wrappers.C wrappers.w -f)
 68 | 
 69 | And note that if you generate a header that your .C files depend on, you need to explicitly include it in a target's sources, unlike non-generated headers.  e.g.:
 70 | 
 71 |     add_wrapped_file(my-header.h my-header.w)
 72 |     add_library(tool_library
 73 |         tool.C         # say that this includes my-header.h
 74 |         my-header.h)   # you need to add my-header.h here.
 75 | 
 76 | If you don't do this, then the header dependence won't be accounted for when tool.C is built.
 77 | 
 78 | Wrapper file syntax
 79 | -----------------------------
 80 | Wrap syntax is a superset of the syntax defined in Appendix C of
 81 | the MPE manual [1], but many commands from the original wrapper
 82 | generator are now deprecated.
 83 | 
 84 | 
 85 | The following two macros generate skeleton wrappers and allow
 86 | delegation via `{{callfn}}`:
 87 | 
 88 | * `fn` iterates over only the listed
 89 | functions.
 90 | * `fnall` iterates over all functions *minus* the named functions.
 91 | 
 92 |     {{fnall <iterator variable name> <function A> <function b> ... }}
 93 |       // code here
 94 |     {{endfnall}}
 95 | 
 96 |     {{fn <iterator variable name> <function A> <function B> ... }}
 97 |     {{endfn}}
 98 | 
 99 |     {{callfn}}
100 | 
101 | `callfn` expands to the call of the function being profiled.
102 | 
103 | `fnall` defines a wrapper to be used on all functions except the functions named.  fn is identical to fnall except that it only generates wrappers for functions named explicitly.
104 | 
105 |     {{fn FOO MPI_Abort}}
106 |     	// Do-nothing wrapper for {{FOO}}
107 |     {{endfn}}
108 | 
109 | generates (in part):
110 | 
111 |     /* ================== C Wrappers for MPI_Abort ================== */
112 |     _EXTERN_C_ int PMPI_Abort(MPI_Comm arg_0, int arg_1);
113 |     _EXTERN_C_ int MPI_Abort(MPI_Comm arg_0, int arg_1) {
114 |         int return_val = 0;
115 | 
116 |     // Do-nothing wrapper for MPI_Abort
117 |         return return_val;
118 |     }
119 | 
120 | `foreachfn` and `forallfn` are the counterparts of `fn` and `fnall`, but they don't generate the
121 | skeletons (and therefore you can't delegate with `{{callfn}}`).  However, you
122 | can use things like `fn_name` (or `foo`) and `argTypeList`, `retType`, `argList`, etc.
123 | 
124 | They're not designed for making wrappers, but declarations of lots of variables and other things you need to declare per MPI function.  e.g., say you wanted a static variable per MPI call for some flag.
125 | 
126 |     {{forallfn <iterator variable name> <function A> <function B> ... }}
127 |       // code here
128 |     {{endforallfn}}
129 | 
130 |     {{foreachfn <iterator variable name> <function A> <function B> ... }}
131 |       // code here
132 |     {{endforeachfn}}
133 | 
134 | 
135 | The code between {{forallfn}} and {{endforallfn}} is copied once
136 | for every function profiled, except for the functions listed.
137 | For example:
138 | 
139 |     {{forallfn fn_name}}
140 |       static int {{fn_name}}_ncalls_{{fileno}};
141 |     {{endforallfn}}
142 | 
143 | might expand to:
144 | 
145 |     static int MPI_Send_ncalls_1;
146 |     static int MPI_Recv_ncalls_1;
147 |     ...
148 | 
149 | etc.
150 | 
151 | * `{{get_arg <argnum>}}` OR `{{<argnum>}}`
152 | 	Arguments to the function being profiled may be referenced by
153 | 	number, starting with 0 and increasing.  e.g., in a wrapper file:
154 | 
155 |         void process_argc_and_argv(int *argc, char ***argv) {
156 |         // do stuff to argc and argv.
157 |         }
158 | 
159 |         {{fn fn_name MPI_Init}}
160 |             process_argc_and_argv({{0}}, {{1}});
161 |         {{callfn}}
162 |         {{endfn}}
163 |     Note that `{{0}}` is just a synonym for `{{get_arg 0}}`
164 | 
165 | * `{{ret_val}}`
166 | 	`ret_val` expands to the variable that is used to hold the return
167 | 	value of the function being profiled.   (was: `{{returnVal}}`)
168 | 
169 | * `{{fn_num}}`
170 | 	This is a number, starting from zero.  It is incremented every time
171 | 	it is used.
172 | 
173 | * `{{ret_type}}`
174 | 	The return type of the function. (was: `{{retType}}`)
175 | 
176 | * `{{formals}}`
177 | 	Essentially what would be in a formal declaration for the function.
178 | 	Can be used with forallfn and foreachfn; these don't generate
179 | 	prototypes, they just iterate over the functions without making a
180 |     skeleton.  (was: `{{argTypeList}}`)
181 | 
182 | * `{{args}}`
183 | 	Names of the arguments in a comma-separated list, e.g.:
184 |     `buf, type, count, comm`
185 | 
186 | * `{{argList}}`
187 | 	Same as `{{args}}`, but with parentheses around the list, e.g.:
188 |     `(buf, type, count, comm)`
189 | 
190 | * `{{applyToType <type> <callable>}}`
191 |     This macro must be nested inside either a fn or fnall block.
192 |     Within the functions being wrapped by fn or fnall, this macro will
193 |     apply `<callable>` to any arguments of the function with type
194 |     `<type>`.   For example, you might write a wrapper file like this:
195 | 
196 |         #define my_macro(comm) do_something_to(comm);
197 |         {{fn fn_name MPI_Send MPI_Isend MPI_Ibsend}}
198 |             {{applyToType MPI_Comm my_macro}}
199 |             {{callfn}}
200 |         {{endfn}}
201 | 
202 | Now the generated wrappers to `MPI_Send`, `MPI_Isend`, and `MPI_Ibsend` will do something like this:
203 | 
204 |     int MPI_Isend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) {
205 |       int _wrap_py_return_val = 0;
206 |       my_macro(comm);
207 |       PMPI_Isend(buf, count, datatype, dest, tag, comm, request);
208 |     }
209 | 
210 | * `{{sub <new_string> <old_string> <regexp> <substitution>}}`
211 |     Declares `<new_string>` in the current scope and gives it the value
212 |     of `<old_string>` with all instances of `<regexp>` replaced with
213 |     `<substitution>`.  You may use any valid python regexp for `<regexp>`
214 |     and any valid substitution value for `<substitution>`.  The regexps
215 |     follow the same syntax as Python's re.sub(), and they may be single
216 |     or double quoted (though it's not necessary unless you use spaces in
217 |     the expressions).
218 | 
219 |     Example:
220 | 
221 |         {{forallfn foo}}
222 |             {{sub nqjfoo foo '^MPI_' NQJ_}}
223 |             {{nqjfoo}}
224 |         {{endforallfn}}
225 | 
226 |   This will print `NQJ_xxx` instead of `MPI_xxx` for each MPI function.
227 | 
228 | * `{{fileno}}`
229 | 	An integral index representing which wrapper file the macro
230 | 	came from.  This is useful when declaring file-global variables
231 | 	to prevent name collisions.  Identifiers declared outside
232 | 	functions should end with _{{fileno}}.  For example:
233 | 
234 | 		static double overhead_time_{{fileno}};
235 | 
236 | 	might expand to
237 | 
238 | 		static double overhead_time_0;
239 | 
240 | 
241 | * `{{vardecl <type> <arg> <arg> ...}}` *(not yet supported)*
242 | 	Declare variables within a wrapper definition.  Wrap will decorate
243 |     the variable name to prevent collisions.
244 | 
245 | * `{{<varname>}}` *(not yet supported)*
246 | 	Access a variable declared by `{{vardecl}}`.
247 | 
248 | Notes on the fortran wrappers
249 | -------------------------------
250 |     #if (!defined(MPICH_HAS_C2F) && defined(MPICH_NAME) && (MPICH_NAME == 1))
251 | 	    /* MPICH call */
252 |         return_val = MPI_Abort((MPI_Comm)(*arg_0), *arg_1);
253 | 	#else
254 |         /* MPI-2 safe call */
255 | 	    return_val = MPI_Abort(MPI_Comm_f2c(*arg_0), *arg_1);
256 | 	#endif
257 | 
258 | This is the part of the wrapper that delegates from Fortran
259 | to C.  There are two ways to do that.  The MPI-2 way is to
260 | call the appropriate _f2c call on the handle and pass that
261 | to the C function.  The f2c/c2f calls are also available in
262 | some versions of MPICH1, but not all of them (I believe they
263 | were backported), so you can do the MPI-2 thing if
264 | `MPICH_HAS_C2F` is defined.
265 | 
266 | If c2f functions are not around, then the script tries to
267 | figure out if it's dealing with MPICH1, where all the
268 | handles are ints.  In that case, you can just pass the int
269 | through.
270 | 
271 | Right now, if it's not *specifically* MPICH1, wrap.py does
272 | the MPI-2 thing.  From what Barry was telling me, your MPI
273 | environment might have int handles, but it is not MPICH1.
274 | So you could either define all the `MPI_Foo_c2f`/`MPI_Foo_f2c`
275 | calls to identity macros, e.g.:
276 | 
277 |     #define MPI_File_c2f(x) (x)
278 |     #define MPI_File_f2c(x) (x)
279 | 
280 | or you could add something to wrap.py to force the
281 | int-passing behavior.  I'm not sure if you have to care
282 | about this, but I thought I'd point it out.
283 | 
284 | -s, or 'structural' mode
285 | -------------------------------
286 | 
287 | If you use the `-s` option, this skips the includes and defines used for C
288 | wrapper functions.  This is useful if you want to use wrap to generate
289 | non-C files, such as XML.
290 | 
291 | If you use -s, we recommend that you avoid using `{{fn}}` and `{{fnall}}`,
292 | as these generate proper wrapper functions that rely on some of the
293 | header information.  Instead, use `{{foreachfn}}` and `{{forallfn}}`, as
294 | these do not generate wrappers around each iteration of the macro.
295 | 
296 | e.g. if you want to generate a simple XML file with descriptions of the
297 | MPI arguments, you might write this in a wrapper file:
298 | 
299 |     {{forallfn fun}}
300 |         <function name="{{fun}}" args="{{args}}"/>
301 |     {{endforallfn}}
302 | 
303 | We don't disallow `{{fnall}}` or `{{fn}}` with `-s`, but if you used
304 | `{{fnall}}` here, each XML tag would have a C wrapper function around it,
305 | which is probably NOT what you want.
306 | 
307 | 
308 | 1. Anthony Chan, William Gropp and Ewing Lusk.  *User's Guide for MPE:
309 | Extensions for MPI Programs*.  ANL/MCS-TM-ANL-98/xx.
310 | ftp://ftp.mcs.anl.gov/pub/mpi/mpeman.pdf
311 | 
312 | 
313 | 


--------------------------------------------------------------------------------
/nvtx_pmpi_wrappers/wrap/wrap.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python
   2 | #################################################################################################
   3 | # Copyright (c) 2010, Lawrence Livermore National Security, LLC.
   4 | # Produced at the Lawrence Livermore National Laboratory
   5 | # Written by Todd Gamblin, tgamblin@llnl.gov.
   6 | # LLNL-CODE-417602
   7 | # All rights reserved.
   8 | #
   9 | # This file is part of Libra. For details, see http://github.com/tgamblin/libra.
  10 | # Please also read the LICENSE file for further information.
  11 | #
  12 | # Redistribution and use in source and binary forms, with or without modification, are
  13 | # permitted provided that the following conditions are met:
  14 | #
  15 | #  * Redistributions of source code must retain the above copyright notice, this list of
  16 | #    conditions and the disclaimer below.
  17 | #  * Redistributions in binary form must reproduce the above copyright notice, this list of
  18 | #    conditions and the disclaimer (as noted below) in the documentation and/or other materials
  19 | #    provided with the distribution.
  20 | #  * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse
  21 | #    or promote products derived from this software without specific prior written permission.
  22 | #
  23 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  24 | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  25 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  26 | # LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE
  27 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  28 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  30 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32 | #################################################################################################
  33 | usage_string = \
  34 | '''Usage: wrap.py [-fgd] [-i pmpi_init] [-c mpicc_name] [-o file] wrapper.w [...]
  35 |  Python script for creating PMPI wrappers. Roughly follows the syntax of
  36 |    the Argonne PMPI wrapper generator, with some enhancements.
  37 |  Options:"
  38 |    -d             Just dump function declarations parsed out of mpi.h
  39 |    -f             Generate fortran wrappers in addition to C wrappers.
  40 |    -g             Generate reentry guards around wrapper functions.
  41 |    -s             Skip writing #includes, #defines, and other front-matter (for non-C output).
  42 |    -c exe         Provide name of MPI compiler (for parsing mpi.h).  Default is \'mpicc\'.
  43 |    -I dir         Provide an extra include directory to use when parsing mpi.h.
  44 |    -i pmpi_init   Specify proper binding for the fortran pmpi_init function.
  45 |                   Default is \'pmpi_init_\'.  Wrappers compiled for PIC will guess the
  46 |                   right binding automatically (use -DPIC when you compile dynamic libs).
  47 |    -o file        Send output to a file instead of stdout.
  48 | 
  49 |  by Todd Gamblin, tgamblin@llnl.gov
  50 | '''
  51 | import tempfile, getopt, subprocess, sys, os, re, StringIO, types, itertools
  52 | 
  53 | # Default values for command-line parameters
  54 | mpicc = 'mpicc'                    # Default name for the MPI compiler
  55 | includes = []                      # Default set of directories to inlucde when parsing mpi.h
  56 | pmpi_init_binding = "pmpi_init_"   # Default binding for pmpi_init
  57 | output_fortran_wrappers = False    # Don't print fortran wrappers by default
  58 | output_guards = False              # Don't print reentry guards by default
  59 | skip_headers = False               # Skip header information and defines (for non-C output)
  60 | dump_prototypes = False            # Just exit and dump MPI protos if false.
  61 | 
  62 | # Possible legal bindings for the fortran version of PMPI_Init()
  63 | pmpi_init_bindings = ["PMPI_INIT", "pmpi_init", "pmpi_init_", "pmpi_init__"]
  64 | 
  65 | # Possible function return types to consider, used for declaration parser.
  66 | # In general, all MPI calls we care about return int.  We include double
  67 | # to grab MPI_Wtick and MPI_Wtime, but we'll ignore the f2c and c2f calls
  68 | # that return MPI_Datatypes and other such things.
  69 | rtypes = ['int', 'double' ]
  70 | 
  71 | # If we find these strings in a declaration, exclude it from consideration.
  72 | exclude_strings = [ "c2f", "f2c", "typedef" ]
  73 | 
  74 | # Regular expressions for start and end of declarations in mpi.h. These are
  75 | # used to get the declaration strings out for parsing with formal_re below.
  76 | begin_decl_re = re.compile("(" + "|".join(rtypes) + ")\s+(MPI_\w+)\s*\(")
  77 | exclude_re =    re.compile("|".join(exclude_strings))
  78 | end_decl_re =   re.compile("\).*\;")
  79 | 
  80 | # Regular Expression for splitting up args. Matching against this
  81 | # returns three groups: type info, arg name, and array info
  82 | formal_re = re.compile(
  83 |     "\s*(" +                       # Start type
  84 |     "(?:const)?\s*" +              # Initial const
  85 |     "\w+"                          # Type name (note: doesn't handle 'long long', etc. right now)
  86 |     ")\s*(" +                      # End type, begin pointers
  87 |     "(?:\s*\*(?:\s*const)?)*" +    # Look for 0 or more pointers with optional 'const'
  88 |     ")\s*"                         # End pointers
  89 |     "(?:(\w+)\s*)?" +              # Argument name. Optional.
  90 |      "(\[.*\])?\s*$"               # Array type.  Also optional. Works for multidimensions b/c it's greedy.
  91 |     )
  92 | 
  93 | # Fortran wrapper suffix
  94 | f_wrap_suffix = "_fortran_wrapper"
  95 | 
  96 | # Initial includes and defines for wrapper files.
  97 | wrapper_includes = '''
  98 | #include <mpi.h>
  99 | #include <stdio.h>
 100 | #include <stdlib.h>
 101 | 
 102 | #ifndef _EXTERN_C_
 103 | #ifdef __cplusplus
 104 | #define _EXTERN_C_ extern "C"
 105 | #else /* __cplusplus */
 106 | #define _EXTERN_C_
 107 | #endif /* __cplusplus */
 108 | #endif /* _EXTERN_C_ */
 109 | 
 110 | #ifdef MPICH_HAS_C2F
 111 | _EXTERN_C_ void *MPIR_ToPointer(int);
 112 | #endif // MPICH_HAS_C2F
 113 | 
 114 | #ifdef PIC
 115 | /* For shared libraries, declare these weak and figure out which one was linked
 116 |    based on which init wrapper was called.  See mpi_init wrappers.  */
 117 | #pragma weak pmpi_init
 118 | #pragma weak PMPI_INIT
 119 | #pragma weak pmpi_init_
 120 | #pragma weak pmpi_init__
 121 | #endif /* PIC */
 122 | 
 123 | _EXTERN_C_ void pmpi_init(MPI_Fint *ierr);
 124 | _EXTERN_C_ void PMPI_INIT(MPI_Fint *ierr);
 125 | _EXTERN_C_ void pmpi_init_(MPI_Fint *ierr);
 126 | _EXTERN_C_ void pmpi_init__(MPI_Fint *ierr);
 127 | 
 128 | '''
 129 | 
 130 | # Default modifiers for generated bindings
 131 | default_modifiers = ["_EXTERN_C_"]  # _EXTERN_C_ is #defined (or not) in wrapper_includes. See above.
 132 | 
 133 | # Set of MPI Handle types
 134 | mpi_handle_types = set(["MPI_Comm", "MPI_Errhandler", "MPI_File", "MPI_Group", "MPI_Info",
 135 |                         "MPI_Op", "MPI_Request", "MPI_Status", "MPI_Datatype", "MPI_Win" ])
 136 | 
 137 | # MPI Calls that have array parameters, and mappings from the array parameter positions to the position
 138 | # of the 'count' paramters that determine their size
 139 | mpi_array_calls = {
 140 |     "MPI_Startall"           : { 1:0 },
 141 |     "MPI_Testall"            : { 1:0, 3:0 },
 142 |     "MPI_Testany"            : { 1:0 },
 143 |     "MPI_Testsome"           : { 1:0, 4:0 },
 144 |     "MPI_Type_create_struct" : { 3:0 },
 145 |     "MPI_Type_get_contents"  : { 6:1 },
 146 |     "MPI_Type_struct"        : { 3:0 },
 147 |     "MPI_Waitall"            : { 1:0, 2:0 },
 148 |     "MPI_Waitany"            : { 1:0 },
 149 |     "MPI_Waitsome"           : { 1:0, 4:0 }
 150 | }
 151 | 
 152 | 
 153 | def find_matching_paren(string, index, lparen='(', rparen=')'):
 154 |     """Find the closing paren corresponding to the open paren at <index>
 155 |        in <string>.  Optionally, can provide other characters to match on.
 156 |        If found, returns the index of the matching parenthesis.  If not found,
 157 |        returns -1.
 158 |     """
 159 |     if not string[index] == lparen:
 160 |         raise ValueError("Character at index %d is '%s'. Expected '%s'"
 161 |                          % (index, string[index], lparen))
 162 |     index += 1
 163 |     count = 1
 164 |     while index < len(string) and count > 0:
 165 |         while index < len(string) and string[index] not in (lparen, rparen):
 166 |             index += 1
 167 |         if string[index] == lparen:
 168 |             count += 1
 169 |         elif string[index] == rparen:
 170 |             count -= 1
 171 | 
 172 |     if count == 0:
 173 |         return index
 174 |     else:
 175 |         return -1
 176 | 
 177 | 
 178 | def isindex(str):
 179 |     """True if a string is something we can index an array with."""
 180 |     try:
 181 |         int(str)
 182 |         return True
 183 |     except ValueError:
 184 |         return False
 185 | 
 186 | def once(function):
 187 |     if not hasattr(function, "did_once"):
 188 |         function()
 189 |         function.did_once = True
 190 | 
 191 | # Returns MPI_Blah_[f2c,c2f] prefix for a handle type.  MPI_Datatype is a special case.
 192 | def conversion_prefix(handle_type):
 193 |     if handle_type == "MPI_Datatype":
 194 |         return "MPI_Type"
 195 |     else:
 196 |         return handle_type
 197 | 
 198 | # Special join function for joining lines together.  Puts "\n" at the end too.
 199 | def joinlines(list, sep="\n"):
 200 |     if list:
 201 |         return sep.join(list) + sep
 202 |     else:
 203 |         return ""
 204 | 
 205 | # Possible types of Tokens in input.
 206 | LBRACE, RBRACE, TEXT, IDENTIFIER = range(4)
 207 | 
 208 | class Token:
 209 |     """Represents tokens; generated from input by lexer and fed to parse()."""
 210 |     def __init__(self, type, value, line=0):
 211 |         self.type = type    # Type of token
 212 |         self.value = value  # Text value
 213 |         self.line = line
 214 | 
 215 |     def __str__(self):
 216 |         return "'%s'" % re.sub(r'\n', "\\\\n", self.value)
 217 | 
 218 |     def isa(self, type):
 219 |         return self.type == type
 220 | 
 221 | 
 222 | class LineTrackingLexer(object):
 223 |     """Base class for Lexers that keep track of line numbers."""
 224 |     def __init__(self, lexicon):
 225 |         self.line_no = -1
 226 |         self.scanner = re.Scanner(lexicon)
 227 | 
 228 |     def make_token(self, type, value):
 229 |         token = Token(type, value, self.line_no)
 230 |         self.line_no += value.count("\n")
 231 |         return token
 232 | 
 233 |     def lex(self, text):
 234 |         self.line_no = 0
 235 |         tokens, remainder = self.scanner.scan(text)
 236 |         if remainder:
 237 |             sys.stderr.write("Unlexable input:\n%s\n" % remainder)
 238 |             sys.exit(1)
 239 |         self.line_no = -1
 240 |         return tokens
 241 | 
 242 | class OuterRegionLexer(LineTrackingLexer):
 243 |     def __init__(self):
 244 |         super(OuterRegionLexer, self).__init__([
 245 |             (r'{{',                     self.lbrace),
 246 |             (r'}}',                     self.rbrace),
 247 |             (r'({(?!{)|}(?!})|[^{}])*', self.text)])
 248 |     def lbrace(self, scanner, token): return self.make_token(LBRACE, token)
 249 |     def rbrace(self, scanner, token): return self.make_token(RBRACE, token)
 250 |     def text(self, scanner, token):   return self.make_token(TEXT, token)
 251 | 
 252 | class OuterCommentLexer(OuterRegionLexer):
 253 |     def __init__(self):
 254 |         super(OuterRegionLexer, self).__init__([
 255 |             (r'/\*(.|[\r\n])*?\*/',                self.text),   # multiline comment
 256 |             (r'//(.|[\r\n])*?(?=[\r\n])',          self.text),   # single line comment
 257 |             (r'{{',                                self.lbrace),
 258 |             (r'}}',                                self.rbrace),
 259 |             (r'({(?!{)|}(?!})|/(?![/*])|[^{}/])*', self.text)])
 260 | 
 261 | class InnerLexer(OuterRegionLexer):
 262 |     def __init__(self):
 263 |         super(OuterRegionLexer, self).__init__([
 264 |             (r'{{',                               self.lbrace),
 265 |             (r'}}',                               self.rbrace),
 266 |             (r'(["\'])?((?:(?!\1)[^\\]|\\.)*)\1', self.quoted_id),
 267 |             (r'([^\s]+)',                         self.identifier),
 268 |             (r'\s+', None)])
 269 |     def identifier(self, scanner, token): return self.make_token(IDENTIFIER, token)
 270 |     def quoted_id(self, scanner, token):
 271 |         # remove quotes from quoted ids.  Note that ids and quoted ids are pretty much the same thing;
 272 |         # the quotes are just optional.  You only need them if you need spaces in your expression.
 273 |         return self.make_token(IDENTIFIER, re.sub(r'^["\'](.*)["\']$', '\\1', token))
 274 | 
# Global current filename and function name for error msgs.
# syntax_error() reads both when it formats a diagnostic; cur_function is only
# mentioned in the message when it is set to something other than None.
cur_filename = ""
cur_function = None
 278 | 
 279 | class WrapSyntaxError:
 280 |     """Simple Class for syntax errors raised by the wrapper generator (rather than python)"""
 281 |     pass
 282 | 
 283 | def syntax_error(msg):
 284 |     # TODO: make line numbers actually work.
 285 |     sys.stderr.write("%s:%d: %s\n" % (cur_filename, 0, msg))
 286 |     if cur_function:
 287 |         sys.stderr.write("    While handling %s.\n" % cur_function)
 288 |     raise WrapSyntaxError
 289 | 
################################################################################
# Scopes:
#   The Scope class in this section provides the nested key -> value lookup
#   (string values or macro functions) used by the wrapper generator.
################################################################################
 295 | class Scope:
 296 |     """ This is the very basic class for scopes in the wrapper generator.  Scopes
 297 |         are hierarchical and support nesting.  They contain string keys mapped
 298 |         to either string values or to macro functions.
 299 |         Scopes also keep track of the particular macro they correspond to (macro_name).
 300 |     """
 301 |     def __init__(self, enclosing_scope=None):
 302 |         self.map = {}
 303 |         self.enclosing_scope = enclosing_scope
 304 |         self.macro_name = None           # For better debugging error messages
 305 | 
 306 |     def __getitem__(self, key):
 307 |         if key in self.map:         return self.map[key]
 308 |         elif self.enclosing_scope:  return self.enclosing_scope[key]
 309 |         else:                       raise KeyError(key + " is not in scope.")
 310 | 
 311 |     def __contains__(self, key):
 312 |         if key in self.map:         return True
 313 |         elif self.enclosing_scope:  return key in self.enclosing_scope
 314 |         else:                       return False
 315 | 
 316 |     def __setitem__(self, key, value):
 317 |         self.map[key] = value
 318 | 
 319 |     def include(self, map):
 320 |         """Add entire contents of the map (or scope) to this scope."""
 321 |         self.map.update(map)
 322 | 
 323 | ################################################################################
 324 | # MPI Semantics:
 325 | #   Classes in this section describe MPI declarations and types.  These are used
 326 | #   to parse the mpi.h header and to generate wrapper code.
 327 | ################################################################################
# Map from function name to declaration created from mpi.h.
# The function-iterating macros look names up here and report a syntax error
# for any name that is not a key of this map.
mpi_functions = {}
 330 | 
 331 | class Param:
 332 |     """Descriptor for formal parameters of MPI functions.
 333 |        Doesn't represent a full parse, only the initial type information,
 334 |        name, and array info of the argument split up into strings.
 335 |     """
 336 |     def __init__(self, type, pointers, name, array, pos):
 337 |         self.type = type               # Name of arg's type (might include things like 'const')
 338 |         self.pointers = pointers       # Pointers
 339 |         self.name = name               # Formal parameter name (from header or autogenerated)
 340 |         self.array = array             # Any array type information after the name
 341 |         self.pos = pos                 # Position of arg in declartion
 342 |         self.decl = None               # This gets set later by Declaration
 343 | 
 344 |     def setDeclaration(self, decl):
 345 |         """Needs to be called by Declaration to finish initing the arg."""
 346 |         self.decl = decl
 347 | 
 348 |     def isHandleArray(self):
 349 |         """True if this Param represents an array of MPI handle values."""
 350 |         return (self.decl.name in mpi_array_calls
 351 |                 and self.pos in mpi_array_calls[self.decl.name])
 352 | 
 353 |     def countParam(self):
 354 |         """If this Param is a handle array, returns the Param that represents the count of its elements"""
 355 |         return self.decl.args[mpi_array_calls[self.decl.name][self.pos]]
 356 | 
 357 |     def isHandle(self):
 358 |         """True if this Param is one of the MPI builtin handle types."""
 359 |         return self.type in mpi_handle_types
 360 | 
 361 |     def isStatus(self):
 362 |         """True if this Param is an MPI_Status.  MPI_Status is handled differently
 363 |            in c2f/f2c calls from the other handle types.
 364 |         """
 365 |         return self.type == "MPI_Status"
 366 | 
 367 |     def fortranFormal(self):
 368 |         """Prints out a formal parameter for a fortran wrapper."""
 369 |         # There are only a few possible fortran arg types in our wrappers, since
 370 |         # everything is a pointer.
 371 |         if self.type == "MPI_Aint" or self.type.endswith("_function"):
 372 |             ftype = self.type
 373 |         else:
 374 |             ftype = "MPI_Fint"
 375 | 
 376 |         # Arrays don't come in as pointers (they're passed as arrays)
 377 |         # Everything else is a pointer.
 378 |         if self.pointers:
 379 |             pointers = self.pointers
 380 |         elif self.array:
 381 |             pointers = ""
 382 |         else:
 383 |             pointers = "*"
 384 | 
 385 |         # Put it all together and return the fortran wrapper type here.
 386 |         arr = self.array or ''
 387 |         return "%s %s%s%s" % (ftype, pointers, self.name, arr)
 388 | 
 389 |     def cType(self):
 390 |         if not self.type:
 391 |             return ''
 392 |         else:
 393 |             arr = self.array or ''
 394 |             pointers = self.pointers or ''
 395 |             return "%s%s%s" % (self.type, pointers, arr)
 396 | 
 397 |     def cFormal(self):
 398 |         """Prints out a formal parameter for a C wrapper."""
 399 |         if not self.type:
 400 |             return self.name  # special case for '...'
 401 |         else:
 402 |             arr = self.array or ''
 403 |             pointers = self.pointers or ''
 404 |             return "%s %s%s%s" % (self.type, pointers, self.name, arr)
 405 | 
 406 |     def castType(self):
 407 |         arr = self.array or ''
 408 |         pointers = self.pointers or ''
 409 |         if '[]' in arr:
 410 |             if arr.count('[') > 1:
 411 |                 pointers += '(*)'   # need extra parens for, e.g., int[][3] -> int(*)[3]
 412 |             else:
 413 |                 pointers += '*'     # justa single array; can pass pointer.
 414 |             arr = arr.replace('[]', '')
 415 |         return "%s%s%s" % (self.type, pointers, arr)
 416 | 
 417 |     def __str__(self):
 418 |         return self.cFormal()
 419 | 
 420 | 
 421 | class Declaration:
 422 |     """ Descriptor for simple MPI function declarations.
 423 |         Contains return type, name of function, and a list of args.
 424 |     """
 425 |     def __init__(self, rtype, name):
 426 |         self.rtype = rtype
 427 |         self.name = name
 428 |         self.args = []
 429 | 
 430 |     def addArgument(self, arg):
 431 |         arg.setDeclaration(self)
 432 |         self.args.append(arg)
 433 | 
 434 |     def __iter__(self):
 435 |         for arg in self.args: yield arg
 436 | 
 437 |     def __str__(self):
 438 |         return self.prototype()
 439 | 
 440 |     def retType(self):
 441 |         return self.rtype
 442 | 
 443 |     def formals(self):
 444 |         return [arg.cFormal() for arg in self.args]
 445 | 
 446 |     def types(self):
 447 |         return [arg.cType() for arg in self.args]
 448 | 
 449 |     def argsNoEllipsis(self):
 450 |         return filter(lambda arg: arg.name != "...", self.args)
 451 | 
 452 |     def returnsErrorCode(self):
 453 |         """This is a special case for MPI_Wtime and MPI_Wtick.
 454 |            These functions actually return a double value instead of an int error code.
 455 |         """
 456 |         return self.rtype == "int"
 457 | 
 458 |     def argNames(self):
 459 |         return [arg.name for arg in self.argsNoEllipsis()]
 460 | 
 461 |     def getArgName(self, index):
 462 |         return self.argsNoEllipsis()[index].name
 463 | 
 464 |     def fortranFormals(self):
 465 |         formals = map(Param.fortranFormal, self.argsNoEllipsis())
 466 |         if self.name == "MPI_Init": formals = []    # Special case for init: no args in fortran
 467 | 
 468 |         ierr = []
 469 |         if self.returnsErrorCode(): ierr = ["MPI_Fint *ierr"]
 470 |         return formals + ierr
 471 | 
 472 |     def fortranArgNames(self):
 473 |         names = self.argNames()
 474 |         if self.name == "MPI_Init": names = []
 475 | 
 476 |         ierr = []
 477 |         if self.returnsErrorCode(): ierr = ["ierr"]
 478 |         return names + ierr
 479 | 
 480 |     def prototype(self, modifiers=""):
 481 |         if modifiers: modifiers = joinlines(modifiers, " ")
 482 |         return "%s%s %s(%s)" % (modifiers, self.retType(), self.name, ", ".join(self.formals()))
 483 | 
 484 |     def pmpi_prototype(self, modifiers=""):
 485 |         if modifiers: modifiers = joinlines(modifiers, " ")
 486 |         return "%s%s P%s(%s)" % (modifiers, self.retType(), self.name, ", ".join(self.formals()))
 487 | 
 488 |     def fortranPrototype(self, name=None, modifiers=""):
 489 |         if not name: name = self.name
 490 |         if modifiers: modifiers = joinlines(modifiers, " ")
 491 | 
 492 |         if self.returnsErrorCode():
 493 |             rtype = "void"  # Fortran calls use ierr parameter instead
 494 |         else:
 495 |             rtype = self.rtype
 496 |         return "%s%s %s(%s)" % (modifiers, rtype, name, ", ".join(self.fortranFormals()))
 497 | 
 498 | 
# Every parameter type string and pointer string encountered while parsing
# mpi.h; enumerate_mpi_declarations() adds to both sets as it parses each arg.
types = set()
all_pointers = set()
 501 | 
 502 | def enumerate_mpi_declarations(mpicc, includes):
 503 |     """ Invokes mpicc's C preprocessor on a C file that includes mpi.h.
 504 |         Parses the output for declarations, and yields each declaration to
 505 |         the caller.
 506 |     """
 507 |     # Create an input file that just includes <mpi.h>
 508 |     tmpfile = tempfile.NamedTemporaryFile('w+b', -1, '.c')
 509 |     tmpname = "%s" % tmpfile.name
 510 |     tmpfile.write('#include <mpi.h>')
 511 |     tmpfile.write("\n")
 512 |     tmpfile.flush()
 513 | 
 514 |     # Run the mpicc -E on the temp file and pipe the output
 515 |     # back to this process for parsing.
 516 |     string_includes = ["-I"+dir for dir in includes]
 517 |     mpicc_cmd = "%s -E %s" % (mpicc, " ".join(string_includes))
 518 |     try:
 519 |         popen = subprocess.Popen("%s %s" % (mpicc_cmd, tmpname), shell=True,
 520 |                                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 521 |     except IOError:
 522 |         sys.stderr.write("IOError: couldn't run '" + mpicc_cmd + "' for parsing mpi.h\n")
 523 |         sys.exit(1)
 524 | 
 525 |     # Parse out the declarations from the MPI file
 526 |     mpi_h = popen.stdout
 527 |     for line in mpi_h:
 528 |         line = line.strip()
 529 |         begin = begin_decl_re.search(line)
 530 |         if begin and not exclude_re.search(line):
 531 |             # Grab return type and fn name from initial parse
 532 |             return_type, fn_name = begin.groups()
 533 | 
 534 |             # Accumulate rest of declaration (possibly multi-line)
 535 |             while not end_decl_re.search(line):
 536 |                 line += " " + mpi_h.next().strip()
 537 | 
 538 |             # Split args up by commas so we can parse them independently
 539 |             fn_and_paren = r'(%s\s*\()' % fn_name
 540 |             match = re.search(fn_and_paren, line)
 541 |             lparen = match.start(1) + len(match.group(1)) - 1
 542 |             rparen = find_matching_paren(line, lparen)
 543 |             if rparen < 0:
 544 |                 raise ValueError("Malformed declaration in header: '%s'" % line)
 545 | 
 546 |             arg_string = line[lparen+1:rparen]
 547 |             arg_list = map(lambda s: s.strip(), arg_string.split(","))
 548 | 
 549 |             # Handle functions that take no args specially
 550 |             if arg_list == ['void']:
 551 |                 arg_list = []
 552 | 
 553 |             # Parse formal parameter descriptors out of args
 554 |             decl = Declaration(return_type, fn_name)
 555 |             arg_num = 0
 556 |             for arg in arg_list:
 557 |                 if arg == '...':   # Special case for Pcontrol.
 558 |                     decl.addArgument(Param(None, None, '...', None, arg_num))
 559 |                 else:
 560 |                     match = formal_re.match(arg)
 561 |                     if not match:
 562 |                         sys.stderr.write("MATCH FAILED FOR: '%s' in %s\n" % (arg, fn_name))
 563 |                         sys.exit(1)
 564 | 
 565 |                     type, pointers, name, array = match.groups()
 566 |                     types.add(type)
 567 |                     all_pointers.add(pointers)
 568 |                     # If there's no name, make one up.
 569 |                     if not name: name = "arg_" + str(arg_num)
 570 | 
 571 |                     decl.addArgument(Param(type.strip(), pointers, name, array, arg_num))
 572 |                 arg_num += 1
 573 | 
 574 |             yield decl
 575 | 
 576 |     mpi_h.close()
 577 |     return_code = popen.wait()
 578 |     if return_code != 0:
 579 |         sys.stderr.write("Error: Couldn't run '%s' for parsing mpi.h.\n" % mpicc_cmd)
 580 |         sys.stderr.write("       Process exited with code %d.\n" % return_code)
 581 |         sys.exit(1)
 582 | 
 583 |     # Do some cleanup once we're done reading.
 584 |     tmpfile.close()
 585 | 
 586 | 
 587 | def write_enter_guard(out, decl):
 588 |     """Prevent us from entering wrapper functions if we're already in a wrapper function.
 589 |        Just call the PMPI function w/o the wrapper instead."""
 590 |     if output_guards:
 591 |         out.write("    if (in_wrapper) return P%s(%s);\n" % (decl.name, ", ".join(decl.argNames())))
 592 |         out.write("    in_wrapper = 1;\n")
 593 | 
 594 | def write_exit_guard(out):
 595 |     """After a call, set in_wrapper back to 0 so we can enter the next call."""
 596 |     if output_guards:
 597 |         out.write("    in_wrapper = 0;\n")
 598 | 
 599 | 
 600 | def write_c_wrapper(out, decl, return_val, write_body):
 601 |     """Write the C wrapper for an MPI function."""
 602 |     # Write the PMPI prototype here in case mpi.h doesn't define it
 603 |     # (sadly the case with some MPI implementaitons)
 604 |     out.write(decl.pmpi_prototype(default_modifiers))
 605 |     out.write(";\n")
 606 | 
 607 |     # Now write the wrapper function, which will call the PMPI function we declared.
 608 |     out.write(decl.prototype(default_modifiers))
 609 |     out.write(" { \n")
 610 |     out.write("    %s %s = 0;\n" % (decl.retType(), return_val))
 611 | 
 612 |     write_enter_guard(out, decl)
 613 |     write_body(out)
 614 |     write_exit_guard(out)
 615 | 
 616 |     out.write("    return %s;\n" % return_val)
 617 |     out.write("}\n\n")
 618 | 
 619 | 
 620 | def write_fortran_binding(out, decl, delegate_name, binding, stmts=None):
 621 |     """Outputs a wrapper for a particular fortran binding that delegates to the
 622 |        primary Fortran wrapper.  Optionally takes a list of statements to execute
 623 |        before delegating.
 624 |     """
 625 |     out.write(decl.fortranPrototype(binding, default_modifiers))
 626 |     out.write(" { \n")
 627 |     if stmts:
 628 |         out.write(joinlines(map(lambda s: "    " + s, stmts)))
 629 |     if decl.returnsErrorCode():
 630 |         # regular MPI fortran functions use an error code
 631 |         out.write("    %s(%s);\n" % (delegate_name, ", ".join(decl.fortranArgNames())))
 632 |     else:
 633 |         # wtick and wtime return a value
 634 |         out.write("    return %s(%s);\n" % (delegate_name, ", ".join(decl.fortranArgNames())))
 635 |     out.write("}\n\n")
 636 | 
 637 | 
 638 | class FortranDelegation:
 639 |     """Class for constructing a call to a Fortran wrapper delegate function.  Provides
 640 |        storage for local temporary variables, copies of parameters, callsites for MPI-1 and
 641 |        MPI-2, and writebacks to local pointer types.
 642 |     """
 643 |     def __init__(self, decl, return_val):
 644 |         self.decl = decl
 645 |         self.return_val = return_val
 646 | 
 647 |         self.temps = set()
 648 |         self.copies = []
 649 |         self.writebacks = []
 650 |         self.actuals = []
 651 |         self.mpich_actuals = []
 652 | 
 653 |     def addTemp(self, type, name):
 654 |         """Adds a temp var with a particular name.  Adds the same var only once."""
 655 |         temp = "    %s %s;" % (type, name)
 656 |         self.temps.add(temp)
 657 | 
 658 |     def addActual(self, actual):
 659 |         self.actuals.append(actual)
 660 |         self.mpich_actuals.append(actual)
 661 | 
 662 |     def addActualMPICH(self, actual):
 663 |         self.mpich_actuals.append(actual)
 664 | 
 665 |     def addActualMPI2(self, actual):
 666 |         self.actuals.append(actual)
 667 | 
 668 |     def addWriteback(self, stmt):
 669 |         self.writebacks.append("    %s" % stmt)
 670 | 
 671 |     def addCopy(self, stmt):
 672 |         self.copies.append("    %s" % stmt)
 673 | 
 674 |     def write(self, out):
 675 |         assert len(self.actuals) == len(self.mpich_actuals)
 676 | 
 677 |         call = "    %s = %s" % (self.return_val, self.decl.name)
 678 |         mpich_call = "%s(%s);\n" % (call, ", ".join(self.mpich_actuals))
 679 |         mpi2_call = "%s(%s);\n" % (call, ", ".join(self.actuals))
 680 | 
 681 |         out.write("    %s %s = 0;\n" % (self.decl.retType(), self.return_val))
 682 |         if mpich_call == mpi2_call and not (self.temps or self.copies or self.writebacks):
 683 |             out.write(mpich_call)
 684 |         else:
 685 |             out.write("#if (!defined(MPICH_HAS_C2F) && defined(MPICH_NAME) && (MPICH_NAME == 1)) /* MPICH test */\n")
 686 |             out.write(mpich_call)
 687 |             out.write("#else /* MPI-2 safe call */\n")
 688 |             out.write(joinlines(self.temps))
 689 |             out.write(joinlines(self.copies))
 690 |             out.write(mpi2_call)
 691 |             out.write(joinlines(self.writebacks))
 692 |             out.write("#endif /* MPICH test */\n")
 693 | 
 694 | 
def write_fortran_wrappers(out, decl, return_val):
    """Writes primary fortran wrapper that handles arg translation.
       Also outputs bindings for this wrapper for different types of fortran compilers.

       out        -- object with a write() method receiving the generated code
       decl       -- declaration of the MPI function being wrapped
       return_val -- name of the C variable that receives the result of the call

       The primary wrapper is emitted as a static function named
       decl.name + f_wrap_suffix.  Four bindings that delegate to it follow:
       the upper-case name, the lower-case name, and the lower-case name with
       one and with two trailing underscores.
    """
    delegate_name = decl.name + f_wrap_suffix
    out.write(decl.fortranPrototype(delegate_name, ["static"]))
    out.write(" { \n")

    call = FortranDelegation(decl, return_val)

    # MPI_Init is handled entirely here: the fortran wrapper has no arguments of
    # its own, so local argc/argv variables are passed to the C function.
    if decl.name == "MPI_Init":
        # Use out.write() here so it comes at very beginning of wrapper function
        out.write("    int argc = 0;\n");
        out.write("    char ** argv = NULL;\n");
        call.addActual("&argc");
        call.addActual("&argv");
        call.write(out)
        out.write("    *ierr = %s;\n" % return_val)
        out.write("}\n\n")

        # Write out various bindings that delegate to the main fortran wrapper.
        # Each binding first stores a distinct value (1-4) in fortran_init,
        # recording which of the four bindings was called.
        write_fortran_binding(out, decl, delegate_name, "MPI_INIT",   ["fortran_init = 1;"])
        write_fortran_binding(out, decl, delegate_name, "mpi_init",   ["fortran_init = 2;"])
        write_fortran_binding(out, decl, delegate_name, "mpi_init_",  ["fortran_init = 3;"])
        write_fortran_binding(out, decl, delegate_name, "mpi_init__", ["fortran_init = 4;"])
        return

    # This loop processes the rest of the call for all other routines.
    # Every argument adds exactly one actual to both the MPICH and the MPI-2
    # argument lists, which FortranDelegation.write() asserts.
    for arg in decl.args:
        if arg.name == "...":   # skip ellipsis
            continue

        if not (arg.pointers or arg.array):
            if not arg.isHandle():
                # These are pass-by-value arguments, so just deref and pass thru
                dereferenced = "*%s" % arg.name
                call.addActual(dereferenced)
            else:
                # Non-ptr, non-arr handles need to be converted with MPI_Blah_f2c
                # No special case for MPI_Status here because MPI_Statuses are never passed by value.
                call.addActualMPI2("%s_f2c(*%s)" % (conversion_prefix(arg.type), arg.name))
                call.addActualMPICH("(%s)(*%s)" % (arg.type, arg.name))

        else:
            if not arg.isHandle():
                # Non-MPI handle pointer types can be passed w/o dereferencing, but need to
                # cast to correct pointer type first (from MPI_Fint*).
                call.addActual("(%s)%s" % (arg.castType(), arg.name))
            else:
                # For MPI-1, assume ints, cross fingers, and pass things straight through.
                call.addActualMPICH("(%s*)%s" % (arg.type, arg.name))
                conv = conversion_prefix(arg.type)
                temp = "temp_%s" % arg.name

                # For MPI-2, other pointer and array types need temporaries and special conversions.
                if not arg.isHandleArray():
                    # Single handle: convert into a local temporary before the
                    # call and convert back into the caller's variable afterwards.
                    call.addTemp(arg.type, temp)
                    call.addActualMPI2("&%s" % temp)

                    if arg.isStatus():
                        # The status conversions take (source, destination) pointers.
                        call.addCopy("%s_f2c(%s, &%s);"  % (conv, arg.name, temp))
                        call.addWriteback("%s_c2f(&%s, %s);" % (conv, temp, arg.name))
                    else:
                        # The other handle conversions return the converted value.
                        call.addCopy("%s = %s_f2c(*%s);"  % (temp, conv, arg.name))
                        call.addWriteback("*%s = %s_c2f(%s);" % (arg.name, conv, temp))
                else:
                    # Make temporary variables for the array and the loop var
                    temp_arr_type = "%s*" % arg.type
                    call.addTemp(temp_arr_type, temp)
                    call.addTemp("int", "i")

                    # generate a copy and a writeback statement for this type of handle
                    # (each becomes the single-statement body of a generated for loop)
                    if arg.isStatus():
                        copy = "    %s_f2c(&%s[i], &%s[i])"  % (conv, arg.name, temp)
                        writeback = "    %s_c2f(&%s[i], &%s[i])" % (conv, temp, arg.name)
                    else:
                        copy = "    temp_%s[i] = %s_f2c(%s[i])"  % (arg.name, conv, arg.name)
                        writeback = "    %s[i] = %s_c2f(temp_%s[i])" % (arg.name, conv, arg.name)

                    # Generate the call surrounded by temp array allocation, copies, writebacks, and temp free
                    # The element count is read by dereferencing the count parameter.
                    count = "*%s" % arg.countParam().name
                    call.addCopy("%s = (%s)malloc(sizeof(%s) * %s);" %
                                 (temp, temp_arr_type, arg.type, count))
                    call.addCopy("for (i=0; i < %s; i++)" % count)
                    call.addCopy("%s;" % copy)
                    call.addActualMPI2(temp)
                    call.addWriteback("for (i=0; i < %s; i++)" % count)
                    call.addWriteback("%s;" % writeback)
                    call.addWriteback("free(%s);" % temp)

    call.write(out)
    # Functions returning an error code report it through ierr; the others
    # return their value directly.
    if decl.returnsErrorCode():
        out.write("    *ierr = %s;\n" % return_val)
    else:
        out.write("    return %s;\n" % return_val)
    out.write("}\n\n")

    # Write out various bindings that delegate to the main fortran wrapper
    write_fortran_binding(out, decl, delegate_name, decl.name.upper())
    write_fortran_binding(out, decl, delegate_name, decl.name.lower())
    write_fortran_binding(out, decl, delegate_name, decl.name.lower() + "_")
    write_fortran_binding(out, decl, delegate_name, decl.name.lower() + "__")
 797 | 
 798 | 
 799 | ################################################################################
 800 | # Macros:
 801 | #   - functions annotated as @macro or @bodymacro define the global macros and
 802 | #     basic pieces of the generator.
 803 | #   - include_decl is used to include MPI declarations into function scopes.
 804 | ################################################################################
# Table of global macros: maps each macro name to the function implementing it.
# Entries are added by the @macro decorator.
macros = {}
 807 | 
 808 | # This decorator adds macro functions to the outermost function scope.
 809 | def macro(macro_name, **attrs):
 810 |     def decorate(fun):
 811 |         macros[macro_name] = fun # Add macro to outer scope under supplied name
 812 |         fun.has_body = False     # By default, macros have no body.
 813 |         for key in attrs:        # Optionally set/override attributes
 814 |             setattr(fun, key, attrs[key])
 815 |         return fun
 816 |     return decorate
 817 | 
 818 | def handle_list(list_name, list, args):
 819 |     """This function handles indexing lists used as macros in the wrapper generator.
 820 |        There are two syntaxes:
 821 |        {{<list_name>}}          Evaluates to the whole list, e.g. 'foo, bar, baz'
 822 |        {{<list_name> <index>}}  Evaluates to a particular element of a list.
 823 |     """
 824 |     if not args:
 825 |         return list
 826 |     else:
 827 |         len(args) == 1 or syntax_error("Wrong number of args for list expression.")
 828 |         try:
 829 |             return list[int(args[0])]
 830 |         except ValueError:
 831 |             syntax_error("Invald index value: '%s'" % args[0])
 832 |         except IndexError:
 833 |             syntax_error("Index out of range in '%s': %d" % (list_name, index))
 834 | 
 835 | class TypeApplier:
 836 |     """This class implements a Macro function for applying something callable to
 837 |        args in a decl with a particular type.
 838 |     """
 839 |     def __init__(self, decl):
 840 |         self.decl = decl
 841 | 
 842 |     def __call__(self, out, scope, args, children):
 843 |         len(args) == 2 or syntax_error("Wrong number of args in apply macro.")
 844 |         type, macro_name = args
 845 |         for arg in self.decl.args:
 846 |             if arg.cType() == type:
 847 |                 out.write("%s(%s);\n" % (macro_name, arg.name))
 848 | 
 849 | def include_decl(scope, decl):
 850 |     """This function is used by macros to include attributes MPI declarations in their scope."""
 851 |     scope["ret_type"] = decl.retType()
 852 |     scope["args"]     = decl.argNames()
 853 |     scope["nargs"]    = len(decl.argNames())
 854 |     scope["types"]    = decl.types()
 855 |     scope["formals"]  = decl.formals()
 856 |     scope["apply_to_type"] = TypeApplier(decl)
 857 |     scope.function_name  = decl.name
 858 | 
 859 |     # These are old-stype, deprecated names.
 860 |     def get_arg(out, scope, args, children):
 861 |         return handle_list("args", decl.argNames(), args)
 862 |     scope["get_arg"]     = get_arg
 863 |     scope["applyToType"] = scope["apply_to_type"]
 864 |     scope["retType"]     = scope["ret_type"]
 865 |     scope["argList"]     = "(%s)" % ", ".join(scope["args"])
 866 |     scope["argTypeList"] = "(%s)" % ", ".join(scope["formals"])
 867 | 
 868 | def all_but(fn_list):
 869 |     """Return a list of all mpi functions except those in fn_list"""
 870 |     all_mpi = set(mpi_functions.keys())
 871 |     diff = all_mpi - set(fn_list)
 872 |     return [x for x in diff]
 873 | 
 874 | @macro("foreachfn", has_body=True)
 875 | def foreachfn(out, scope, args, children):
 876 |     """Iterate over all functions listed in args."""
 877 |     args or syntax_error("Error: foreachfn requires function name argument.")
 878 |     global cur_function
 879 | 
 880 |     fn_var = args[0]
 881 |     for fn_name in args[1:]:
 882 |         cur_function = fn_name
 883 |         if not fn_name in mpi_functions:
 884 |             syntax_error(fn_name + " is not an MPI function")
 885 | 
 886 |         fn = mpi_functions[fn_name]
 887 |         fn_scope = Scope(scope)
 888 |         fn_scope[fn_var] = fn_name
 889 |         include_decl(fn_scope, fn)
 890 | 
 891 |         for child in children:
 892 |             child.evaluate(out, fn_scope)
 893 |     cur_function = None
 894 | 
 895 | @macro("fn", has_body=True)
 896 | def fn(out, scope, args, children):
 897 |     """Iterate over listed functions and generate skeleton too."""
 898 |     args or syntax_error("Error: fn requires function name argument.")
 899 |     global cur_function
 900 | 
 901 |     fn_var = args[0]
 902 |     for fn_name in args[1:]:
 903 |         cur_function = fn_name
 904 |         if not fn_name in mpi_functions:
 905 |             syntax_error(fn_name + " is not an MPI function")
 906 | 
 907 |         fn = mpi_functions[fn_name]
 908 |         return_val = "_wrap_py_return_val"
 909 | 
 910 |         fn_scope = Scope(scope)
 911 |         fn_scope[fn_var] = fn_name
 912 |         include_decl(fn_scope, fn)
 913 | 
 914 |         fn_scope["ret_val"] = return_val
 915 |         fn_scope["returnVal"]  = fn_scope["ret_val"]  # deprecated name.
 916 | 
 917 |         c_call = "%s = P%s(%s);" % (return_val, fn.name, ", ".join(fn.argNames()))
 918 |         if fn_name == "MPI_Init" and output_fortran_wrappers:
 919 |             def callfn(out, scope, args, children):
 920 |                 # All this is to deal with fortran, since fortran's MPI_Init() function is different
 921 |                 # from C's.  We need to make sure to delegate specifically to the fortran init wrapping.
 922 |                 # For dynamic libs, we use weak symbols to pick it automatically.  For static libs, need
 923 |                 # to rely on input from the user via pmpi_init_binding and the -i option.
 924 |                 out.write("    if (fortran_init) {\n")
 925 |                 out.write("#ifdef PIC\n")
 926 |                 out.write("        if (!PMPI_INIT && !pmpi_init && !pmpi_init_ && !pmpi_init__) {\n")
 927 |                 out.write("            fprintf(stderr, \"ERROR: Couldn't find fortran pmpi_init function.  Link against static library instead.\\n\");\n")
 928 |                 out.write("            exit(1);\n")
 929 |                 out.write("        }")
 930 |                 out.write("        switch (fortran_init) {\n")
 931 |                 out.write("        case 1: PMPI_INIT(&%s);   break;\n" % return_val)
 932 |                 out.write("        case 2: pmpi_init(&%s);   break;\n" % return_val)
 933 |                 out.write("        case 3: pmpi_init_(&%s);  break;\n" % return_val)
 934 |                 out.write("        case 4: pmpi_init__(&%s); break;\n" % return_val)
 935 |                 out.write("        default:\n")
 936 |                 out.write("            fprintf(stderr, \"NO SUITABLE FORTRAN MPI_INIT BINDING\\n\");\n")
 937 |                 out.write("            break;\n")
 938 |                 out.write("        }\n")
 939 |                 out.write("#else /* !PIC */\n")
 940 |                 out.write("        %s(&%s);\n" % (pmpi_init_binding, return_val))
 941 |                 out.write("#endif /* !PIC */\n")
 942 |                 out.write("    } else {\n")
 943 |                 out.write("        %s\n" % c_call)
 944 |                 out.write("    }\n")
 945 | 
 946 |             fn_scope["callfn"] = callfn
 947 | 
 948 |             def write_fortran_init_flag():
 949 |                 output.write("static int fortran_init = 0;\n")
 950 |             once(write_fortran_init_flag)
 951 | 
 952 |         else:
 953 |             fn_scope["callfn"] = c_call
 954 | 
 955 |         def write_body(out):
 956 |             for child in children:
 957 |                 child.evaluate(out, fn_scope)
 958 | 
 959 |         out.write("/* ================== C Wrappers for %s ================== */\n" % fn_name)
 960 |         write_c_wrapper(out, fn, return_val, write_body)
 961 |         if output_fortran_wrappers:
 962 |             out.write("/* =============== Fortran Wrappers for %s =============== */\n" % fn_name)
 963 |             write_fortran_wrappers(out, fn, return_val)
 964 |             out.write("/* ================= End Wrappers for %s ================= */\n\n\n" % fn_name)
 965 |     cur_function = None
 966 | 
 967 | @macro("forallfn", has_body=True)
 968 | def forallfn(out, scope, args, children):
 969 |     """Iterate over all but the functions listed in args."""
 970 |     args or syntax_error("Error: forallfn requires function name argument.")
 971 |     foreachfn(out, scope, [args[0]] + all_but(args[1:]), children)
 972 | 
 973 | @macro("fnall", has_body=True)
 974 | def fnall(out, scope, args, children):
 975 |     """Iterate over all but listed functions and generate skeleton too."""
 976 |     args or syntax_error("Error: fnall requires function name argument.")
 977 |     fn(out, scope, [args[0]] + all_but(args[1:]), children)
 978 | 
 979 | @macro("sub")
 980 | def sub(out, scope, args, children):
 981 |     """{{sub <string> <regexp> <substitution>}}
 982 |        Replaces value of <string> with all instances of <regexp> replaced with <substitution>.
 983 |     """
 984 |     len(args) == 3 or syntax_error("'sub' macro takes exactly 4 arguments.")
 985 |     string, regex, substitution = args
 986 |     if isinstance(string, list):
 987 |         return [re.sub(regex, substitution, s) for s in string]
 988 |     if not isinstance(regex, str):
 989 |         syntax_error("Invalid regular expression in 'sub' macro: '%s'" % regex)
 990 |     else:
 991 |         return re.sub(regex, substitution, string)
 992 | 
 993 | @macro("zip")
 994 | def zip_macro(out, scope, args, children):
 995 |     len(args) == 2 or syntax_error("'zip' macro takes exactly 2 arguments.")
 996 |     if not all([isinstance(a, list) for a in args]):
 997 |         syntax_error("Arguments to 'zip' macro must be lists.")
 998 |     a, b = args
 999 |     return ["%s %s" % x for x in zip(a, b)]
1000 | 
@macro("def")
def def_macro(out, scope, args, children):
    """{{def <name> <value>}}
       Binds <name> to <value> in the scope the macro is executed in.
    """
    if len(args) != 2:
        syntax_error("'def' macro takes exactly 2 arguments.")
    name = args[0]
    scope[name] = args[1]
1005 | 
@macro("list")
def list_macro(out, scope, args, children):
    """{{list <arg> ...}}
       Builds one flat list from the arguments: list arguments contribute
       their elements, every other argument is added as a single element.
    """
    flattened = []
    for item in args:
        flattened += item if isinstance(item, list) else [item]
    return flattened
1015 | 
@macro("filter")
def filter_macro(out, scope, args, children):
    """{{filter <regex> <list>}}
       Returns a list containing all elements of <list> that <regex> matches.

       Matching uses re.search, so the pattern may match anywhere in an
       element.  Both a non-list <list> and a non-string <regex> are reported
       as syntax errors.
    """
    len(args) == 2 or syntax_error("'filter' macro takes exactly 2 arguments.")
    regex, l = args
    if not isinstance(l, list):
        # Report the offending argument itself, not the builtin `list` type.
        syntax_error("Invalid list in 'filter' macro: '%s'" % str(l))
    if not isinstance(regex, str):
        syntax_error("Invalid regex in 'filter' macro: '%s'" % str(regex))
    # A comprehension always yields a real list, which callers can index
    # and join.
    return [s for s in l if re.search(regex, s)]
1030 | 
@macro("fn_num")
def fn_num(out, scope, args, children):
    """{{fn_num}}
       Returns the current value of a counter stored on this function and
       then advances it by one, so successive uses yield 0, 1, 2, ...
    """
    current = fn_num.val
    fn_num.val = current + 1
    return current
fn_num.val = 0  # the counter lives on the function object and starts at zero.
1037 | 
1038 | 
1039 | ################################################################################
1040 | # Parser support:
1041 | #   - Chunk class for bits of parsed text on which macros are executed.
1042 | #   - parse() function uses a Lexer to examine a file.
1043 | ################################################################################
class Chunk:
    """Represents a piece of a wrapper file.  Is either a text chunk
       or a macro chunk with children to which the macro should be applied.
       macros are evaluated lazily, so the macro is just a string until
       execute is called and it is fetched from its enclosing scope."""
    def __init__(self):
        self.macro    = None   # macro name (string), or None for a text chunk
        self.args     = []     # macro arguments: strings or nested Chunks
        self.text     = None   # literal text, for text chunks
        self.children = []     # body chunks, for macros that have a body

    def iwrite(self, file, level, text):
        """Write text to file, indented by two spaces per level."""
        file.write("  " * level + text)

    def write(self, file=sys.stdout, l=0):
        """Dump this chunk and, recursively, its children to file.
           Macro chunks are shown as {{name args}}, text chunks as TEXT; l is
           the indentation level of this chunk.
        """
        if self.macro: self.iwrite(file, l, "{{%s %s}}" % (self.macro, " ".join([str(arg) for arg in self.args])))
        if self.text:  self.iwrite(file, l, "TEXT\n")
        for child in self.children:
            child.write(file, l+1)

    def execute(self, out, scope):
        """This function executes a chunk.  For strings, lists, text chunks, etc., this just
           entails returning the chunk's value.  For callable macros, this executes and returns
           the chunk's value.

           Text chunks are written straight to out and yield None.  A macro name that is
           not defined in scope is reported as a syntax error.
        """
        if not self.macro:
            out.write(self.text)
            return None

        if self.macro not in scope:
            error_msg = "Invalid macro: '%s'" % self.macro
            if scope.function_name:
                error_msg += " for " + scope.function_name
            syntax_error(error_msg)

        value = scope[self.macro]
        if hasattr(value, "__call__"):
            # It's a macro, so we need to execute it.  But first evaluate its args.
            def eval_arg(arg):
                if isinstance(arg, Chunk):
                    return arg.execute(out, scope)
                else:
                    return arg
            args = [eval_arg(arg) for arg in self.args]
            return value(out, scope, args, self.children)
        elif isinstance(value, list):
            # Special case for handling lists and list indexing
            return handle_list(self.macro, value, self.args)
        else:
            # Just return the value of anything else
            return value

    def stringify(self, value):
        """Used by evaluate() to print the return values of chunks out to the output file.
           Lists are rendered as their elements separated by ", "; everything else
           is converted with str().
        """
        if isinstance(value, list):
            # Format each element first: join() alone raises TypeError when
            # the list holds non-strings (e.g. a number returned by a macro).
            return ", ".join(["%s" % (item,) for item in value])
        else:
            return str(value)

    def evaluate(self, out, scope):
        """This is an 'interactive' version of execute.  This should be called when
           the chunk's value (if any) should be written out.  Body macros and the outermost
           scope should use this instead of execute().
        """
        value = self.execute(out, scope)
        if value is not None:  # Note the distinction here -- 0 is false but we want to print it!
            out.write(self.stringify(value))
class Parser:
    """Parser for the really simple wrappergen grammar.
       This parser has support for multiple lexers.  self.tokens is a single iterator that
       chains together every token stream handed to the parser.  You can add additional tokens
       to be lexed using push_tokens.  This will cause the pushed tokens to be handled before
       any others.  This allows us to switch lexers while parsing, so that the outer part of
       the file is processed in a language-agnostic way, but stuff inside macros is handled as
       its own macro language.
    """
    def __init__(self, macros):
        self.macros = macros
        self.macro_lexer = InnerLexer()
        self.tokens = iter([]) # chained iterator over pending tokens.  Starts empty.
        self.token = None      # last accepted token
        self.next = None       # next token

    def gettok(self):
        """Puts the next token in the input stream into self.next (None at end of input)."""
        try:
            # The builtin next() is used; self.next is only an attribute and
            # does not shadow it.
            self.next = next(self.tokens)
        except StopIteration:
            self.next = None

    def push_tokens(self, iterable):
        """Adds all tokens in some iterable to the token stream.
           The pushed tokens come first, followed by the current lookahead token
           and then whatever was already pending.
        """
        self.tokens = itertools.chain(iter(iterable), iter([self.next]), self.tokens)
        self.gettok()

    def accept(self, id):
        """Puts the next symbol in self.token if we like it.  Then calls gettok().
           Returns False, without consuming anything, when the next token is not
           of kind id or when the input is exhausted.
        """
        # self.next is None at end of input; guard so that expect() can
        # report "Unexpected end of file." instead of an AttributeError.
        if self.next and self.next.isa(id):
            self.token = self.next
            self.gettok()
            return True
        return False

    def unexpected_token(self):
        """Reports the current lookahead token as a syntax error."""
        syntax_error("Unexpected token: %s." % self.next)

    def expect(self, id):
        """Like accept(), but fails if we don't like the next token."""
        if self.accept(id):
            return True
        else:
            if self.next:
                self.unexpected_token()
            else:
                syntax_error("Unexpected end of file.")
            sys.exit(1)

    def is_body_macro(self, name):
        """Shorthand for testing whether a particular name is the name of a macro that has a body.
           Need this for parsing the language b/c things like {{fn}} need a corresponding {{endfn}}.
        """
        return name in self.macros and self.macros[name].has_body

    def macro(self, accept_body_macros=True):
        """Parses one macro invocation, after its opening brace has been accepted,
           and returns it as a Chunk with its name and arguments filled in.
           Nested macros in argument position are parsed with accept_body_macros=False,
           since body macros are not allowed in expression context.
        """
        # lex inner-macro text as wrapper language if we encounter text here.
        if self.accept(TEXT):
            self.push_tokens(self.macro_lexer.lex(self.token.value))

        # Now proceed with parsing the macro language's tokens
        chunk = Chunk()
        self.expect(IDENTIFIER)
        chunk.macro = self.token.value

        if not accept_body_macros and self.is_body_macro(chunk.macro):
            syntax_error("Cannot use body macros in expression context: '%s'" % chunk.macro)
            sys.exit(1)

        while True:
            if self.accept(LBRACE):
                chunk.args.append(self.macro(False))
            elif self.accept(IDENTIFIER):
                chunk.args.append(self.token.value)
            elif self.accept(TEXT):
                self.push_tokens(self.macro_lexer.lex(self.token.value))
            else:
                self.expect(RBRACE)
                break
        return chunk

    def text(self, end_macro = None):
        """Parses a sequence of text and macro chunks and returns them as a list.
           Stops at end of input or, when end_macro is given, at the macro with that
           name (which is consumed but not included in the result).
        """
        chunks = []
        while self.next:
            if self.accept(TEXT):
                chunk = Chunk()
                chunk.text = self.token.value
                chunks.append(chunk)
            elif self.accept(LBRACE):
                chunk = self.macro()
                name = chunk.macro

                if name == end_macro:
                    # end macro: just break and don't append
                    break
                elif isindex(chunk.macro):
                    # Special case for indices -- raw number macros index 'args' list
                    chunk.macro = "args"
                    chunk.args = [name]
                elif self.is_body_macro(name):
                    # The body runs until the matching {{end<name>}} macro.
                    chunk.children = self.text("end"+name)
                chunks.append(chunk)
            else:
                self.unexpected_token()

        return chunks

    def parse(self, text):
        """Lexes text with the outer lexer selected by skip_headers and returns the
           list of top-level chunks.
        """
        if skip_headers:
            outer_lexer = OuterRegionLexer()   # Not generating C code, text is text.
        else:
            outer_lexer = OuterCommentLexer()  # C code. Considers C-style comments.
        self.push_tokens(outer_lexer.lex(text))
        return self.text()
1228 | 
1229 | ################################################################################
1230 | # Main script:
#   Get arguments, set up outer scope, parse files, generate wrappers.
1232 | ################################################################################
def usage():
    """Write the usage message to stderr and terminate with exit status 2."""
    sys.stderr.write(usage_string)
    raise SystemExit(2)
1236 | 
# Generated code goes to stdout unless -o names an output file.
output = sys.stdout
output_filename = None

try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "fsgdc:o:i:I:")
except getopt.GetoptError as err:
    # err is an exception object; convert it before concatenating.
    sys.stderr.write(str(err) + "\n")
    usage()

for opt, arg in opts:
    if opt == "-d":
        dump_prototypes = True
    elif opt == "-f":
        output_fortran_wrappers = True
    elif opt == "-s":
        skip_headers = True
    elif opt == "-g":
        output_guards = True
    elif opt == "-c":
        # Let the user specify another mpicc to get mpi.h from
        mpicc = arg
    elif opt == "-o":
        output_filename = arg
    elif opt == "-I":
        stripped = arg.strip()
        if stripped: includes.append(stripped)
    elif opt == "-i":
        if arg not in pmpi_init_bindings:
            # List the same set of names the membership test above uses.
            sys.stderr.write("ERROR: PMPI_Init binding must be one of:\n    %s\n" % " ".join(pmpi_init_bindings))
            usage()
        else:
            pmpi_init_binding = arg

# At least one wrapper file is required unless we are only dumping prototypes.
if len(args) < 1 and not dump_prototypes:
    usage()

# Parse mpi.h and put declarations into a map.
for decl in enumerate_mpi_declarations(mpicc, includes):
    mpi_functions[decl.name] = decl
    if dump_prototypes: print(decl)

# Fail gracefully if we didn't find anything.
if not mpi_functions:
    sys.stderr.write("Error: Found no declarations in mpi.h.\n")
    sys.exit(1)

# If we're just dumping prototypes, we can just exit here.
if dump_prototypes: sys.exit(0)

# Open the output file here if it was specified
if output_filename:
    try:
        output = open(output_filename, "w")
    except IOError:
        # Name the file we actually tried to open, not the last option value.
        sys.stderr.write("Error: couldn't open file " + output_filename + " for writing.\n")
        sys.exit(1)

try:
    # Start with some headers and definitions.
    if not skip_headers:
        output.write(wrapper_includes)
        if output_guards: output.write("static int in_wrapper = 0;\n")

    # Parse each file listed on the command line and execute
    # it once it's parsed.
    fileno = 0
    for f in args:
        cur_filename = f
        file = open(cur_filename)
        try:
            wrapper_text = file.read()
        finally:
            # Release the input file as soon as its contents are in memory.
            file.close()

        # Outer scope contains fileno and the fundamental macros.
        outer_scope = Scope()
        outer_scope["fileno"] = str(fileno)
        outer_scope.include(macros)

        parser = Parser(macros)
        chunks = parser.parse(wrapper_text)

        for chunk in chunks:
            chunk.evaluate(output, Scope(outer_scope))
        fileno += 1

except WrapSyntaxError:
    # Don't leave a partially written output file behind.
    output.close()
    if output_filename: os.remove(output_filename)
    sys.exit(1)

output.close()
1319 | 


--------------------------------------------------------------------------------