├── nvtx_pmpi_wrappers ├── Makefile ├── wrap │ ├── examples │ │ ├── CMakeLists.txt │ │ ├── world48.w │ │ └── tutorial.w │ ├── FAQ │ ├── LICENSE │ ├── WrapConfig.cmake │ ├── README.md │ └── wrap.py ├── LICENSE ├── README.md └── nvtx.w ├── README.md └── one_hop_profiling ├── LICENSE ├── README.md └── one_hop_profiling.pl /nvtx_pmpi_wrappers/Makefile: -------------------------------------------------------------------------------- 1 | all: libnvtx_pmpi.so 2 | libnvtx_pmpi.so: nvtx_pmpi.o 3 | mpicc $^ -shared -o $@ -L$(CUDA_HOME)/lib64 -lnvToolsExt 4 | nvtx_pmpi.o: nvtx_pmpi.c 5 | mpicc -I$(CUDA_HOME)/include -DPIC -fPIC -c $^ -o $@ 6 | nvtx_pmpi.c: nvtx.w 7 | python2.7 wrap/wrap.py -f -o $@ $^ 8 | 9 | .PHONY: clean 10 | clean: 11 | rm -f *.o libnvtx_pmpi.so nvtx_pmpi.c 12 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(wrap-examples) 2 | cmake_minimum_required(VERSION 2.8) 3 | 4 | find_package(MPI REQUIRED) 5 | include_directories(${MPI_C_INCLUDE_PATH}) 6 | 7 | set(WRAP ${PROJECT_SOURCE_DIR}/../wrap.py) 8 | include(${PROJECT_SOURCE_DIR}/../WrapConfig.cmake) 9 | 10 | add_wrapped_file(world48.C world48.w) 11 | add_library(world48 world48.C) 12 | target_link_libraries(world48 ${MPI_C_LIBRARIES}) 13 | 14 | 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Tools and extensions for CUDA profiling 3 | 4 | Extension | Extends tool | Description 5 | --------- | ------------ | ----------- 6 | **[one-hop profiling](/one_hop_profiling)** | NVIDIA Visual Profiler | Remotely profile a CUDA program when the machine actually running it is not accessible from the machine running the NVIDIA Visual Profiler 7 | **[NVTX MPI Wrappers](/nvtx_pmpi_wrappers)** | nvprof | Inserts 
NVTX ranges for many common Message Passing Interface (MPI) functions. 8 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/FAQ: -------------------------------------------------------------------------------- 1 | Q: I'm compiling a shared library to be used for the NAS Parallel 2 | Benchmarks. What do I do? 3 | 4 | A: This works: 5 | 6 | ./wrap.py -f -g -o end2end.c end2end.w 7 | mpicc -DPIC -fPIC -I.. -DARCH_SANDY_BRIDGE -DARCH_062D -c end2end.c 8 | mpicc -shared -Wl,-soname,libend2end.so -o ../lib/libend2end.so ../msr_core.o ../msr_rapl.o ../blr_util.o end2end.o 9 | mpicc -L../lib -o harness.end2end harness.c -lend2end 10 | 11 | Items of interest: 12 | 13 | 1) The flags to wrap.py will generate fortran wrappers as well as re-entry guards. 14 | 2) -DPIC must be used along with -fPIC. 15 | 3) The above relies on LD_LIBRARY_PATH being correct and it probably shouldn't. 16 | 17 | 18 | -------------------------------------------------------------------------------- /one_hop_profiling/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 6 | copy of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom the 10 | Software is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | DEALINGS IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of NVIDIA CORPORATION nor the names of its 12 | contributors may be used to endorse or promote products derived 13 | from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/examples/world48.w: -------------------------------------------------------------------------------- 1 | // -*- c++ -*- 2 | // 3 | // world48 4 | // Todd Gamblin, tgamblin@llnl.gov 5 | // 6 | // This file is an example of how to use wrap.py to fool an application 7 | // into thinking it is running on a different communicator from 8 | // MPI_Comm_world. 9 | // 10 | // This was originally intended to allow applications on Blue Gene/Q to 11 | // run with 48 MPI processes per node, rather than just the power of 2 12 | // that IBM provides settings for. The MPI_Init wrapper here will 13 | // split MPI_Comm_world into 2 groups: one for the first 48 out of every 14 | // 64 ranks and one for the last 16. The last 16 ranks of every 64 just 15 | // call MPI_Finalize and exit normally inside of MPI_Init. The rest of 16 | // the ranks continue to execute the rest of the application, thinking 17 | // that the world is only 3/4 as big as the real MPI_COMM_WORLD. 18 | // 19 | // To build: 20 | // wrap.py world48.w > world48.C 21 | // mpicc -c world48.C 22 | // ar cr libworld48.a world48.o 23 | // ranlib libworld48.a 24 | // 25 | // Link your application with libworld48.a, or build it as a shared lib 26 | // and LD_PRELOAD it to try out this tool. 
27 | // 28 | #include <mpi.h> 29 | 30 | // This is a communicator that will contain the first 48 out of 31 | // every 64 ranks in the application. 32 | static MPI_Comm world48; 33 | 34 | // This function modifies its parameter by swapping it with world48 35 | // if it is MPI_COMM_WORLD. 36 | inline void swap_world(MPI_Comm& world) { 37 | if (world == MPI_COMM_WORLD) { 38 | world = world48; 39 | } 40 | } 41 | 42 | // MPI_Init does all the communicator setup 43 | // 44 | {{fn func MPI_Init}}{ 45 | // First call PMPI_Init() 46 | {{callfn}} 47 | 48 | int rank; 49 | PMPI_Comm_rank(MPI_COMM_WORLD, &rank); 50 | 51 | // now keep only the first 48 ranks of each 64. 52 | int keep = (rank % 64 < 48) ? 1: 0; 53 | PMPI_Comm_split(MPI_COMM_WORLD, keep, rank, &world48); 54 | 55 | // throw away the remaining ranks. 56 | if (!keep) { 57 | PMPI_Finalize(); 58 | exit(0); 59 | } 60 | }{{endfn}} 61 | 62 | // This generates interceptors that will catch every MPI routine 63 | // *except* MPI_Init. The interceptors just make sure that if 64 | // they are called with an argument of type MPI_Comm that has a 65 | // value of MPI_COMM_WORLD, they switch it with world48. 66 | {{fnall func MPI_Init}}{ 67 | {{apply_to_type MPI_Comm swap_world}} 68 | {{callfn}} 69 | }{{endfnall}} 70 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/README.md: -------------------------------------------------------------------------------- 1 | NVIDIA NVTX Wrappers for MPI 2 | ============================ 3 | License: Copyright 2017 NVIDIA CORPORATION, released under 3-clause BSD 4 | license. 5 | This software also uses software that is released under a 3-clause BSD license 6 | by Lawrence Livermore National Laboratory. 7 | 8 | Summary 9 | ------- 10 | The included sources can be used to generate wrappers for common Message 11 | Passing Interface (MPI) routines using the PMPI interface. 
The included 12 | sources will explicitly add a *range* using the NVIDIA Tools Extensions (NVTX) 13 | API. When an MPI program is instrumented with the NVIDIA profilers, a range will 14 | appear in the timeline for each traced MPI call. 15 | 16 | You can read more about this technique [here](https://devblogs.nvidia.com/parallelforall/gpu-pro-tip-track-mpi-calls-nvidia-visual-profiler/). 17 | 18 | Prerequisites 19 | ----------- 20 | * A working install of MPI 21 | * The NVIDIA CUDA Toolkit 22 | * Python 23 | * make 24 | 25 | Building 26 | -------- 27 | Because each MPI implementation is subtly different, it is necessary to 28 | generate the wrappers for your installed MPI library. These will be generated 29 | from the file `nvtx.w` and the resulting file will be called `nvtx_pmpi.c` 30 | which will be built into a shared object to be used with your program. To 31 | build, simply run `make` in the top level directory. 32 | 33 | $ make 34 | 35 | Extending 36 | --------- 37 | If you would like to extend the library to include additional MPI calls of 38 | interest or change the way the data is represented, make your changes to 39 | `nvtx.w` and then rebuild. The makefile will automatically regenerate the 40 | wrapper source based on your changes. For more information about how to modify 41 | this file, please see `wrap/README.md`. 42 | 43 | Usage 44 | ----- 45 | The shared object file built above must be preloaded, along with the NVIDIA 46 | Tools Extensions library when gathering a performance profile. For example: 47 | 48 | $ LD_PRELOAD="/libnvtx_pmpi.so" nvprof -o timeline.prof ./a.out 49 | 50 | If the program `a.out` uses any of the wrapped MPI calls then these function 51 | calls will appear as ranges in the NVPROF timeline when it is later loaded into 52 | the NVIDIA Visual Profiler. Any data movement or kernels used by the MPI 53 | function call will appear in the range. 
54 | 55 | Known Limitations 56 | ----------------- 57 | * Asynchronous MPI routines are not implemented because any data movement 58 | incurred as a result of these calls will not occur during the range. 59 | -------------------------------------------------------------------------------- /one_hop_profiling/README.md: -------------------------------------------------------------------------------- 1 | One-hop profiling 2 | ================= 3 | 4 | This is a script that remotely profiles a CUDA program when the machine actually running it is not directly accessible from the machine running the NVIDIA Visual Profiler. 5 | 6 | Such a setup may look like this: 7 | 8 | .--------------. .--------------. ssh .--------------. 9 | | | | +----->+ | 10 | | | ssh | | | | 11 | | host +----->+ login node | | compute node | 12 | | | | | | | 13 | | | | +<-----+ | 14 | '--------------' '--------------' scp '--------------' 15 | 16 | 17 | * The **host** machine is the one which is running NVIDIA Visual Profiler. This machine may run Windows, Linux or OSX. It may or may not have an NVIDIA GPU. 18 | * The **login node** is where this script will run. We just need ssh, scp and perl here; CUDA need not be installed. This needs to be a Linux machine. 19 | * The **compute node** is where the actual CUDA application will run and be profiled. The profiling data generated will be copied over to the login node so that it can be used by Visual Profiler on the host. This needs to be a Linux machine. 20 | 21 | Usage instructions: 22 | ------------------- 23 | 24 | **Setting up the login node** 25 | 26 | 1. Copy or download the [`one_hop_profiling.pl`](/one_hop_profiling/one_hop_profiling.pl) script to the login node. 27 | 2. Give the script execution permissions using the command: `chmod +x one_hop_profiling.pl` 28 | 3. Edit the script and add compute node details. This file has extensive documentation in terms of comments about which variables needed to be edited. 29 | 4. 
Install an SSH key to allow the login node to SSH into the compute node without a password. You can find instructions on how to do this [here](https://askubuntu.com/a/46935). 30 | 31 | **Setting up the compute node** 32 | 33 | 1. Ensure that the CUDA program you want to profile is present on the compute node. 34 | 2. Ensure that the CUDA toolkit is installed, and nvprof is runnable and in the PATH. 35 | 36 | **Setting up the host machine** 37 | 38 | 1. Ensure that the CUDA toolkit is installed on this machine, and that the toolkit version is the same as the one present on the compute node. 39 | 40 | **Capturing the profile** 41 | 42 | 1. Run the Visual Profiler on this host machine. 43 | 2. Create a new session (Ctrl + N) 44 | 3. Connect to the login node by adding a remote connection as usual. 45 | 4. Click on `Manage...` Toolkit/Script. 46 | 5. Select the `Custom Script` radio button. Browse and select the profiling script on the login node. Click Finish. 47 | 6. Enter the executable file path on the remote machine in the `File` textbox. You will have to type this in. Remember that NVVP is connected only to the middle machine. It has no idea that the end machine exists, so the browse button will not be able to show you the paths on that machine. 48 | 7. `Next`/`Finish` to run as usual. 49 | 8. A profile will be captured and the timeline will be displayed. 50 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010, Lawrence Livermore National Security, LLC. 2 | Produced at the Lawrence Livermore National Laboratory 3 | Written by Todd Gamblin, tgamblin@llnl.gov. 4 | LLNL-CODE-417602 5 | All rights reserved. 6 | 7 | This file is part of Libra. For details, see http://github.com/tgamblin/libra. 8 | Please also read the LICENSE file for further information. 
9 | 10 | Redistribution and use in source and binary forms, with or without modification, are 11 | permitted provided that the following conditions are met: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this list of 14 | conditions and the disclaimer below. 15 | * Redistributions in binary form must reproduce the above copyright notice, this list of 16 | conditions and the disclaimer (as noted below) in the documentation and/or other materials 17 | provided with the distribution. 18 | * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse 19 | or promote products derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS 22 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 23 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 24 | LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE 25 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 28 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | 32 | Additional BSD Notice 33 | 34 | 1. This notice is required to be provided under our contract with the U.S. Department of 35 | Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under 36 | Contract No. DE-AC52-07NA27344 with the DOE. 37 | 38 | 2. 
Neither the United States Government nor Lawrence Livermore National Security, LLC nor 39 | any of their employees, makes any warranty, express or implied, or assumes any liability 40 | or responsibility for the accuracy, completeness, or usefulness of any information, 41 | apparatus, product, or process disclosed, or represents that its use would not infringe 42 | privately-owned rights. 43 | 44 | 3. Also, reference herein to any specific commercial products, process, or services by trade 45 | name, trademark, manufacturer or otherwise does not necessarily constitute or imply its 46 | endorsement, recommendation, or favoring by the United States Government or Lawrence 47 | Livermore National Security, LLC. The views and opinions of authors expressed herein do 48 | not necessarily state or reflect those of the United States Government or Lawrence 49 | Livermore National Security, LLC, and shall not be used for advertising or product 50 | endorsement purposes. 51 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/nvtx.w: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | #include <nvToolsExt.h> 28 | nvtxDomainHandle_t nvtx_mpi_domain; 29 | 30 | // Initialize handles to NVTX registered strings 31 | {{foreachfn name MPI_Send MPI_Recv MPI_Allreduce MPI_Reduce MPI_Wait MPI_Waitany 32 | MPI_Waitall MPI_Waitsome MPI_Gather MPI_Gatherv MPI_Scatter MPI_Scatterv 33 | MPI_Allgather MPI_Allgatherv MPI_Alltoall MPI_Alltoallv MPI_Alltoallw MPI_Bcast 34 | MPI_Sendrecv MPI_Barrier MPI_Isend MPI_Irecv}} 35 | nvtxStringHandle_t nvtx_{{name}}_message = 0; 36 | {{endforeachfn}} 37 | 38 | // Setup event category name and register strings 39 | {{fn name MPI_Init}} 40 | nvtx_mpi_domain = nvtxDomainCreateA("MPI"); 41 | 42 | // Register string for each MPI function 43 | {{foreachfn name MPI_Send MPI_Recv MPI_Allreduce MPI_Reduce MPI_Wait MPI_Waitany 44 | MPI_Waitall MPI_Waitsome MPI_Gather MPI_Gatherv MPI_Scatter MPI_Scatterv 45 | MPI_Allgather MPI_Allgatherv MPI_Alltoall MPI_Alltoallv MPI_Alltoallw MPI_Bcast 46 | MPI_Sendrecv MPI_Barrier MPI_Isend MPI_Irecv}} 47 | nvtx_{{name}}_message = nvtxDomainRegisterStringA(nvtx_mpi_domain, "{{name}}"); 48 | {{endforeachfn}} 49 | 50 | {{callfn}} 51 | {{endfn}} 52 | 53 | // Wrap select MPI functions with NVTX 
ranges 54 | {{fn name MPI_Send MPI_Recv MPI_Allreduce MPI_Reduce MPI_Wait MPI_Waitany 55 | MPI_Waitall MPI_Waitsome MPI_Gather MPI_Gatherv MPI_Scatter MPI_Scatterv 56 | MPI_Allgather MPI_Allgatherv MPI_Alltoall MPI_Alltoallv MPI_Alltoallw MPI_Bcast 57 | MPI_Sendrecv MPI_Barrier MPI_Isend MPI_Irecv}} 58 | nvtxEventAttributes_t eventAttrib = {0}; 59 | eventAttrib.version = NVTX_VERSION; 60 | eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; 61 | eventAttrib.messageType = NVTX_MESSAGE_TYPE_REGISTERED; 62 | eventAttrib.message.registered = nvtx_{{name}}_message; 63 | eventAttrib.category = 999; 64 | 65 | nvtxDomainRangePushEx(nvtx_mpi_domain, &eventAttrib); 66 | {{callfn}} 67 | nvtxDomainRangePop(nvtx_mpi_domain); 68 | {{endfn}} 69 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/WrapConfig.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # @file WrapConfig.cmake 3 | # Contains macros for using the wrap script in a build environment. 4 | # 5 | # @author Todd Gamblin 6 | # @date 19 May 2011 7 | if(NOT DEFINED WRAP) 8 | message(FATAL_ERROR 9 | "WRAP variable must be set to location of wrap.py before including WrapConfig.cmake!") 10 | endif() 11 | 12 | if (NOT Wrap_CONFIG_LOADED) 13 | set(Wrap_CONFIG_LOADED TRUE) 14 | 15 | # This variable allows users to use the wrap.py script directly, if desired. 16 | set(Wrap_EXECUTABLE ${WRAP}) 17 | 18 | # add_wrapped_file(file_name wrapper_name [flags]) 19 | # 20 | # This macro adds a command to generate <file_name> from <wrapper_name> to the 21 | # build. Properties on <file_name> are also set so that CMake knows that it 22 | # is generated. 23 | # 24 | # Optionally, flags may be supplied to pass to the wrapper generator. 
25 | # 26 | function(add_wrapped_file file_name wrapper_name) 27 | set(file_path ${CMAKE_CURRENT_BINARY_DIR}/${file_name}) 28 | set(wrapper_path ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name}) 29 | 30 | # Play nice with FindPythonInterp -- use the interpreter if it was found, 31 | # otherwise use the script directly. 32 | if (PYTHON_EXECUTABLE) 33 | set(command ${PYTHON_EXECUTABLE}) 34 | set(script_arg ${Wrap_EXECUTABLE}) 35 | else() 36 | set(command ${Wrap_EXECUTABLE}) 37 | set(script_arg "") 38 | endif() 39 | 40 | # Backward compatibility for old FindMPIs that did not have MPI_C_INCLUDE_PATH 41 | if (NOT MPI_C_INCLUDE_PATH) 42 | set(MPI_C_INCLUDE_PATH ${MPI_INCLUDE_PATH}) 43 | endif() 44 | if (NOT MPI_C_COMPILER) 45 | set(MPI_C_COMPILER ${MPI_COMPILER}) 46 | endif() 47 | 48 | # Play nice with FindMPI. This will deduce the appropriate MPI compiler to use 49 | # for generating wrappers 50 | if (MPI_C_INCLUDE_PATH) 51 | set(wrap_includes "") 52 | foreach(include ${MPI_C_INCLUDE_PATH}) 53 | set(wrap_includes ${wrap_includes} -I ${include}) 54 | endforeach() 55 | endif() 56 | set(wrap_compiler -c ${CMAKE_C_COMPILER}) 57 | if (MPI_C_COMPILER) 58 | set(wrap_compiler -c ${MPI_C_COMPILER}) 59 | endif() 60 | 61 | if (ARGN) 62 | # Prefer directly passed in flags. 63 | list(GET ARGN 0 wrap_flags) 64 | else() 65 | # Otherwise, look in the source file properties 66 | get_source_file_property(wrap_flags ${wrapper_name} WRAP_FLAGS) 67 | if (wrap_flags STREQUAL NOTFOUND) 68 | # If no specific flags, grab them from the WRAP_FLAGS environment variable. 69 | set(wrap_flags "") 70 | if (NOT WRAP_FLAGS STREQUAL "") 71 | set(wrap_flags "${WRAP_FLAGS}") 72 | endif() 73 | endif() 74 | endif() 75 | 76 | # Mark target file as generated so the build system knows what to do w/it 77 | set_source_files_properties(${file_path} PROPERTIES GENERATED TRUE) 78 | 79 | # Add a command to automatically wrap files. 
80 | add_custom_command( 81 | OUTPUT ${file_path} 82 | COMMAND ${command} 83 | ARGS ${script_arg} ${wrap_compiler} ${wrap_includes} ${wrap_flags} ${wrapper_path} -o ${file_path} 84 | WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" 85 | DEPENDS ${wrapper_path} 86 | COMMENT "Generating ${file_name} from ${wrapper_name}" 87 | VERBATIM) 88 | 89 | # Add generated files to list of things to be cleaned for the directory. 90 | get_directory_property(cleanfiles ADDITIONAL_MAKE_CLEAN_FILES) 91 | list(APPEND cleanfiles ${file_name}) 92 | set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${cleanfiles}") 93 | endfunction() 94 | 95 | endif() 96 | -------------------------------------------------------------------------------- /one_hop_profiling/one_hop_profiling.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use File::Basename; 4 | use Cwd 'abs_path'; 5 | use strict; 6 | 7 | # ============================================================================== 8 | # 9 | # ONE-HOP PROFILING - v1.0 10 | # ----------------- 11 | # https://github.com/NVIDIA/cuda-profiler/tree/master/one_hop_profiling 12 | # 13 | # ============================================================================== 14 | 15 | 16 | # The following variables pertain to the compute node. Edit them to 17 | # correctly reflect your setup. 18 | 19 | # User name / IP used to ssh into the compute node. 20 | # Be sure to escape the "@" sign. E.g.: "user_name\@192.168.1.1" 21 | my $compute_node_hostname = ""; 22 | 23 | # Path on the compute node to the CUDA bin directory. nvprof will be located 24 | # here. This path is usually "/usr/local/cuda-[version]/bin" 25 | my $cuda_path = "/usr/local/cuda-9.0/bin"; 26 | 27 | # Path on the compute node to the CUDA libraries. 
28 | # This path is usually "/usr/local/cuda-[version]/lib64" 29 | my $cuda_ld_library_path = "/usr/local/cuda-9.0/lib64"; 30 | 31 | # Environment variable(s) to be set on the compute node before running 32 | # application (optional). E.g. "VARIABLE=value" 33 | my $env = ""; 34 | 35 | # ============================================================================== 36 | 37 | 38 | my $cmd; 39 | if(@ARGV == 1) { 40 | # Do not print anything here. This step is required because the NVIDIA 41 | # Visual Profiler queries device info as the first step. 42 | $cmd = "ssh $compute_node_hostname LD_LIBRARY_PATH=$cuda_ld_library_path:\$LD_LIBRARY_PATH PATH=$cuda_path:\$PATH nvprof $ARGV[0]"; 43 | system($cmd); 44 | exit $? >> 8; 45 | } 46 | 47 | 48 | # The NVIDIA Visual Profiler wants us to generate an nvprof output file on this 49 | # machine. We modify the '-o' argument value and generate the output file on 50 | # the compute node, in the same directory that the executable is located. We 51 | # later copy this file back into the directory on this machine that the Visual 52 | # Profiler wants it to be in, and then delete the original on the compute node. 53 | # 54 | # As a result, the Visual Profiler never knows that we redirected the command 55 | # to one more remote. As far as it is concerned, the output came from this 56 | # machine. 
57 | 58 | my $i; 59 | my $nvprof_options = ""; 60 | my $exe_options = ""; 61 | 62 | for($i = 0; $i < @ARGV; $i++) { 63 | last if($ARGV[$i] eq "-o"); 64 | $nvprof_options = "$nvprof_options $ARGV[$i]"; 65 | } 66 | 67 | $i++; # Leave -o 68 | my $output_file_name = basename($ARGV[$i]); 69 | my $copy_path = dirname($ARGV[$i]); 70 | $nvprof_options = "$nvprof_options -f -o $output_file_name"; 71 | 72 | $i++; 73 | my $exe_path = dirname($ARGV[$i]); 74 | my $exe_name = basename($ARGV[$i]); 75 | 76 | $i++; 77 | for(; $i < @ARGV; $i++) { 78 | $exe_options = "$exe_options $ARGV[$i]"; 79 | } 80 | 81 | my $nvprof_command = "$nvprof_options ./$exe_name $exe_options"; 82 | 83 | $cmd = "ssh $compute_node_hostname \"cd $exe_path;LD_LIBRARY_PATH=$cuda_ld_library_path:\$LD_LIBRARY_PATH PATH=$cuda_path:\$PATH $env nvprof $nvprof_command\""; 84 | 85 | system($cmd); 86 | if($?) { 87 | exit $? >> 8; 88 | } 89 | 90 | # Replace %p with * to copy all files generated. %p is specified if multiple 91 | # processes are to be profiled, in which case, the %p is replaced by the 92 | # process id of the profiled application. 93 | $output_file_name =~ s/%p/\*/g; 94 | 95 | # Copy the file from the compute node to this machine (i.e. the login node) 96 | # via scp. 97 | $cmd = "scp $compute_node_hostname:$exe_path/$output_file_name $copy_path"; 98 | system($cmd); 99 | 100 | # Delete the original file on the compute node 101 | $cmd = "ssh $compute_node_hostname rm $exe_path/$output_file_name"; 102 | system($cmd); 103 | exit $? >> 8; 104 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/examples/tutorial.w: -------------------------------------------------------------------------------- 1 | /// -*- c++ -*- 2 | /// Tutorial wrapper script for wrap.py 3 | /// by Todd Gamblin tgamblin@llnl.gov 4 | /// 5 | /// This shows sample usage of many of the builtin macros in the wrapper generator. 
6 | /// 7 | /// Run it through wrap.py like this to see sample output: 8 | /// wrap.py -o output.txt example.w 9 | /// 10 | /// Note that this won't compile; this file is just a simple tutorial with examples. 11 | /// 12 | 13 | // Say you just want to generate wrappers for some functions. That's easy with fn and fnall. 14 | // This simple formulation will generate wrappers for MPI_Send and MPI_Recv: 15 | {{fn foo MPI_Send MPI_Recv}} 16 | {{callfn}} 17 | {{endfn}} 18 | 19 | // Usually, we add some braces to that so that the editor gets the indentation right You 20 | // don't *need* the braces, but they look nice and help emacs understand where your nested 21 | // scopes are in C mode. 22 | {{fn foo MPI_Send MPI_Recv}} { 23 | {{callfn}} 24 | } 25 | {{endfn}} 26 | 27 | // If you generate this file and look at the output, you'll see full wrapper functions for 28 | // MPI_Send and MPI_Recv. The 'callfn' macro tells the wrapper generator to generate a 29 | // delegating call from MPI_Send (or MPI_Recv) to PMPI_Send (or PMPI_Recv). That's all 30 | // it takes! All the cruft is handled for you by wrap.py. 31 | 32 | // But what's that 'foo' above, you say? foo is your "loop variable". It can be used 33 | // to refer to the name of the function inside the wrapper: 34 | {{fn foo MPI_Send MPI_Recv}} { 35 | // 'foo' here evaluates to just the name of the function. 36 | my_global_function_pointer = {{foo}}; 37 | } 38 | {{endfn}} 39 | 40 | // Usually you'll want to insert your own code in the wrappers. Say you wanted to time 41 | // every MPI function. You could use 'fnall'. Note that with fnall, the functions you 42 | // list after the loop variable are *excluded* from generation. 
So this will generate 43 | // wrappers for every MPI function *except* MPI_Send and MPI_Recv: 44 | {{fnall foo MPI_Send MPI_Recv}} { 45 | double start_time = get_time_in_nanoseconds(); 46 | {{callfn}} 47 | double end_time = get_time_in_nanoseconds(); 48 | printf("{{foo}} took %f nanoseconds to run!\n", (end_time - start_time)); 49 | } 50 | {{endfnall}} 51 | 52 | // Ok, so now you can make wrappers. What if you want to iterate over all the MPI 53 | // calls, but just their names, without generating wrappers? There are macros for 54 | // that too. 55 | 56 | // foreachfn iterates over function names that wrap.py found in the mpi.h header. 57 | // forallfn is like fnall, but it again iterates over everything *except* specified 58 | // functions. 59 | {{foreachfn foo MPI_Send MPI_Recv}} { 60 | // With foreachfn and forallfn, wrappers aren't generated by default. You have 61 | // to put some macros in the nested scope to get something to happen. Luckily, 62 | // in iterative constructs like fn, fnall, foreachfn, and forallfn, the wrapper 63 | // generator inserts special variables into the nested scope. You can get at 64 | // them using macros like so: 65 | 66 | // The return type of the function (this is a simple string): 67 | {{ret_type}} 68 | 69 | // The name of the function (the name comes from the foreachfn "loop" macro above) 70 | {{foo}} 71 | 72 | // A unique number, starting at zero and increasing each time it is evaluated 73 | // this is a holdover from the MPE wrapper generator. 74 | {{fn_num}} 75 | 76 | // You can use regular expression substitutions on variables and print the result. 77 | // This, for example, renames MPI_ functions to have NQJ_ prefixes instead. Here 78 | // it prints out either NQJ_Send or NQJ_Recv, depending on which iteration of the 79 | // foreachfn loop we're on. 80 | {{sub {{foo}} MPI_ NQJ_}} 81 | 82 | // You can rename things or define new values with def. 
83 | // Note that def itself doesn't print anything: 84 | {{def my_var {{ret_type}}}} 85 | {{my_var}} 86 | 87 | // Suppose you wanted to substitute MPI for NQJ *once*, then use that value 88 | // repeatedly in this scope: 89 | {{def nqjfun {{sub {{foo}} MPI_ NQJ_}}}} 90 | {{nqjfun}} {{nqjfun}} {{nqjfun}} 91 | 92 | // Not everything in wrap.py is a scalar! There are also list values. These 93 | // are important for dealing with parameter lists and 94 | 95 | // Formal parameters: 96 | {{formals}} 97 | {{formals 0}} 98 | {{formals 1}} 99 | 100 | // Types of formals: 101 | {{types}} 102 | {{types 0}} 103 | {{types 1}} 104 | 105 | // Argument names: 106 | {{args}} 107 | {{args 0}} 108 | {{args 1}} 109 | // -- or -- 110 | {{0}} 111 | {{1}} 112 | 113 | // Lists, when printed, are printed separated by commas. This is so that you 114 | // can easily make lists of parameters or arguments out of them. You can modify 115 | // the builtin lists using the 'list' macro, which creates or modifies lists. 116 | // Here are some examples using list: 117 | 118 | // Create a list of your own strings. This prints out foo, bar, baz. 119 | {{list foo bar baz}} 120 | 121 | // Add newarg to the beginning of the args list and print the result: 122 | {{list newarg {{args}}}} 123 | 124 | // Add newarg to the end of the args list: 125 | {{list {{args}} newarg}} 126 | 127 | // Make a variable for the new list, then print it out: 128 | {{def new_list {{list {{args}} newarg}} }} 129 | {{new_list}} 130 | 131 | // Get a list of only those formal parameters that have MPI handle types: 132 | {{filter '^MPI_' {{formals}}}} 133 | 134 | // Below are some more complicated (but useful!) expressions. 135 | // Note that these use macros not fully explained here. See the documentation 136 | // for details on what zip does or what sub does when applied to a list. 
137 | 138 | // replace void with FOO in the first type in the parameter list 139 | {{sub {{types 0}} void FOO}} 140 | 141 | // replace void with FOO in all types in the parameter list 142 | {{sub {{types}} int FOO}} 143 | 144 | // replace void with FOO in all types in the parameter list, 145 | // and join that with the arg names for a new prototype 146 | {{ret_type}} {{foo}}({{zip {{sub {{types}} void FOO}} {{args}}}}); 147 | 148 | // replace every parameter type with the return type 149 | {{ret_type}} {{foo}}({{zip {{sub {{types}} '.*' {{ret_type}}}} {{args}}}}); 150 | 151 | // replace any MPI type with MPI_Foo in the parameter list 152 | {{ret_type}} {{foo}}({{zip {{sub {{types}} 'MPI_.*' MPI_Foo}} {{args}}}}); 153 | 154 | 155 | // The apply_to_type macro generates code to apply a callable thing 156 | // (function, macro, functor) to every parameter of a particular type 157 | 158 | // This will generate analyze_comm(comm) calls for each MPI_Comm parameter 159 | {{apply_to_type MPI_Comm analyze_comm}} 160 | 161 | // This will call some_function on every int parameter to the call 162 | {{apply_to_type int some_function}} 163 | } 164 | {{endforeachfn}} 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/README.md: -------------------------------------------------------------------------------- 1 | wrap.py 2 | =========================== 3 | a [PMPI](http://www.open-mpi.org/faq/?category=perftools#PMPI) wrapper generator 4 | 5 | by Todd Gamblin, tgamblin@llnl.gov, https://github.com/tgamblin/wrap 6 | 7 | Usage: wrap.py [-fgd] [-i pmpi_init] [-c mpicc_name] [-o file] wrapper.w [...] 8 | Python script for creating PMPI wrappers. Roughly follows the syntax of 9 | the Argonne PMPI wrapper generator, with some enhancements. 10 | Options:" 11 | -d Just dump function declarations parsed out of mpi.h 12 | -f Generate fortran wrappers in addition to C wrappers. 
13 | -g Generate reentry guards around wrapper functions. 14 | -c exe Provide name of MPI compiler (for parsing mpi.h). 15 | Default is \'mpicc\'. 16 | -s Skip writing #includes, #defines, and other 17 | front-matter (for non-C output). 18 | -i pmpi_init Specify proper binding for the fortran pmpi_init 19 | function. Default is \'pmpi_init_\'. Wrappers 20 | compiled for PIC will guess the right binding 21 | automatically (use -DPIC when you compile dynamic 22 | libs). 23 | -o file Send output to a file instead of stdout. 24 | 25 | 26 | Thanks to these people for their suggestions and contributions: 27 | 28 | * David Lecomber, Allinea 29 | * Barry Rountree, LLNL 30 | 31 | Known Bugs: 32 | 33 | * Certain fortran bindings need some bugfixes and may not work. 34 | 35 | Tutorial 36 | ----------------------------- 37 | For a thorough tutorial, look at `examples/tutorial.w`! It walks you through 38 | the process of using `wrap.py`. It is also legal `wrap.py` code, so you 39 | can run `wrap.py` on it and see the output to better understand what's 40 | going on. 41 | 42 | 43 | CMake Integration 44 | ----------------------------- 45 | `wrap.py` includes a `WrapConfig.cmake` file. You can use this in your CMake project to automatically generate rules to generate wrap.py code. 46 | 47 | Here's an example. Suppose you put `wrap.py` in a subdirectory of your project called wrap, and your project looks like this: 48 | 49 | project/ 50 | CMakeLists.txt 51 | wrap/ 52 | wrap.py 53 | WrapConfig.cmake 54 | In your top-level CMakeLists.txt file, you can now do this: 55 | 56 | # wrap.py setup -- grab the add_wrapped_file macro. 
57 | set(WRAP ${PROJECT_SOURCE_DIR}/wrap/wrap.py) 58 | include(wrap/WrapConfig.cmake) 59 | 60 | If you have a wrapped source file, you can use the wrapper auto-generation like this: 61 | 62 | add_wrapped_file(wrappers.C wrappers.w) 63 | add_library(tool_library wrappers.C) 64 | 65 | The `add_wrapped_file` function takes care of the dependencies and code generation for you. If you need fortran support, call it like this: 66 | 67 | add_wrapped_file(wrappers.C wrappers.w -f) 68 | 69 | And note that if you generate a header that your .C files depend on, you need to explicitly include it in a target's sources, unlike non-generated headers. e.g.: 70 | 71 | add_wrapped_file(my-header.h my-header.w) 72 | add_library(tool_library 73 | tool.C # say that this includes my-header.h 74 | my-header.h) # you need to add my-header.h here. 75 | 76 | If you don't do this, then the header dependence won't be accounted for when tool.C is built. 77 | 78 | Wrapper file syntax 79 | ----------------------------- 80 | Wrap syntax is a superset of the syntax defined in Appendix C of 81 | the MPE manual [1], but many commands from the original wrapper 82 | generator are now deprecated. 83 | 84 | 85 | The following two macros generate skeleton wrappers and allow 86 | delegation via `{{callfn}}`: 87 | 88 | * `fn` iterates over only the listed 89 | functions. 90 | * `fnall` iterates over all functions *minus* the named functions. 91 | 92 | {{fnall ... }} 93 | // code here 94 | {{endfnall}} 95 | 96 | {{fn ... }} 97 | {{endfn}} 98 | 99 | {{callfn}} 100 | 101 | `callfn` expands to the call of the function being profiled. 102 | 103 | `fnall` defines a wrapper to be used on all functions except the functions named. `fn` is identical to `fnall` except that it only generates wrappers for functions named explicitly.
104 | 105 | {{fn FOO MPI_Abort}} 106 | // Do-nothing wrapper for {{FOO}} 107 | {{endfn}} 108 | 109 | generates (in part): 110 | 111 | /* ================== C Wrappers for MPI_Abort ================== */ 112 | _EXTERN_C_ int PMPI_Abort(MPI_Comm arg_0, int arg_1); 113 | _EXTERN_C_ int MPI_Abort(MPI_Comm arg_0, int arg_1) { 114 | int return_val = 0; 115 | 116 | // Do-nothing wrapper for MPI_Abort 117 | return return_val; 118 | } 119 | 120 | `foreachfn` and `forallfn` are the counterparts of `fn` and `fnall`, but they don't generate the 121 | skeletons (and therefore you can't delegate with `{{callfn}}`). However, you 122 | can use things like `fn_name` (or `foo`) and `argTypeList`, `retType`, `argList`, etc. 123 | 124 | They're not designed for making wrappers, but for declarations of lots of variables and other things you need to declare per MPI function. e.g., say you wanted a static variable per MPI call for some flag. 125 | 126 | {{forallfn ... }} 127 | // code here 128 | {{endforallfn}} 129 | 130 | {{foreachfn ... }} 131 | // code here 132 | {{endforeachfn}} 133 | 134 | 135 | The code between {{forallfn}} and {{endforallfn}} is copied once 136 | for every function profiled, except for the functions listed. 137 | For example: 138 | 139 | {{forallfn fn_name}} 140 | static int {{fn_name}}_ncalls_{{fileno}}; 141 | {{endforallfn}} 142 | 143 | might expand to: 144 | 145 | static int MPI_Send_ncalls_1; 146 | static int MPI_Recv_ncalls_1; 147 | ... 148 | 149 | etc. 150 | 151 | * `{{get_arg <argnum>}}` OR `{{<argnum>}}` 152 | Arguments to the function being profiled may be referenced by 153 | number, starting with 0 and increasing. e.g., in a wrapper file: 154 | 155 | void process_argc_and_argv(int *argc, char ***argv) { 156 | // do stuff to argc and argv.
157 | } 158 | 159 | {{fn fn_name MPI_Init}} 160 | process_argc_and_argv({{0}}, {{1}}); 161 | {{callfn}} 162 | {{endfn}} 163 | Note that `{{0}}` is just a synonym for `{{get_arg 0}}`. 164 | 165 | * `{{ret_val}}` 166 | `ret_val` expands to the variable that is used to hold the return 167 | value of the function being profiled. (was: `{{returnVal}}`) 168 | 169 | * `{{fn_num}}` 170 | This is a number, starting from zero. It is incremented every time 171 | it is used. 172 | 173 | * `{{ret_type}}` 174 | The return type of the function. (was: `{{retType}}`) 175 | 176 | * `{{formals}}` 177 | Essentially what would be in a formal declaration for the function. 178 | Can be used with forallfn and foreachfn; these don't generate 179 | prototypes, they just iterate over the functions without making a 180 | skeleton. (was: `{{argTypeList}}`) 181 | 182 | * `{{args}}` 183 | Names of the arguments in a comma-separated list, e.g.: 184 | `buf, type, count, comm` 185 | 186 | * `{{argList}}` 187 | Same as `{{args}}`, but with parentheses around the list, e.g.: 188 | `(buf, type, count, comm)` 189 | 190 | * `{{applyToType <type> <callable>}}` 191 | This macro must be nested inside either a fn or fnall block. 192 | Within the functions being wrapped by fn or fnall, this macro will 193 | apply `<callable>` to any arguments of the function with type 194 | `<type>`.
For example, you might write a wrapper file like this: 195 | 196 | #define my_macro(comm) do_something_to(comm); 197 | {{fn fn_name MPI_Send MPI_Isend MPI_Ibsend}} 198 | {{applyToType MPI_Comm my_macro}} 199 | {{callfn}} 200 | {{endfn}} 201 | 202 | Now the generated wrappers to `MPI_Send`, `MPI_Isend`, and `MPI_Ibsend` will do something like this: 203 | 204 | int MPI_Isend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) { 205 | int _wrap_py_return_val = 0; 206 | my_macro(comm); 207 | PMPI_Isend(buf, count, datatype, dest, tag, comm, request); 208 | } 209 | 210 | * `{{sub <new_string> <old_string> <regexp> <substitution>}}` 211 | Declares `<new_string>` in the current scope and gives it the value 212 | of `<old_string>` with all instances of `<regexp>` replaced with 213 | `<substitution>`. You may use any valid python regexp for `<regexp>` 214 | and any valid substitution value for `<substitution>`. The regexps 215 | follow the same syntax as Python's re.sub(), and they may be single 216 | or double quoted (though it's not necessary unless you use spaces in 217 | the expressions). 218 | 219 | Example: 220 | 221 | {{forallfn foo}} 222 | {{sub nqjfoo foo '^MPI_' NQJ_}} 223 | {{nqjfoo}} 224 | {{endforallfn}} 225 | 226 | This will print `NQJ_xxx` instead of `MPI_xxx` for each MPI function. 227 | 228 | * `{{fileno}}` 229 | An integral index representing which wrapper file the macro 230 | came from. This is useful when declaring file-global variables 231 | to prevent name collisions. Identifiers declared outside 232 | functions should end with _{{fileno}}. For example: 233 | 234 | static double overhead_time_{{fileno}}; 235 | 236 | might expand to 237 | 238 | static double overhead_time_0; 239 | 240 | 241 | * `{{vardecl ...}}` *(not yet supported)* 242 | Declare variables within a wrapper definition. Wrap will decorate 243 | the variable name to prevent collisions. 244 | 245 | * `{{<varname>}}` *(not yet supported)* 246 | Access a variable declared by `{{vardecl}}`.
247 | 248 | Notes on the fortran wrappers 249 | ------------------------------- 250 | #if (!defined(MPICH_HAS_C2F) && defined(MPICH_NAME) && (MPICH_NAME == 1)) 251 | /* MPICH call */ 252 | return_val = MPI_Abort((MPI_Comm)(*arg_0), *arg_1); 253 | #else 254 | /* MPI-2 safe call */ 255 | return_val = MPI_Abort(MPI_Comm_f2c(*arg_0), *arg_1); 256 | #endif 257 | 258 | This is the part of the wrapper that delegates from Fortran 259 | to C. There are two ways to do that. The MPI-2 way is to 260 | call the appropriate _f2c call on the handle and pass that 261 | to the C function. The f2c/c2f calls are also available in 262 | some versions of MPICH1, but not all of them (I believe they 263 | were backported), so you can do the MPI-2 thing if 264 | `MPICH_HAS_C2F` is defined. 265 | 266 | If c2f functions are not around, then the script tries to 267 | figure out if it's dealing with MPICH1, where all the 268 | handles are ints. In that case, you can just pass the int 269 | through. 270 | 271 | Right now, if it's not *specifically* MPICH1, wrap.py does 272 | the MPI-2 thing. From what Barry was telling me, your MPI 273 | environment might have int handles, but it is not MPICH1. 274 | So you could either define all the `MPI_Foo_c2f`/`MPI_Foo_f2c` 275 | calls to identity macros, e.g.: 276 | 277 | #define MPI_File_c2f(x) (x) 278 | #define MPI_File_f2c(x) (x) 279 | 280 | or you could add something to wrap.py to force the 281 | int-passing behavior. I'm not sure if you have to care 282 | about this, but I thought I'd point it out. 283 | 284 | -s, or 'structural' mode 285 | ------------------------------- 286 | 287 | If you use the `-s` option, this skips the includes and defines used for C 288 | wrapper functions. This is useful if you want to use wrap to generate 289 | non-C files, such as XML. 290 | 291 | If you use -s, we recommend that you avoid using `{{fn}}` and `{{fnall}}`, 292 | as these generate proper wrapper functions that rely on some of the 293 | header information. 
Instead, use `{{foreachfn}}` and `{{forallfn}}`, as 294 | these do not generate wrappers around each iteration of the macro. 295 | 296 | e.g. if you want to generate a simple XML file with descriptions of the 297 | MPI arguments, you might write this in a wrapper file: 298 | 299 | {{forallfn fun}} 300 | <function name="{{fun}}" args="{{args}}"/> 301 | {{endforallfn}} 302 | 303 | We don't disallow `{{fnall}}` or `{{fn}}` with `-s`, but if you used 304 | `{{fnall}}` here, each XML tag would have a C wrapper function around it, 305 | which is probably NOT what you want. 306 | 307 | 308 | 1. Anthony Chan, William Gropp and Ewing Lusk. *User's Guide for MPE: 309 | Extensions for MPI Programs*. ANL/MCS-TM-ANL-98/xx. 310 | ftp://ftp.mcs.anl.gov/pub/mpi/mpeman.pdf 311 | 312 | 313 | -------------------------------------------------------------------------------- /nvtx_pmpi_wrappers/wrap/wrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ################################################################################################# 3 | # Copyright (c) 2010, Lawrence Livermore National Security, LLC. 4 | # Produced at the Lawrence Livermore National Laboratory 5 | # Written by Todd Gamblin, tgamblin@llnl.gov. 6 | # LLNL-CODE-417602 7 | # All rights reserved. 8 | # 9 | # This file is part of Libra. For details, see http://github.com/tgamblin/libra. 10 | # Please also read the LICENSE file for further information. 11 | # 12 | # Redistribution and use in source and binary forms, with or without modification, are 13 | # permitted provided that the following conditions are met: 14 | # 15 | # * Redistributions of source code must retain the above copyright notice, this list of 16 | # conditions and the disclaimer below. 17 | # * Redistributions in binary form must reproduce the above copyright notice, this list of 18 | # conditions and the disclaimer (as noted below) in the documentation and/or other materials 19 | # provided with the distribution.
#     * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse
#       or promote products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#################################################################################################
# Help text for the command line.
usage_string = \
'''Usage: wrap.py [-fgd] [-i pmpi_init] [-c mpicc_name] [-o file] wrapper.w [...]
 Python script for creating PMPI wrappers. Roughly follows the syntax of
   the Argonne PMPI wrapper generator, with some enhancements.
 Options:
   -d             Just dump function declarations parsed out of mpi.h
   -f             Generate fortran wrappers in addition to C wrappers.
   -g             Generate reentry guards around wrapper functions.
   -s             Skip writing #includes, #defines, and other front-matter (for non-C output).
   -c exe         Provide name of MPI compiler (for parsing mpi.h). Default is \'mpicc\'.
   -I dir         Provide an extra include directory to use when parsing mpi.h.
   -i pmpi_init   Specify proper binding for the fortran pmpi_init function.
                  Default is \'pmpi_init_\'. Wrappers compiled for PIC will guess the
                  right binding automatically (use -DPIC when you compile dynamic libs).
   -o file        Send output to a file instead of stdout.

 by Todd Gamblin, tgamblin@llnl.gov
'''
import tempfile, getopt, subprocess, sys, os, re, types, itertools
try:
    import StringIO                  # Python 2 module
except ImportError:
    import io as StringIO            # Python 3: io.StringIO is the equivalent class

# Default values for command-line parameters
mpicc = 'mpicc'                      # Default name for the MPI compiler
includes = []                        # Default set of directories to include when parsing mpi.h
pmpi_init_binding = "pmpi_init_"     # Default binding for pmpi_init
output_fortran_wrappers = False      # Don't print fortran wrappers by default
output_guards = False                # Don't print reentry guards by default
skip_headers = False                 # Skip header information and defines (for non-C output)
dump_prototypes = False              # Just dump MPI protos and exit if true (the -d option).

# Possible legal bindings for the fortran version of PMPI_Init()
pmpi_init_bindings = ["PMPI_INIT", "pmpi_init", "pmpi_init_", "pmpi_init__"]

# Possible function return types to consider, used for declaration parser.
# In general, all MPI calls we care about return int. We include double
# to grab MPI_Wtick and MPI_Wtime, but we'll ignore the f2c and c2f calls
# that return MPI_Datatypes and other such things.
rtypes = ['int', 'double' ]

# If we find these strings in a declaration, exclude it from consideration.
exclude_strings = [ "c2f", "f2c", "typedef" ]

# Regular expressions for start and end of declarations in mpi.h. These are
# used to get the declaration strings out for parsing with formal_re below.
# Raw strings keep the regex escapes (\s, \w, \() from being read as (invalid)
# Python string escapes.
begin_decl_re = re.compile("(" + "|".join(rtypes) + r")\s+(MPI_\w+)\s*\(")
exclude_re = re.compile("|".join(exclude_strings))
end_decl_re = re.compile(r"\).*\;")

# Regular Expression for splitting up args.
# Matching against this returns four groups: type info, pointers, arg name,
# and array info (in that order). Name and array info are None when absent.
formal_re = re.compile(
    r"\s*(" +                     # Start type
    r"(?:const)?\s*" +            # Initial const
    r"\w+"                        # Type name (note: doesn't handle 'long long', etc. right now)
    r")\s*(" +                    # End type, begin pointers
    r"(?:\s*\*(?:\s*const)?)*" +  # Look for 0 or more pointers with optional 'const'
    r")\s*"                       # End pointers
    r"(?:(\w+)\s*)?" +            # Argument name. Optional.
    r"(\[.*\])?\s*$"              # Array type. Also optional. Works for multidimensions b/c it's greedy.
    )

# Fortran wrapper suffix
f_wrap_suffix = "_fortran_wrapper"

# Initial includes and defines for wrapper files.
# The three #include lines must name their headers; a bare "#include" makes
# every generated wrapper file fail to compile.
wrapper_includes = '''
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef _EXTERN_C_
#ifdef __cplusplus
#define _EXTERN_C_ extern "C"
#else /* __cplusplus */
#define _EXTERN_C_
#endif /* __cplusplus */
#endif /* _EXTERN_C_ */

#ifdef MPICH_HAS_C2F
_EXTERN_C_ void *MPIR_ToPointer(int);
#endif // MPICH_HAS_C2F

#ifdef PIC
/* For shared libraries, declare these weak and figure out which one was linked
   based on which init wrapper was called. See mpi_init wrappers. */
#pragma weak pmpi_init
#pragma weak PMPI_INIT
#pragma weak pmpi_init_
#pragma weak pmpi_init__
#endif /* PIC */

_EXTERN_C_ void pmpi_init(MPI_Fint *ierr);
_EXTERN_C_ void PMPI_INIT(MPI_Fint *ierr);
_EXTERN_C_ void pmpi_init_(MPI_Fint *ierr);
_EXTERN_C_ void pmpi_init__(MPI_Fint *ierr);

'''

# Default modifiers for generated bindings
default_modifiers = ["_EXTERN_C_"]  # _EXTERN_C_ is #defined (or not) in wrapper_includes. See above.
# Set of MPI Handle types
mpi_handle_types = set(["MPI_Comm", "MPI_Errhandler", "MPI_File", "MPI_Group", "MPI_Info",
                        "MPI_Op", "MPI_Request", "MPI_Status", "MPI_Datatype", "MPI_Win" ])

# MPI Calls that have array parameters, and mappings from the array parameter positions to the position
# of the 'count' parameters that determine their size
mpi_array_calls = {
    "MPI_Startall"           : { 1:0 },
    "MPI_Testall"            : { 1:0, 3:0 },
    "MPI_Testany"            : { 1:0 },
    "MPI_Testsome"           : { 1:0, 4:0 },
    "MPI_Type_create_struct" : { 3:0 },
    "MPI_Type_get_contents"  : { 6:1 },
    "MPI_Type_struct"        : { 3:0 },
    "MPI_Waitall"            : { 1:0, 2:0 },
    "MPI_Waitany"            : { 1:0 },
    "MPI_Waitsome"           : { 1:0, 4:0 }
}


def find_matching_paren(string, index, lparen='(', rparen=')'):
    """Find the closing paren corresponding to the open paren at <index>
       in <string>. Optionally, can provide other characters to match on.
       If found, returns the index of the matching parenthesis. If not found,
       returns -1.

       Raises ValueError if the character at <index> is not <lparen>.
    """
    if not string[index] == lparen:
        raise ValueError("Character at index %d is '%s'. Expected '%s'"
                         % (index, string[index], lparen))

    # Walk forward exactly once over the rest of the string, tracking nesting
    # depth. Every character is consumed, so nested parens cannot stall the
    # scan, and running off the end simply means "no match".
    depth = 1
    for pos in range(index + 1, len(string)):
        char = string[pos]
        if char == lparen:
            depth += 1
        elif char == rparen:
            depth -= 1
            if depth == 0:
                return pos
    return -1


def isindex(str):
    """True if a string is something we can index an array with."""
    try:
        int(str)
        return True
    except ValueError:
        return False

def once(function):
    """Call function() the first time it is passed here; later calls are no-ops.
       The 'did_once' attribute set on the function object records that it ran.
    """
    if not hasattr(function, "did_once"):
        function()
        function.did_once = True

# Returns MPI_Blah_[f2c,c2f] prefix for a handle type.
# MPI_Datatype is a special case.
def conversion_prefix(handle_type):
    if handle_type == "MPI_Datatype":
        return "MPI_Type"
    else:
        return handle_type

# Special join function for joining lines together. Puts "\n" at the end too.
def joinlines(list, sep="\n"):
    if list:
        return sep.join(list) + sep
    else:
        return ""

# Possible types of Tokens in input.
LBRACE, RBRACE, TEXT, IDENTIFIER = range(4)

class Token:
    """Represents tokens; generated from input by lexer and fed to parse()."""
    def __init__(self, type, value, line=0):
        self.type = type    # Type of token
        self.value = value  # Text value
        self.line = line    # Line number the lexer had reached when the token was made

    def __str__(self):
        # Show newlines as a literal backslash-n so a token prints on one line.
        return "'%s'" % re.sub(r'\n', "\\\\n", self.value)

    def isa(self, type):
        return self.type == type


class LineTrackingLexer(object):
    """Base class for Lexers that keep track of line numbers."""
    def __init__(self, lexicon):
        self.line_no = -1                    # -1 while no lex() call is in progress
        self.scanner = re.Scanner(lexicon)

    def make_token(self, type, value):
        """Build a Token stamped with the current line, then advance the line
           count by the number of newlines in its text."""
        token = Token(type, value, self.line_no)
        self.line_no += value.count("\n")
        return token

    def lex(self, text):
        """Return the list of tokens for text; exits the program with status 1
           if some trailing input cannot be lexed."""
        self.line_no = 0
        tokens, remainder = self.scanner.scan(text)
        if remainder:
            sys.stderr.write("Unlexable input:\n%s\n" % remainder)
            sys.exit(1)
        self.line_no = -1
        return tokens

class OuterRegionLexer(LineTrackingLexer):
    """Lexer for text outside macros: splits input into '{{', '}}' and plain text."""
    def __init__(self):
        super(OuterRegionLexer, self).__init__([
            (r'{{', self.lbrace),
            (r'}}', self.rbrace),
            (r'({(?!{)|}(?!})|[^{}])*', self.text)])
    def lbrace(self, scanner, token): return self.make_token(LBRACE, token)
    def rbrace(self, scanner, token): return self.make_token(RBRACE, token)
    def text(self, scanner, token):   return self.make_token(TEXT, token)

class OuterCommentLexer(OuterRegionLexer):
    """Like OuterRegionLexer, but C and C++ style comments are matched first and
       emitted as plain text."""
    def __init__(self):
        # super(OuterRegionLexer, ...) skips OuterRegionLexer.__init__ and calls
        # LineTrackingLexer.__init__ directly so this class can supply its own lexicon.
        super(OuterRegionLexer, self).__init__([
            (r'/\*(.|[\r\n])*?\*/', self.text),          # multiline comment
            (r'//(.|[\r\n])*?(?=[\r\n])', self.text),    # single line comment
            (r'{{', self.lbrace),
            (r'}}', self.rbrace),
            (r'({(?!{)|}(?!})|/(?![/*])|[^{}/])*', self.text)])

class InnerLexer(OuterRegionLexer):
    """Lexer for the inside of a macro: braces, (optionally quoted) identifiers;
       whitespace is discarded."""
    def __init__(self):
        # Same super() trick as OuterCommentLexer: install this class's own lexicon.
        super(OuterRegionLexer, self).__init__([
            (r'{{', self.lbrace),
            (r'}}', self.rbrace),
            (r'(["\'])?((?:(?!\1)[^\\]|\\.)*)\1', self.quoted_id),
            (r'([^\s]+)', self.identifier),
            (r'\s+', None)])
    def identifier(self, scanner, token): return self.make_token(IDENTIFIER, token)
    def quoted_id(self, scanner, token):
        # remove quotes from quoted ids. Note that ids and quoted ids are pretty much the same thing;
        # the quotes are just optional. You only need them if you need spaces in your expression.
        return self.make_token(IDENTIFIER, re.sub(r'^["\'](.*)["\']$', '\\1', token))

# Global current filename and function name for error msgs
cur_filename = ""
cur_function = None

class WrapSyntaxError(Exception):
    """Simple Class for syntax errors raised by the wrapper generator (rather than python).
       Derives from Exception so that it can be raised on Python 3 as well as Python 2.
    """
    pass

def syntax_error(msg):
    """Report msg on stderr (with the current file and, if set, the current
       function) and raise WrapSyntaxError."""
    # TODO: make line numbers actually work.
    sys.stderr.write("%s:%d: %s\n" % (cur_filename, 0, msg))
    if cur_function:
        sys.stderr.write("    While handling %s.\n" % cur_function)
    raise WrapSyntaxError

################################################################################
# MPI Semantics:
#   Classes in this section describe MPI declarations and types. These are used
#   to parse the mpi.h header and to generate wrapper code.
################################################################################
class Scope:
    """ This is the very basic class for scopes in the wrapper generator. Scopes
        are hierarchical and support nesting. They contain string keys mapped
        to either string values or to macro functions.
        Scopes also keep track of the particular macro they correspond to (macro_name).
    """
    def __init__(self, enclosing_scope=None):
        self.map = {}
        self.enclosing_scope = enclosing_scope
        self.macro_name = None           # For better debugging error messages

    def __getitem__(self, key):
        """Look key up here first, then in each enclosing scope; KeyError if absent."""
        if key in self.map:         return self.map[key]
        elif self.enclosing_scope:  return self.enclosing_scope[key]
        else:                       raise KeyError(key + " is not in scope.")

    def __contains__(self, key):
        """True if key is defined in this scope or any enclosing scope."""
        if key in self.map:         return True
        elif self.enclosing_scope:  return key in self.enclosing_scope
        else:                       return False

    def __setitem__(self, key, value):
        """Bind key in this scope only; enclosing scopes are never modified."""
        self.map[key] = value

    def include(self, map):
        """Add entire contents of the map (or scope) to this scope."""
        self.map.update(map)

################################################################################
# MPI Semantics:
#   Classes in this section describe MPI declarations and types. These are used
#   to parse the mpi.h header and to generate wrapper code.
################################################################################
# Map from function name to declaration created from mpi.h.
mpi_functions = {}

class Param:
    """Descriptor for formal parameters of MPI functions.
       Doesn't represent a full parse, only the initial type information,
       name, and array info of the argument split up into strings.
    """
    def __init__(self, type, pointers, name, array, pos):
        self.type = type               # Name of arg's type (might include things like 'const')
        self.pointers = pointers       # Pointers
        self.name = name               # Formal parameter name (from header or autogenerated)
        self.array = array             # Any array type information after the name
        self.pos = pos                 # Position of arg in declaration
        self.decl = None               # This gets set later by Declaration

    def setDeclaration(self, decl):
        """Needs to be called by Declaration to finish initing the arg."""
        self.decl = decl

    def isHandleArray(self):
        """True if this Param represents an array of MPI handle values."""
        return (self.decl.name in mpi_array_calls
                and self.pos in mpi_array_calls[self.decl.name])

    def countParam(self):
        """If this Param is a handle array, returns the Param that represents the count of its elements"""
        return self.decl.args[mpi_array_calls[self.decl.name][self.pos]]

    def isHandle(self):
        """True if this Param is one of the MPI builtin handle types."""
        return self.type in mpi_handle_types

    def isStatus(self):
        """True if this Param is an MPI_Status. MPI_Status is handled differently
           in c2f/f2c calls from the other handle types.
        """
        return self.type == "MPI_Status"

    def fortranFormal(self):
        """Prints out a formal parameter for a fortran wrapper."""
        # There are only a few possible fortran arg types in our wrappers, since
        # everything is a pointer.
        if self.type == "MPI_Aint" or self.type.endswith("_function"):
            ftype = self.type
        else:
            ftype = "MPI_Fint"

        # Arrays don't come in as pointers (they're passed as arrays)
        # Everything else is a pointer.
        if self.pointers:
            pointers = self.pointers
        elif self.array:
            pointers = ""
        else:
            pointers = "*"

        # Put it all together and return the fortran wrapper type here.
        arr = self.array or ''
        return "%s %s%s%s" % (ftype, pointers, self.name, arr)

    def cType(self):
        """Returns the C type of this parameter (type, pointers and array info,
           without the name), or '' for a parameter with no type such as '...'.
        """
        if not self.type:
            return ''
        else:
            arr = self.array or ''
            pointers = self.pointers or ''
            return "%s%s%s" % (self.type, pointers, arr)

    def cFormal(self):
        """Prints out a formal parameter for a C wrapper."""
        if not self.type:
            return self.name  # special case for '...'
        else:
            arr = self.array or ''
            pointers = self.pointers or ''
            return "%s %s%s%s" % (self.type, pointers, self.name, arr)

    def castType(self):
        """Returns a type string usable in a C cast: an unsized leading array
           dimension '[]' is rewritten as a pointer.
        """
        arr = self.array or ''
        pointers = self.pointers or ''
        if '[]' in arr:
            if arr.count('[') > 1:
                pointers += '(*)'   # need extra parens for, e.g., int[][3] -> int(*)[3]
            else:
                pointers += '*'     # just a single array; can pass pointer.
            arr = arr.replace('[]', '')
        return "%s%s%s" % (self.type, pointers, arr)

    def __str__(self):
        return self.cFormal()


class Declaration:
    """ Descriptor for simple MPI function declarations.
        Contains return type, name of function, and a list of args.
    """
    def __init__(self, rtype, name):
        self.rtype = rtype
        self.name = name
        self.args = []

    def addArgument(self, arg):
        """Append a Param and point it back at this Declaration."""
        arg.setDeclaration(self)
        self.args.append(arg)

    def __iter__(self):
        for arg in self.args: yield arg

    def __str__(self):
        return self.prototype()

    def retType(self):
        return self.rtype

    def formals(self):
        return [arg.cFormal() for arg in self.args]

    def types(self):
        return [arg.cType() for arg in self.args]

    def argsNoEllipsis(self):
        """Returns the list of args with any '...' parameter left out."""
        # Build a real list: callers index it (getArgName) and concatenate it
        # with other lists, which an iterator returned by filter() cannot do.
        return [arg for arg in self.args if arg.name != "..."]

    def returnsErrorCode(self):
        """True if the function returns an int error code.
           MPI_Wtime and MPI_Wtick are the special case: they return a double
           value instead of an int error code, so this is False for them.
        """
        return self.rtype == "int"

    def argNames(self):
        return [arg.name for arg in self.argsNoEllipsis()]

    def getArgName(self, index):
        return self.argsNoEllipsis()[index].name

    def fortranFormals(self):
        """Returns the list of formal parameter strings for the fortran wrapper,
           with a trailing 'MPI_Fint *ierr' when the call returns an error code.
        """
        # List comprehension (rather than map) so the result can be concatenated below.
        formals = [arg.fortranFormal() for arg in self.argsNoEllipsis()]
        if self.name == "MPI_Init": formals = []   # Special case for init: no args in fortran

        ierr = []
        if self.returnsErrorCode(): ierr = ["MPI_Fint *ierr"]
        return formals + ierr

    def fortranArgNames(self):
        """Returns argument names for the fortran wrapper, plus 'ierr' when the
           call returns an error code.
        """
        names = self.argNames()
        if self.name == "MPI_Init": names = []

        ierr = []
        if self.returnsErrorCode(): ierr = ["ierr"]
        return names + ierr

    def prototype(self, modifiers=""):
        """Returns the C prototype (no trailing ';'), prefixed by the given list of modifiers."""
        if modifiers: modifiers = joinlines(modifiers, " ")
        return "%s%s %s(%s)" % (modifiers, self.retType(), self.name, ", ".join(self.formals()))

    def pmpi_prototype(self, modifiers=""):
        """Same as prototype(), but for the 'P'-prefixed (PMPI_) version of the function."""
        if modifiers: modifiers = joinlines(modifiers, " ")
        return "%s%s P%s(%s)" % (modifiers, self.retType(), self.name, ", ".join(self.formals()))

# NOTE(review): fortranPrototype is a method of class Declaration, whose header
# lies above this span; it belongs at class-body indentation under that class.
def fortranPrototype(self, name=None, modifiers=""):
    """Returns the C prototype string for a Fortran binding of this declaration.

    name      -- binding name to emit; defaults to this declaration's name.
    modifiers -- passed through joinlines(modifiers, " ") and prepended.

    Declarations that return an error code get a 'void' return type, since
    the Fortran form reports the code through the trailing ierr parameter.
    """
    if not name:
        name = self.name
    if modifiers:
        modifiers = joinlines(modifiers, " ")

    if self.returnsErrorCode():
        rtype = "void"  # Fortran calls use ierr parameter instead
    else:
        rtype = self.rtype
    return "%s%s %s(%s)" % (modifiers, rtype, name, ", ".join(self.fortranFormals()))


# Every parameter type string and pointer string seen while parsing mpi.h.
types = set()
all_pointers = set()

def enumerate_mpi_declarations(mpicc, includes):
    """ Invokes mpicc's C preprocessor on a C file that includes mpi.h.
        Parses the output for declarations, and yields each declaration to
        the caller.

        mpicc    -- compiler command to run with -E.
        includes -- list of directories, each passed to the compiler as -I<dir>.

        Exits the process with status 1 if an argument cannot be parsed or if
        the preprocessor exits with a nonzero code.  Raises ValueError for a
        declaration whose parentheses do not match.
    """
    # Create an input file that just includes <mpi.h>
    tmpfile = tempfile.NamedTemporaryFile('w+b', -1, '.c')
    try:
        tmpname = "%s" % tmpfile.name
        tmpfile.write('#include <mpi.h>')
        tmpfile.write("\n")
        tmpfile.flush()

        # Run the mpicc -E on the temp file and pipe the output
        # back to this process for parsing.
        string_includes = ["-I" + inc_dir for inc_dir in includes]
        mpicc_cmd = "%s -E %s" % (mpicc, " ".join(string_includes))
        try:
            popen = subprocess.Popen("%s %s" % (mpicc_cmd, tmpname), shell=True,
                                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except IOError:
            sys.stderr.write("IOError: couldn't run '" + mpicc_cmd + "' for parsing mpi.h\n")
            sys.exit(1)

        # Parse out the declarations from the MPI file
        mpi_h = popen.stdout
        for line in mpi_h:
            line = line.strip()
            begin = begin_decl_re.search(line)
            if begin and not exclude_re.search(line):
                # Grab return type and fn name from initial parse
                return_type, fn_name = begin.groups()

                # Accumulate rest of declaration (possibly multi-line)
                while not end_decl_re.search(line):
                    line += " " + next(mpi_h).strip()

                # Split args up by commas so we can parse them independently
                fn_and_paren = r'(%s\s*\()' % fn_name
                match = re.search(fn_and_paren, line)
                lparen = match.start(1) + len(match.group(1)) - 1
                rparen = find_matching_paren(line, lparen)
                if rparen < 0:
                    raise ValueError("Malformed declaration in header: '%s'" % line)

                arg_string = line[lparen+1:rparen]
                arg_list = [s.strip() for s in arg_string.split(",")]

                # Handle functions that take no args specially
                if arg_list == ['void']:
                    arg_list = []

                # Parse formal parameter descriptors out of args
                decl = Declaration(return_type, fn_name)
                arg_num = 0
                for arg in arg_list:
                    if arg == '...':   # Special case for Pcontrol.
                        decl.addArgument(Param(None, None, '...', None, arg_num))
                    else:
                        match = formal_re.match(arg)
                        if not match:
                            sys.stderr.write("MATCH FAILED FOR: '%s' in %s\n" % (arg, fn_name))
                            sys.exit(1)

                        arg_type, pointers, name, array = match.groups()
                        types.add(arg_type)
                        all_pointers.add(pointers)
                        # If there's no name, make one up.
                        if not name:
                            name = "arg_" + str(arg_num)

                        decl.addArgument(Param(arg_type.strip(), pointers, name, array, arg_num))
                    arg_num += 1

                yield decl

        mpi_h.close()
        return_code = popen.wait()
        if return_code != 0:
            sys.stderr.write("Error: Couldn't run '%s' for parsing mpi.h.\n" % mpicc_cmd)
            sys.stderr.write(" Process exited with code %d.\n" % return_code)
            sys.exit(1)
    finally:
        # Do some cleanup once we're done reading (also on error paths).
        tmpfile.close()


def write_enter_guard(out, decl):
    """Prevent us from entering wrapper functions if we're already in a wrapper function.
       Just call the PMPI function w/o the wrapper instead.
       Writes nothing unless output_guards is set."""
    if output_guards:
        out.write(" if (in_wrapper) return P%s(%s);\n" % (decl.name, ", ".join(decl.argNames())))
        out.write(" in_wrapper = 1;\n")

def write_exit_guard(out):
    """After a call, set in_wrapper back to 0 so we can enter the next call.
       Writes nothing unless output_guards is set."""
    if output_guards:
        out.write(" in_wrapper = 0;\n")


def write_c_wrapper(out, decl, return_val, write_body):
    """Write the C wrapper for an MPI function.

    out        -- file-like object receiving the generated C.
    decl       -- Declaration of the function being wrapped.
    return_val -- name of the C variable that holds the wrapper's return value.
    write_body -- callable taking 'out' that emits the wrapper's body.
    """
    # Write the PMPI prototype here in case mpi.h doesn't define it
    # (sadly the case with some MPI implementations)
    out.write(decl.pmpi_prototype(default_modifiers))
    out.write(";\n")

    # Now write the wrapper function, which will call the PMPI function we declared.
    out.write(decl.prototype(default_modifiers))
    out.write(" { \n")
    out.write(" %s %s = 0;\n" % (decl.retType(), return_val))

    write_enter_guard(out, decl)
    write_body(out)
    write_exit_guard(out)

    out.write(" return %s;\n" % return_val)
    out.write("}\n\n")


def write_fortran_binding(out, decl, delegate_name, binding, stmts=None):
    """Outputs a wrapper for a particular fortran binding that delegates to the
       primary Fortran wrapper.  Optionally takes a list of statements to execute
       before delegating.
    """
    out.write(decl.fortranPrototype(binding, default_modifiers))
    out.write(" { \n")
    if stmts:
        out.write(joinlines([" " + s for s in stmts]))
    if decl.returnsErrorCode():
        # regular MPI fortran functions use an error code
        out.write(" %s(%s);\n" % (delegate_name, ", ".join(decl.fortranArgNames())))
    else:
        # wtick and wtime return a value
        out.write(" return %s(%s);\n" % (delegate_name, ", ".join(decl.fortranArgNames())))
    out.write("}\n\n")


class FortranDelegation:
    """Class for constructing a call to a Fortran wrapper delegate function.  Provides
       storage for local temporary variables, copies of parameters, callsites for MPI-1 and
       MPI-2, and writebacks to local pointer types.
    """
    def __init__(self, decl, return_val):
        self.decl = decl
        self.return_val = return_val

        self.temps = set()        # temp variable declarations (deduplicated)
        self.copies = []          # statements run before the MPI-2 call
        self.writebacks = []      # statements run after the MPI-2 call
        self.actuals = []         # actual arguments for the MPI-2 call
        self.mpich_actuals = []   # actual arguments for the MPICH (MPI-1) call

    def addTemp(self, type, name):
        """Adds a temp var with a particular name.  Adds the same var only once."""
        temp = " %s %s;" % (type, name)
        self.temps.add(temp)

    def addActual(self, actual):
        """Adds an actual argument used by both the MPICH and the MPI-2 call."""
        self.actuals.append(actual)
        self.mpich_actuals.append(actual)

    def addActualMPICH(self, actual):
        """Adds an actual argument used only by the MPICH call."""
        self.mpich_actuals.append(actual)

    def addActualMPI2(self, actual):
        """Adds an actual argument used only by the MPI-2 call."""
        self.actuals.append(actual)

    def addWriteback(self, stmt):
        """Adds a statement to emit after the MPI-2 call."""
        self.writebacks.append(" %s" % stmt)

    def addCopy(self, stmt):
        """Adds a statement to emit before the MPI-2 call."""
        self.copies.append(" %s" % stmt)

    def write(self, out):
        """Writes the return-value declaration and the delegating call to 'out'.

        When the two call forms are identical and no temps, copies or writebacks
        were recorded, a single call is written; otherwise both forms are written
        inside an '#if ... MPICH test' preprocessor conditional.
        """
        # Every argument must have been registered for both call forms.
        assert len(self.actuals) == len(self.mpich_actuals)

        call = " %s = %s" % (self.return_val, self.decl.name)
        mpich_call = "%s(%s);\n" % (call, ", ".join(self.mpich_actuals))
        mpi2_call = "%s(%s);\n" % (call, ", ".join(self.actuals))

        out.write(" %s %s = 0;\n" % (self.decl.retType(), self.return_val))
        if mpich_call == mpi2_call and not (self.temps or self.copies or self.writebacks):
            out.write(mpich_call)
        else:
            out.write("#if (!defined(MPICH_HAS_C2F) && defined(MPICH_NAME) && (MPICH_NAME == 1)) /* MPICH test */\n")
            out.write(mpich_call)
            out.write("#else /* MPI-2 safe call */\n")
            out.write(joinlines(self.temps))
            out.write(joinlines(self.copies))
            out.write(mpi2_call)
            out.write(joinlines(self.writebacks))
            out.write("#endif /* MPICH test */\n")


def write_fortran_wrappers(out, decl, return_val):
    """Writes primary fortran wrapper that handles arg translation.
       Also outputs bindings for this wrapper for different types of fortran compilers.
    """
    delegate_name = decl.name + f_wrap_suffix
    out.write(decl.fortranPrototype(delegate_name, ["static"]))
    out.write(" { \n")

    call = FortranDelegation(decl, return_val)

    if decl.name == "MPI_Init":
        # Use out.write() here so it comes at very beginning of wrapper function
        out.write(" int argc = 0;\n")
        out.write(" char ** argv = NULL;\n")
        call.addActual("&argc")
        call.addActual("&argv")
        call.write(out)
        out.write(" *ierr = %s;\n" % return_val)
        out.write("}\n\n")

        # Write out various bindings that delegate to the main fortran wrapper.
        # Each binding records which Fortran symbol was entered in fortran_init.
        write_fortran_binding(out, decl, delegate_name, "MPI_INIT", ["fortran_init = 1;"])
        write_fortran_binding(out, decl, delegate_name, "mpi_init", ["fortran_init = 2;"])
        write_fortran_binding(out, decl, delegate_name, "mpi_init_", ["fortran_init = 3;"])
        write_fortran_binding(out, decl, delegate_name, "mpi_init__", ["fortran_init = 4;"])
        return

    # This loop processes the rest of the call for all other routines.
    for arg in decl.args:
        if arg.name == "...":   # skip ellipsis
            continue

        if not (arg.pointers or arg.array):
            if not arg.isHandle():
                # These are pass-by-value arguments, so just deref and pass thru
                dereferenced = "*%s" % arg.name
                call.addActual(dereferenced)
            else:
                # Non-ptr, non-arr handles need to be converted with MPI_Blah_f2c
                # No special case for MPI_Status here because MPI_Statuses are never passed by value.
                call.addActualMPI2("%s_f2c(*%s)" % (conversion_prefix(arg.type), arg.name))
                call.addActualMPICH("(%s)(*%s)" % (arg.type, arg.name))

        else:
            if not arg.isHandle():
                # Non-MPI handle pointer types can be passed w/o dereferencing, but need to
                # cast to correct pointer type first (from MPI_Fint*).
                call.addActual("(%s)%s" % (arg.castType(), arg.name))
            else:
                # For MPI-1, assume ints, cross fingers, and pass things straight through.
                call.addActualMPICH("(%s*)%s" % (arg.type, arg.name))
                conv = conversion_prefix(arg.type)
                temp = "temp_%s" % arg.name

                # For MPI-2, other pointer and array types need temporaries and special conversions.
                if not arg.isHandleArray():
                    call.addTemp(arg.type, temp)
                    call.addActualMPI2("&%s" % temp)

                    if arg.isStatus():
                        call.addCopy("%s_f2c(%s, &%s);" % (conv, arg.name, temp))
                        call.addWriteback("%s_c2f(&%s, %s);" % (conv, temp, arg.name))
                    else:
                        call.addCopy("%s = %s_f2c(*%s);" % (temp, conv, arg.name))
                        call.addWriteback("*%s = %s_c2f(%s);" % (arg.name, conv, temp))
                else:
                    # Make temporary variables for the array and the loop var
                    temp_arr_type = "%s*" % arg.type
                    call.addTemp(temp_arr_type, temp)
                    call.addTemp("int", "i")

                    # generate a copy and a writeback statement for this type of handle
                    if arg.isStatus():
                        copy = " %s_f2c(&%s[i], &%s[i])" % (conv, arg.name, temp)
                        writeback = " %s_c2f(&%s[i], &%s[i])" % (conv, temp, arg.name)
                    else:
                        copy = " temp_%s[i] = %s_f2c(%s[i])" % (arg.name, conv, arg.name)
                        writeback = " %s[i] = %s_c2f(temp_%s[i])" % (arg.name, conv, arg.name)

                    # Generate the call surrounded by temp array allocation, copies, writebacks, and temp free
                    count = "*%s" % arg.countParam().name
                    call.addCopy("%s = (%s)malloc(sizeof(%s) * %s);" %
                                 (temp, temp_arr_type, arg.type, count))
                    call.addCopy("for (i=0; i < %s; i++)" % count)
                    call.addCopy("%s;" % copy)
                    call.addActualMPI2(temp)
                    call.addWriteback("for (i=0; i < %s; i++)" % count)
                    call.addWriteback("%s;" % writeback)
                    call.addWriteback("free(%s);" % temp)

    call.write(out)
    if decl.returnsErrorCode():
        out.write(" *ierr = %s;\n" % return_val)
    else:
        out.write(" return %s;\n" % return_val)
    out.write("}\n\n")

    # Write out various bindings that delegate to the main fortran wrapper
    write_fortran_binding(out, decl, delegate_name, decl.name.upper())
    write_fortran_binding(out, decl, delegate_name, decl.name.lower())
    write_fortran_binding(out, decl, delegate_name, decl.name.lower() + "_")
    write_fortran_binding(out, decl, delegate_name, decl.name.lower() + "__")


################################################################################
# Macros:
#   - functions annotated as @macro or @bodymacro define the global macros and
#     basic pieces of the generator.
#   - include_decl is used to include MPI declarations into function scopes.
################################################################################
# Table of global macros
macros = {}

# This decorator adds macro functions to the outermost function scope.
def macro(macro_name, **attrs):
    """Decorator factory: registers the function in 'macros' under macro_name.

    The function gets has_body = False, then every keyword in attrs is set as
    an attribute on it (so has_body=True overrides the default).
    """
    def decorate(fun):
        macros[macro_name] = fun   # Add macro to outer scope under supplied name
        fun.has_body = False       # By default, macros have no body.
        for key in attrs:          # Optionally set/override attributes
            setattr(fun, key, attrs[key])
        return fun
    return decorate

def handle_list(list_name, list, args):
    """This function handles indexing lists used as macros in the wrapper generator.
       There are two syntaxes:
          {{<list_name>}}          Evaluates to the whole list, e.g. 'foo, bar, baz'
          {{<list_name> <index>}}  Evaluates to a particular element of a list.

       list_name is used only in error messages.  A non-integer or out-of-range
       index is reported through syntax_error().
    """
    if not args:
        return list

    len(args) == 1 or syntax_error("Wrong number of args for list expression.")
    try:
        return list[int(args[0])]
    except ValueError:
        syntax_error("Invald index value: '%s'" % args[0])
    except IndexError:
        # int(args[0]) already succeeded above, or we would be in ValueError.
        syntax_error("Index out of range in '%s': %d" % (list_name, int(args[0])))

class TypeApplier:
    """This class implements a Macro function for applying something callable to
       args in a decl with a particular type.
    """
    def __init__(self, decl):
        self.decl = decl

    def __call__(self, out, scope, args, children):
        """Writes '<macro_name>(<arg name>);' for each arg whose cType() equals the given type."""
        len(args) == 2 or syntax_error("Wrong number of args in apply macro.")
        wanted_type, macro_name = args
        for arg in self.decl.args:
            if arg.cType() == wanted_type:
                out.write("%s(%s);\n" % (macro_name, arg.name))

def include_decl(scope, decl):
    """This function is used by macros to include attributes MPI declarations in their scope."""
    scope["ret_type"] = decl.retType()
    scope["args"] = decl.argNames()
    scope["nargs"] = len(decl.argNames())
    scope["types"] = decl.types()
    scope["formals"] = decl.formals()
    scope["apply_to_type"] = TypeApplier(decl)
    scope.function_name = decl.name

    # These are old-style, deprecated names.
    def get_arg(out, scope, args, children):
        return handle_list("args", decl.argNames(), args)
    scope["get_arg"] = get_arg
    scope["applyToType"] = scope["apply_to_type"]
    scope["retType"] = scope["ret_type"]
    scope["argList"] = "(%s)" % ", ".join(scope["args"])
    scope["argTypeList"] = "(%s)" % ", ".join(scope["formals"])

def all_but(fn_list):
    """Return a list of all mpi functions except those in fn_list"""
    return list(set(mpi_functions.keys()) - set(fn_list))

@macro("foreachfn", has_body=True)
def foreachfn(out, scope, args, children):
    """Iterate over all functions listed in args.

    args[0] is the variable name bound to each function name; args[1:] are the
    function names.  The children are evaluated once per function in a fresh
    scope that includes that function's declaration.
    """
    args or syntax_error("Error: foreachfn requires function name argument.")
    global cur_function

    fn_var = args[0]
    for fn_name in args[1:]:
        cur_function = fn_name
        if not fn_name in mpi_functions:
            syntax_error(fn_name + " is not an MPI function")

        decl = mpi_functions[fn_name]
        fn_scope = Scope(scope)
        fn_scope[fn_var] = fn_name
        include_decl(fn_scope, decl)

        for child in children:
            child.evaluate(out, fn_scope)
    cur_function = None

@macro("fn", has_body=True)
def fn(out, scope, args, children):
    """Iterate over listed functions and generate skeleton too.

    Like foreachfn, but wraps the evaluated children in a generated C wrapper
    and, when output_fortran_wrappers is set, Fortran wrappers as well.
    """
    args or syntax_error("Error: fn requires function name argument.")
    global cur_function

    fn_var = args[0]
    for fn_name in args[1:]:
        cur_function = fn_name
        if not fn_name in mpi_functions:
            syntax_error(fn_name + " is not an MPI function")

        decl = mpi_functions[fn_name]
        return_val = "_wrap_py_return_val"

        fn_scope = Scope(scope)
        fn_scope[fn_var] = fn_name
        include_decl(fn_scope, decl)

        fn_scope["ret_val"] = return_val
        fn_scope["returnVal"] = fn_scope["ret_val"]  # deprecated name.

        c_call = "%s = P%s(%s);" % (return_val, decl.name, ", ".join(decl.argNames()))
        if fn_name == "MPI_Init" and output_fortran_wrappers:
            def callfn(out, scope, args, children):
                # All this is to deal with fortran, since fortran's MPI_Init() function is different
                # from C's.  We need to make sure to delegate specifically to the fortran init wrapping.
                # For dynamic libs, we use weak symbols to pick it automatically.  For static libs, need
                # to rely on input from the user via pmpi_init_binding and the -i option.
                out.write(" if (fortran_init) {\n")
                out.write("#ifdef PIC\n")
                out.write(" if (!PMPI_INIT && !pmpi_init && !pmpi_init_ && !pmpi_init__) {\n")
                out.write(" fprintf(stderr, \"ERROR: Couldn't find fortran pmpi_init function. Link against static library instead.\\n\");\n")
                out.write(" exit(1);\n")
                out.write(" }")
                out.write(" switch (fortran_init) {\n")
                out.write(" case 1: PMPI_INIT(&%s); break;\n" % return_val)
                out.write(" case 2: pmpi_init(&%s); break;\n" % return_val)
                out.write(" case 3: pmpi_init_(&%s); break;\n" % return_val)
                out.write(" case 4: pmpi_init__(&%s); break;\n" % return_val)
                out.write(" default:\n")
                out.write(" fprintf(stderr, \"NO SUITABLE FORTRAN MPI_INIT BINDING\\n\");\n")
                out.write(" break;\n")
                out.write(" }\n")
                out.write("#else /* !PIC */\n")
                out.write(" %s(&%s);\n" % (pmpi_init_binding, return_val))
                out.write("#endif /* !PIC */\n")
                out.write(" } else {\n")
                out.write(" %s\n" % c_call)
                out.write(" }\n")

            fn_scope["callfn"] = callfn

            # Writes to the global 'output' (not 'out'); handed to once().
            def write_fortran_init_flag():
                output.write("static int fortran_init = 0;\n")
            once(write_fortran_init_flag)

        else:
            fn_scope["callfn"] = c_call

        def write_body(out):
            for child in children:
                child.evaluate(out, fn_scope)

        out.write("/* ================== C Wrappers for %s ================== */\n" % fn_name)
        write_c_wrapper(out, decl, return_val, write_body)
        if output_fortran_wrappers:
            out.write("/* =============== Fortran Wrappers for %s =============== */\n" % fn_name)
            write_fortran_wrappers(out, decl, return_val)
        out.write("/* ================= End Wrappers for %s ================= */\n\n\n" % fn_name)
    cur_function = None

@macro("forallfn", has_body=True)
def forallfn(out, scope, args, children):
    """Iterate over all but the functions listed in args."""
    args or syntax_error("Error: forallfn requires function name argument.")
    foreachfn(out, scope, [args[0]] + all_but(args[1:]), children)

@macro("fnall", has_body=True)
def fnall(out, scope, args, children):
    """Iterate over all but listed functions and generate skeleton too."""
    args or syntax_error("Error: fnall requires function name argument.")
    fn(out, scope, [args[0]] + all_but(args[1:]), children)

@macro("sub")
def sub(out, scope, args, children):
    """{{sub <string> <regex> <substitution>}}
       Replaces value of <string> with all instances of <regex> replaced with <substitution>.
       If <string> is a list, the substitution is applied to each element.
    """
    len(args) == 3 or syntax_error("'sub' macro takes exactly 3 arguments.")
    string, regex, substitution = args
    if isinstance(string, list):
        return [re.sub(regex, substitution, s) for s in string]
    if not isinstance(regex, str):
        syntax_error("Invalid regular expression in 'sub' macro: '%s'" % regex)
    else:
        return re.sub(regex, substitution, string)

@macro("zip")
def zip_macro(out, scope, args, children):
    """Pairs up two lists element-wise, returning a list of 'a b' strings."""
    len(args) == 2 or syntax_error("'zip' macro takes exactly 2 arguments.")
    if not all([isinstance(a, list) for a in args]):
        syntax_error("Arguments to 'zip' macro must be lists.")
    a, b = args
    return ["%s %s" % x for x in zip(a, b)]

@macro("def")
def def_macro(out, scope, args, children):
    """Binds args[0] to args[1] in the current scope."""
    len(args) == 2 or syntax_error("'def' macro takes exactly 2 arguments.")
    scope[args[0]] = args[1]

@macro("list")
def list_macro(out, scope, args, children):
    """Builds one flat list from the args; list args are spliced in, others appended."""
    result = []
    for arg in args:
        if isinstance(arg, list):
            result.extend(arg)
        else:
            result.append(arg)
    return result

@macro("filter")
def filter_macro(out, scope, args, children):
    """{{filter <regex> <list>}}
       Returns a list containing all elements of <list> that <regex> matches.
    """
    len(args) == 2 or syntax_error("'filter' macro takes exactly 2 arguments.")
    regex, l = args
    if not isinstance(l, list):
        syntax_error("Invalid list in 'filter' macro: '%s'" % str(l))
    if not isinstance(regex, str):
        syntax_error("Invalid regex in 'filter' macro: '%s'" % str(regex))
    return [s for s in l if re.search(regex, s)]

@macro("fn_num")
def fn_num(out, scope, args, children):
    """Returns the current counter value and increments it for the next use."""
    val = fn_num.val
    fn_num.val += 1
    return val
fn_num.val = 0 # init the counter here.


################################################################################
# Parser support:
#   - Chunk class for bits of parsed text on which macros are executed.
#   - parse() function uses a Lexer to examine a file.
################################################################################
class Chunk:
    """Represents a piece of a wrapper file.  Is either a text chunk
       or a macro chunk with children to which the macro should be applied.
       macros are evaluated lazily, so the macro is just a string until
       execute is called and it is fetched from its enclosing scope."""
    def __init__(self):
        self.macro    = None   # macro name, or None for a text chunk
        self.args     = []     # macro arguments: strings or nested Chunks
        self.text     = None   # literal text for a text chunk
        self.children = []     # body chunks of a body macro

    def iwrite(self, file, level, text):
        """Write indented text."""
        file.write(" " * level)
        file.write(text)

    def write(self, file=sys.stdout, l=0):
        """Dump this chunk and, recursively, its children for debugging; 'l' is the indent level."""
        if self.macro: self.iwrite(file, l, "{{%s %s}}" % (self.macro, " ".join([str(arg) for arg in self.args])))
        if self.text:  self.iwrite(file, l, "TEXT\n")
        for child in self.children:
            child.write(file, l+1)

    def execute(self, out, scope):
        """This function executes a chunk.  For strings, lists, text chunks, etc., this just
           entails returning the chunk's value.  For callable macros, this executes and returns
           the chunk's value.
        """
        if not self.macro:
            # Text chunk: written straight to the output, no value returned.
            out.write(self.text)
        else:
            if not self.macro in scope:
                error_msg = "Invalid macro: '%s'" % self.macro
                if scope.function_name:
                    error_msg += " for " + scope.function_name
                syntax_error(error_msg)

            value = scope[self.macro]
            if hasattr(value, "__call__"):
                # It's a macro, so we need to execute it.  But first evaluate its args.
                def eval_arg(arg):
                    if isinstance(arg, Chunk):
                        return arg.execute(out, scope)
                    else:
                        return arg
                args = [eval_arg(arg) for arg in self.args]
                return value(out, scope, args, self.children)
            elif isinstance(value, list):
                # Special case for handling lists and list indexing
                return handle_list(self.macro, value, self.args)
            else:
                # Just return the value of anything else
                return value

    def stringify(self, value):
        """Used by evaluate() to print the return values of chunks out to the output file."""
        if isinstance(value, list):
            return ", ".join(value)
        else:
            return str(value)

    def evaluate(self, out, scope):
        """This is an 'interactive' version of execute.  This should be called when
           the chunk's value (if any) should be written out.  Body macros and the outermost
           scope should use this instead of execute().
        """
        value = self.execute(out, scope)
        if value is not None:  # Note the distinction here -- 0 is false but we want to print it!
            out.write(self.stringify(value))

class Parser:
    """Parser for the really simple wrappergen grammar.
       This parser has support for multiple lexers.  self.tokens is a list of iterables, each
       representing a new token stream.  You can add additional tokens to be lexed using push_tokens.
       This will cause the pushed tokens to be handled before any others.  This allows us to switch
       lexers while parsing, so that the outer part of the file is processed in a language-agnostic
       way, but stuff inside macros is handled as its own macro language.
    """
    def __init__(self, macros):
        self.macros = macros
        self.macro_lexer = InnerLexer()
        self.tokens = iter([]) # iterators over tokens, handled in order.  Starts empty.
        self.token = None      # last accepted token
        self.next = None       # next token

    def gettok(self):
        """Puts the next token in the input stream into self.next (None at end of input)."""
        self.next = next(self.tokens, None)

    def push_tokens(self, iterable):
        """Adds all tokens in some iterable to the token stream."""
        # The current lookahead is re-queued behind the pushed tokens so it is not lost.
        self.tokens = itertools.chain(iter(iterable), iter([self.next]), self.tokens)
        self.gettok()

    def accept(self, id):
        """Puts the next symbol in self.token if we like it.  Then calls gettok().
           Returns False, rather than failing, when the input is exhausted."""
        if self.next is not None and self.next.isa(id):
            self.token = self.next
            self.gettok()
            return True
        return False

    def unexpected_token(self):
        syntax_error("Unexpected token: %s." % self.next)

    def expect(self, id):
        """Like accept(), but fails if we don't like the next token."""
        if self.accept(id):
            return True
        else:
            if self.next:
                self.unexpected_token()
            else:
                syntax_error("Unexpected end of file.")
            sys.exit(1)

    def is_body_macro(self, name):
        """Shorthand for testing whether a particular name is the name of a macro that has a body.
           Need this for parsing the language b/c things like {{fn}} need a corresponding {{endfn}}.
        """
        return name in self.macros and self.macros[name].has_body

    def macro(self, accept_body_macros=True):
        """Parses one macro invocation, up to its closing brace, and returns it as a Chunk.

        Nested macro arguments are parsed recursively with accept_body_macros=False,
        since body macros are not allowed in expression context.
        """
        # lex inner-macro text as wrapper language if we encounter text here.
        if self.accept(TEXT):
            self.push_tokens(self.macro_lexer.lex(self.token.value))

        # Now proceed with parsing the macro language's tokens
        chunk = Chunk()
        self.expect(IDENTIFIER)
        chunk.macro = self.token.value

        if not accept_body_macros and self.is_body_macro(chunk.macro):
            syntax_error("Cannot use body macros in expression context: '%s'" % chunk.macro)
            sys.exit(1)

        while True:
            if self.accept(LBRACE):
                chunk.args.append(self.macro(False))
            elif self.accept(IDENTIFIER):
                chunk.args.append(self.token.value)
            elif self.accept(TEXT):
                self.push_tokens(self.macro_lexer.lex(self.token.value))
            else:
                self.expect(RBRACE)
                break
        return chunk

    def text(self, end_macro = None):
        """Parses text and macros until input ends or the macro named end_macro is seen.
           Returns the list of parsed chunks; the end macro itself is not included."""
        chunks = []
        while self.next:
            if self.accept(TEXT):
                chunk = Chunk()
                chunk.text = self.token.value
                chunks.append(chunk)
            elif self.accept(LBRACE):
                chunk = self.macro()
                name = chunk.macro

                if name == end_macro:
                    # end macro: just break and don't append
                    break
                elif isindex(chunk.macro):
                    # Special case for indices -- raw number macros index 'args' list
                    chunk.macro = "args"
                    chunk.args = [name]
                elif self.is_body_macro(name):
                    chunk.children = self.text("end"+name)
                chunks.append(chunk)
            else:
                self.unexpected_token()

        return chunks

    def parse(self, text):
        """Lexes 'text' with the outer lexer selected by skip_headers and returns its chunks."""
        if skip_headers:
            outer_lexer = OuterRegionLexer()   # Not generating C code, text is text.
        else:
            outer_lexer = OuterCommentLexer()  # C code. Considers C-style comments.
        self.push_tokens(outer_lexer.lex(text))
        return self.text()

################################################################################
# Main script:
#   Get arguments, set up outer scope, parse files, generator wrappers.
################################################################################
def usage():
    """Prints the usage string to stderr and exits with status 2."""
    sys.stderr.write(usage_string)
    sys.exit(2)

# Let the user specify another mpicc to get mpi.h from
output = sys.stdout
output_filename = None

try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "fsgdc:o:i:I:")
except getopt.GetoptError as err:
    # The exception must be converted explicitly; it cannot be concatenated to a str.
    sys.stderr.write(str(err) + "\n")
    usage()

for opt, arg in opts:
    if opt == "-d": dump_prototypes = True
    if opt == "-f": output_fortran_wrappers = True
    if opt == "-s": skip_headers = True
    if opt == "-g": output_guards = True
    if opt == "-c": mpicc = arg
    if opt == "-o": output_filename = arg
    if opt == "-I":
        stripped = arg.strip()
        if stripped: includes.append(stripped)
    if opt == "-i":
        if not arg in pmpi_init_bindings:
            # NOTE(review): the original joined 'possible_bindings', which is not
            # defined in the visible source; list the collection tested above.
            sys.stderr.write("ERROR: PMPI_Init binding must be one of:\n %s\n" % " ".join(pmpi_init_bindings))
            usage()
        else:
            pmpi_init_binding = arg

if len(args) < 1 and not dump_prototypes:
    usage()

# Parse mpi.h and put declarations into a map.
for decl in enumerate_mpi_declarations(mpicc, includes):
    mpi_functions[decl.name] = decl
    if dump_prototypes: sys.stdout.write("%s\n" % decl)

# Fail gracefully if we didn't find anything.
if not mpi_functions:
    sys.stderr.write("Error: Found no declarations in mpi.h.\n")
    sys.exit(1)

# If we're just dumping prototypes, we can just exit here.
if dump_prototypes: sys.exit(0)

# Open the output file here if it was specified
if output_filename:
    try:
        output = open(output_filename, "w")
    except IOError:
        # Report the file we tried to open, not the last option argument parsed.
        sys.stderr.write("Error: couldn't open file " + output_filename + " for writing.\n")
        sys.exit(1)

try:
    # Start with some headers and definitions.
    if not skip_headers:
        output.write(wrapper_includes)
        if output_guards: output.write("static int in_wrapper = 0;\n")

    # Parse each file listed on the command line and execute
    # it once it's parsed.
    fileno = 0
    for f in args:
        cur_filename = f
        with open(cur_filename) as wrapper_file:
            wrapper_text = wrapper_file.read()

        # Outer scope contains fileno and the fundamental macros.
        outer_scope = Scope()
        outer_scope["fileno"] = str(fileno)
        outer_scope.include(macros)

        parser = Parser(macros)
        chunks = parser.parse(wrapper_text)

        for chunk in chunks:
            chunk.evaluate(output, Scope(outer_scope))
        fileno += 1

except WrapSyntaxError:
    # Don't leave a partially written output file behind.
    output.close()
    if output_filename: os.remove(output_filename)
    sys.exit(1)

output.close()