├── src ├── Schedgen │ ├── buffer_element.cpp │ ├── Makefile │ ├── buffer_element.hpp │ ├── goal_comm.h │ ├── trace_reader.hpp │ ├── schedgen_cmdline.ggo │ └── schedgen.hpp ├── liballprof2 │ ├── README.md │ ├── tracer_main.c │ └── gensem.py ├── Schedgen2 │ ├── config_example.json │ ├── TODO │ ├── README │ ├── additional_microbenchmarks.py │ ├── schedgen.py │ ├── mpi_colls.py │ ├── process_trace.py │ ├── goal.py │ └── patterns.py ├── liballprof │ ├── sync.h │ ├── mpi_helloworld.f90 │ ├── allprof.h │ ├── mpi_helloworld.c │ ├── numbers.h │ ├── wrapper.sh │ ├── template.c │ └── sync.c ├── Drawviz │ ├── Makefile │ ├── drawviz.ggo │ ├── TimelineDrawing.hpp │ ├── Drawviz.cpp │ ├── cmdline.h │ └── TimelineDrawing.cpp ├── LogGOPSim │ ├── txt2bin_cmdline.ggo │ ├── Makefile │ ├── loggopsim_cmdline.ggo │ ├── LogGOPSim.hpp │ ├── Goal.hpp │ ├── TimelineVisualization.hpp │ ├── binary_tree_32.goal │ └── Noise.hpp └── CMakeLists.txt ├── .gitignore ├── tests ├── mpi_helloworld.f90 └── mpi_helloworld.c ├── cmake ├── re2c.cmake ├── gengetopt.cmake ├── FindUnwind.cmake └── FindGraphviz.cmake ├── .github └── workflows │ └── build-and-test.yml ├── LICENCE ├── doc ├── README └── README-mpi-matching └── README.md /src/Schedgen/buffer_element.cpp: -------------------------------------------------------------------------------- 1 | #include "buffer_element.hpp" 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *_cmdline.c 3 | *_cmdline.h 4 | src/LogGOPSim/txt2bin.cpp 5 | -------------------------------------------------------------------------------- /src/liballprof2/README.md: -------------------------------------------------------------------------------- 1 | This is an attempt to recreate liballprof and fix some of its shortcomings at the same time. 2 | It is not functional / complete yet - see it as work in progress. 
-------------------------------------------------------------------------------- /src/Schedgen2/config_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "ptrn": "allreduce", 3 | "algorithm": "ring", 4 | "comm_size": 16, 5 | "datasize": 1024, 6 | "output": "allreduce_ring_16_1024.bin", 7 | "txt2bin": "../../build/txt2bin" 8 | } -------------------------------------------------------------------------------- /src/liballprof/sync.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | double sync_tree(MPI_Comm comm); 9 | double sync_lin(MPI_Comm comm); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | -------------------------------------------------------------------------------- /src/Schedgen2/TODO: -------------------------------------------------------------------------------- 1 | Trace Reader: 2 | - add calcs between mpi calls, make them dependent on each other 3 | 4 | GOAL: 5 | - implement re-rooting, now we assume rank 0 is root 6 | - implement comm-flattening 7 | - handle tags 8 | - get rid of GetLabel (label should be index in ops) 9 | -------------------------------------------------------------------------------- /src/Drawviz/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS= -O0 -Wno-deprecated -Wall 2 | CCFLAGS= -O0 3 | LDFLAGS= -lps -lboost_regex 4 | 5 | all: 6 | gengetopt < drawviz.ggo 7 | gcc $(CCFLAGS) -c *.c 8 | g++ $(CXXFLAGS) -c *.cpp 9 | g++ $(CXXFLAGS) *.o -o drawviz $(LDFLAGS) 10 | 11 | clean: 12 | rm -f cmdline.o 13 | rm -f *.o 14 | rm -f drawviz 15 | -------------------------------------------------------------------------------- /src/LogGOPSim/txt2bin_cmdline.ggo: -------------------------------------------------------------------------------- 1 | package "goalsim" 2 | version "0.1" 3 | 4 | option "input" i "Input file, 
textfile containing GOAL schedules" string typestr="filename" 5 | option "output" o "Output file, will contain the binary representation of the GOAL schedules" string typestr="filename" 6 | option "progress" p "Print progress information while parsing the schedule" flag off 7 | 8 | -------------------------------------------------------------------------------- /src/Schedgen2/README: -------------------------------------------------------------------------------- 1 | This is a prototype of a GOAL schedule generator in Python. 2 | At present this is not intended as a replacement for Schedgen. APIs might change drastically. 3 | The GOAL :) of this version is to make it easy to compose GOAL schedules, by keeping everything in memory. 4 | Schedgen on the other hand is optimized to minimize memory footprint and does not easily allow composability, i.e., supporting communicators. 5 | -------------------------------------------------------------------------------- /tests/mpi_helloworld.f90: -------------------------------------------------------------------------------- 1 | program helloworld 2 | implicit none 3 | include 'mpif.h' 4 | 5 | integer :: ierr, me, nproc 6 | double precision :: val 7 | 8 | call MPI_INIT(ierr) 9 | call MPI_COMM_RANK(MPI_COMM_WORLD,me,ierr) 10 | call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ierr) 11 | 12 | call RANDOM_NUMBER(val) 13 | 14 | write(*,*) 'before', me, val 15 | call MPI_ALLREDUCE(MPI_IN_PLACE, val, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & 16 | & MPI_COMM_WORLD, ierr) 17 | write(*,*) 'after', me, val 18 | 19 | call MPI_FINALIZE(ierr); 20 | 21 | end program 22 | -------------------------------------------------------------------------------- /src/liballprof/mpi_helloworld.f90: -------------------------------------------------------------------------------- 1 | program helloworld 2 | implicit none 3 | include 'mpif.h' 4 | 5 | integer :: ierr, me, nproc 6 | double precision :: val 7 | 8 | call MPI_INIT(ierr) 9 | call 
MPI_COMM_RANK(MPI_COMM_WORLD,me,ierr) 10 | call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ierr) 11 | 12 | call RANDOM_NUMBER(val) 13 | 14 | write(*,*) 'before', me, val 15 | call MPI_ALLREDUCE(MPI_IN_PLACE, val, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & 16 | & MPI_COMM_WORLD, ierr) 17 | write(*,*) 'after', me, val 18 | 19 | call MPI_FINALIZE(ierr); 20 | 21 | end program 22 | -------------------------------------------------------------------------------- /src/Schedgen/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS=-g -O3 -Wno-deprecated -Wall -std=c++11 2 | CCFLAGS=-g -O3 -g 3 | LDFLAGS=-g -O3 -g -lboost_iostreams -L/opt/homebrew/lib/ 4 | 5 | force: all 6 | 7 | schedgen_cmdline.c: schedgen_cmdline.ggo 8 | gengetopt -i $< -F schedgen_cmdline 9 | 10 | %.o: %.cpp *hpp *h 11 | ${CXX} $(CXXFLAGS) -c $< 12 | 13 | %.o: %.c *h 14 | ${CC} $(CCFLAGS) -c $< 15 | 16 | all: buffer_element.o schedgen_cmdline.o process_trace.o schedgen_cmdline.ggo schedgen.o 17 | ${CXX} $(CXXFLAGS) *.o -o schedgen $(LDFLAGS) 18 | 19 | clean: 20 | rm -f *.o 21 | rm -f schedgen_cmdline.c schedgen_cmdline.h 22 | rm -f schedgen 23 | -------------------------------------------------------------------------------- /src/Drawviz/drawviz.ggo: -------------------------------------------------------------------------------- 1 | package "drawviz" 2 | version "0.1" 3 | 4 | option "inputfile" i "Name of the inputfile (event data)" string 5 | option "outputfile" o "Name of the output file (postscript)" default="timeline.ps" string optional 6 | option "linethickness" l "Thickness of lines" default="1" int optional 7 | option "starttime" s "Starttime, if only a interval should be drawn" default="0" int optional 8 | option "endtime" e "Endtime, if only a interval should be drawn" default="0" int optional 9 | option "arrowheads" - "If this flag is given, arrowheads will be drawn" flag off 10 | option "descrtext" - "If this flag is given, text will be written below o_send 
and o_recv" flag off 11 | -------------------------------------------------------------------------------- /src/liballprof/allprof.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * liballprof MPIP Wrapper 3 | * 4 | * Copyright: Indiana University 5 | * Author: Torsten Hoefler 6 | * 7 | *************************************************************************/ 8 | 9 | #define VERSION 1 10 | 11 | /* undef to disable banner printing */ 12 | #define PRINT_BANNER 13 | 14 | /* trace file prefix (relative to run directory) and suffix */ 15 | #define FILE_PREFIX "/tmp/pmpi-trace-rank-" 16 | #define FILE_SUFFIX ".txt" 17 | 18 | /* undef to disable writer thread */ 19 | #define WRITER_THREAD 20 | #define BUFSIZE 10485760 21 | #define THRESHOLD 8388608 22 | 23 | /* IBM only implements a subset of MPI-2 */ 24 | /* #define IBM_BROKEN_MPI */ 25 | -------------------------------------------------------------------------------- /cmake/re2c.cmake: -------------------------------------------------------------------------------- 1 | macro (find_re2c) 2 | if (NOT RE2C_EXECUTABLE) 3 | find_program (RE2C_EXECUTABLE re2c) 4 | if (NOT RE2C_EXECUTABLE) 5 | message (FATAL_ERROR "re2c not found. Aborting...") 6 | endif () 7 | endif () 8 | endmacro () 9 | 10 | macro (add_re2c_files _basename) 11 | find_re2c() 12 | 13 | set (_re2c_in ${CMAKE_CURRENT_SOURCE_DIR}/${_basename}.re) 14 | set (_re2c_out ${CMAKE_CURRENT_SOURCE_DIR}/${_basename}.cpp) 15 | 16 | get_filename_component(_basepath ${_basename} DIRECTORY) 17 | get_filename_component(_basefile ${_basename} NAME) 18 | 19 | add_custom_command ( 20 | OUTPUT ${_re2c_out} 21 | COMMAND re2c ${_re2c_in} -o ${_re2c_out} 22 | DEPENDS ${_re2c_in} 23 | # BYPRODUCTS 24 | COMMENT "Generating re2c parser code ..." 
25 | VERBATIM 26 | ) 27 | 28 | endmacro (add_re2c_files) 29 | -------------------------------------------------------------------------------- /tests/mpi_helloworld.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char **argv) { 7 | /* ------ MPI specific ------- */ 8 | int rank; /* MPI rank */ 9 | int procs; /* number of mpi procs */ 10 | double i; 11 | int ret; 12 | MPI_Request reqs[2]; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | MPI_Comm_size(MPI_COMM_WORLD, &procs); 17 | 18 | i = (double)rand(); 19 | 20 | MPI_Isend(&rank, 1, MPI_INT, (rank+1)%procs, 0, MPI_COMM_WORLD, &reqs[0]); 21 | MPI_Irecv(&rank, 1, MPI_INT, (rank-1+procs)%procs, 0, MPI_COMM_WORLD, &reqs[1]); 22 | MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE); 23 | 24 | printf("before rank %u: i=%f\n", rank, i); 25 | ret = MPI_Allreduce(MPI_IN_PLACE, &i, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 26 | printf("after rank %u: i=%f\n", rank, i); 27 | 28 | 29 | printf("Hello from rank %u\n", rank); 30 | fflush(stdout); 31 | 32 | MPI_Finalize(); 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /src/liballprof/mpi_helloworld.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char **argv) { 7 | /* ------ MPI specific ------- */ 8 | int rank; /* MPI rank */ 9 | int procs; /* number of mpi procs */ 10 | double i; 11 | int ret; 12 | MPI_Request reqs[2]; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | MPI_Comm_size(MPI_COMM_WORLD, &procs); 17 | 18 | i = (double)rand(); 19 | 20 | MPI_Isend(&rank, 1, MPI_INT, (rank+1)%procs, 0, MPI_COMM_WORLD, &reqs[0]); 21 | MPI_Irecv(&rank, 1, MPI_INT, (rank-1+procs)%procs, 0, MPI_COMM_WORLD, &reqs[1]); 22 | MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE); 23 | 
24 | printf("before rank %u: i=%f\n", rank, i); 25 | ret = MPI_Allreduce(MPI_IN_PLACE, &i, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 26 | printf("after rank %u: i=%f\n", rank, i); 27 | 28 | 29 | printf("Hello from rank %u\n", rank); 30 | fflush(stdout); 31 | 32 | MPI_Finalize(); 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /src/liballprof/numbers.h: -------------------------------------------------------------------------------- 1 | #define LOG_MPI_INT 1 2 | #define LOG_MPI_INTEGER 2 3 | #define LOG_MPI_LONG 3 4 | #define LOG_MPI_SHORT 4 5 | #define LOG_MPI_UNSIGNED 5 6 | #define LOG_MPI_UNSIGNED_LONG 6 7 | #define LOG_MPI_UNSIGNED_SHORT 7 8 | #define LOG_MPI_FLOAT 8 9 | #define LOG_MPI_REAL 9 10 | #define LOG_MPI_DOUBLE 10 11 | #define LOG_MPI_DOUBLE_PRECISION 11 12 | #define LOG_MPI_LONG_DOUBLE 12 13 | #define LOG_MPI_BYTE 13 14 | #define LOG_MPI_FLOAT_INT 14 15 | #define LOG_MPI_DOUBLE_INT 15 16 | #define LOG_MPI_LONG_INT 16 17 | #define LOG_MPI_2INT 17 18 | #define LOG_MPI_SHORT_INT 18 19 | #define LOG_MPI_LONG_DOUBLE_INT 19 20 | #define LOG_MPI_LOGICAL 20 21 | #define LOG_MPI_COMPLEX 21 22 | #define LOG_MPI_CHARACTER 21 23 | #define LOG_MPI_DOUBLE_COMPLEX 22 24 | 25 | 26 | #define LOG_MPI_MIN 1 27 | #define LOG_MPI_MAX 2 28 | #define LOG_MPI_SUM 3 29 | #define LOG_MPI_PROD 4 30 | #define LOG_MPI_LAND 5 31 | #define LOG_MPI_BAND 6 32 | #define LOG_MPI_LOR 7 33 | #define LOG_MPI_BOR 8 34 | #define LOG_MPI_LXOR 9 35 | #define LOG_MPI_BXOR 10 36 | #define LOG_MPI_MINLOC 11 37 | #define LOG_MPI_MAXLOC 12 38 | -------------------------------------------------------------------------------- /cmake/gengetopt.cmake: -------------------------------------------------------------------------------- 1 | macro (find_gengetopt) 2 | if (NOT GENGETOPT_EXECUTABLE) 3 | find_program (GENGETOPT_EXECUTABLE gengetopt) 4 | if (NOT GENGETOPT_EXECUTABLE) 5 | message (FATAL_ERROR "gengetopt not found. 
Aborting...") 6 | endif () 7 | endif () 8 | endmacro () 9 | 10 | macro (add_gengetopt_files _basename) 11 | find_gengetopt () 12 | 13 | set (_ggo_extra_input ${ARGV}) 14 | 15 | set (_ggo_c ${CMAKE_CURRENT_SOURCE_DIR}/${_basename}.c) 16 | set (_ggo_h ${CMAKE_CURRENT_SOURCE_DIR}/${_basename}.h) 17 | set (_ggo_g ${CMAKE_CURRENT_SOURCE_DIR}/${_basename}.ggo) 18 | 19 | get_filename_component(_basepath ${_basename} DIRECTORY) 20 | get_filename_component(_basefile ${_basename} NAME) 21 | 22 | add_custom_command ( 23 | OUTPUT ${_ggo_c} ${_ggo_h} 24 | COMMAND gengetopt -F ${_basefile} -i ${_ggo_g} --output-dir ${CMAKE_CURRENT_SOURCE_DIR}/${_basepath} 25 | DEPENDS ${_ggo_g} 26 | # BYPRODUCTS 27 | COMMENT "Generating getopt parser code ..." 28 | VERBATIM 29 | ) 30 | 31 | set (GGO_C ${_ggo_c}) 32 | set (GGO_H ${_ggo_h}) 33 | 34 | endmacro (add_gengetopt_files) 35 | -------------------------------------------------------------------------------- /src/Schedgen2/additional_microbenchmarks.py: -------------------------------------------------------------------------------- 1 | from patterns import linear 2 | 3 | 4 | def incast( 5 | comm_size: int, 6 | datasize: int, 7 | tag: int = 42, 8 | ptrn: str = "linear", 9 | randomized_data: bool = False, 10 | **kwargs, 11 | ): 12 | assert ptrn == "linear", "incast only supports the linear communication pattern" 13 | return linear( 14 | comm_size=comm_size, 15 | datasize=datasize, 16 | tag=tag, 17 | algorithm="incast", 18 | parallel=True, 19 | randomized_data=randomized_data, 20 | **kwargs, 21 | ) 22 | 23 | 24 | def outcast( 25 | comm_size: int, 26 | datasize: int, 27 | tag: int = 42, 28 | ptrn: str = "linear", 29 | randomized_data: bool = False, 30 | **kwargs, 31 | ): 32 | assert ptrn == "linear", "outcast only supports the linear communication pattern" 33 | return linear( 34 | comm_size=comm_size, 35 | datasize=datasize, 36 | tag=tag, 37 | algorithm="outcast", 38 | parallel=True, 39 | randomized_data=randomized_data, 40 | **kwargs, 41 | ) 
42 | -------------------------------------------------------------------------------- /src/liballprof/wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #COMPRESS="bzip2 -c" 4 | #SUFFIX="bz2" 5 | COMPRESS="gzip -c" 6 | SUFFIX="gz" 7 | 8 | HOST=$(hostname -s) 9 | VERBOSE=false 10 | if [ -f $HOME/.wrapper_verbose ]; then 11 | VERBOSE=true 12 | fi 13 | 14 | #echo "[$HOST] clearing /tmp ..." 15 | rm -f /tmp/pmpi-trace-rank-*txt 16 | 17 | if $VERBOSE; then 18 | echo "[$HOST] htor profiling wrapper: executing $@ ..." 19 | fi 20 | 21 | 22 | # execute the command ... use "$@" so arguments containing whitespace are not re-split 23 | "$@" 24 | 25 | if [ x"$HTOR_PMPI_FILE_PREFIX" == "x" ]; then 26 | HTOR_PMPI_FILE_PREFIX="/tmp/pmpi-trace-rank-" 27 | fi; 28 | 29 | for i in $(ls -1 $HTOR_PMPI_FILE_PREFIX*txt 2>/dev/null); do 30 | if test -f $i; then 31 | TMP=$(mktemp) 32 | if $VERBOSE; then 33 | echo "[$HOST] moving $i to $TMP to have exclusive access ..." 34 | fi 35 | # one process wins the move -- and mv should be atomic in any 36 | # reasonable FS :) 37 | mv $i $TMP 2> /dev/null 38 | # if I won ... compress it ... 39 | if test -s $TMP; then 40 | if $VERBOSE; then 41 | echo "[$HOST] compressing $i ($TMP) ..." 42 | fi 43 | cat $TMP | $COMPRESS > $(basename $i).$SUFFIX; 44 | fi; 45 | rm $TMP 46 | fi; 47 | done; 48 | 49 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | name: CMake on a single platform 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
11 | BUILD_TYPE: Release 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - name: Install build dependencies 21 | run: sudo apt-get install gengetopt re2c libgraphviz-dev python3 libclang-15-dev llvm-15-dev python3-clang-15 openmpi-bin openmpi-common libopenmpi-dev libunwind-dev 22 | 23 | - name: Configure CMake 24 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 25 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 26 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} src 27 | 28 | - name: Build 29 | run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} 30 | 31 | - name: Test 32 | working-directory: ${{github.workspace}}/build 33 | run: ctest --output-on-failure -C ${{env.BUILD_TYPE}} 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/Schedgen/buffer_element.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 The Trustees of Indiana University and Indiana 3 | * University Research and Technology 4 | * Corporation. All rights reserved. 5 | * 6 | * Author(s): Torsten Hoefler 7 | * Timo Schneider 8 | * 9 | */ 10 | 11 | #include 12 | 13 | // This class stores the information of a single element of an address list 14 | // These things consist of three entries: The type, which can be IN=1 or OUT=2, 15 | // indicated by a '<' or '>' in the schedule. Then there is the actual address 16 | // in memory which is a simple integer in our language. And last, there is the 17 | // size of data referenced by this address which is an integer and denotes the 18 | // size in bytes. 
19 | 20 | typedef uint64_t btime_t; 21 | 22 | class buffer_element { 23 | public: 24 | int type; // IN=1, OUT=2 25 | int addr; // address where to read/write 26 | btime_t size; // size of data to read/write in bytes 27 | 28 | buffer_element() : type(0), addr(0), size(0) {}; 29 | buffer_element(const buffer_element &elem) : type(elem.type), addr(elem.addr), size(elem.size) {}; 30 | buffer_element(int t, int a, btime_t s) : type(t), addr(a), size(s) {}; 31 | buffer_element& operator=(const buffer_element &elem) { 32 | type = elem.type; 33 | addr = elem.addr; 34 | size = elem.size; 35 | return *this; 36 | }; 37 | 38 | }; 39 | 40 | -------------------------------------------------------------------------------- /src/LogGOPSim/Makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CXXFLAGS= -std=c++11 -O0 -g -pedantic -Wno-deprecated -Wall -Wno-long-long -I/opt/homebrew/include/ 3 | CCFLAGS= -O0 -g 4 | LDFLAGS= -L/opt/homebrew/lib/ -lcgraph -g 5 | 6 | AUTOGEN_SRC= loggopsim_cmdline.c loggopsim_cmdline.h txt2bin_cmdline.h txt2bin_cmdline.c 7 | LOGGOPSIM_OBJECTS= LogGOPSim.o 8 | HLPR_OBJECTS= loggopsim_cmdline.o 9 | ALL_OBJECTS= $(LOGGOPSIM_OBJECTS) $(HLPR_OBJECTS) 10 | BINARY= LogGOPSim 11 | 12 | all: $(ALL_OBJECTS) $(AUTOGEN_SRC) txt2bin 13 | $(CXX) $(CXXFLAGS) $(ALL_OBJECTS) -o $(BINARY) $(LDFLAGS) 14 | 15 | txt2bin: 16 | re2c -o txt2bin.cpp txt2bin.re 17 | gengetopt -F txt2bin_cmdline -i txt2bin_cmdline.ggo 18 | $(CXX) -g -O3 txt2bin.cpp txt2bin_cmdline.c -o txt2bin 19 | 20 | loggopsim_cmdline.c: loggopsim_cmdline.ggo 21 | gengetopt -F loggopsim_cmdline -i loggopsim_cmdline.ggo 22 | 23 | loggopsim_cmdline.h: loggopsim_cmdline.ggo 24 | gengetopt -F loggopsim_cmdline -i loggopsim_cmdline.ggo 25 | 26 | txt2bin_cmdline.c: txt2bin_cmdline.ggo 27 | gengetopt -F txt2bin_cmdline -i txt2bin_cmdline.ggo 28 | 29 | txt2bin_cmdline.h: txt2bin_cmdline.ggo 30 | gengetopt -F txt2bin_cmdline -i txt2bin_cmdline.ggo 31 | 32 | 33 | 34 | 
%.o: %.cpp $(AUTOGEN_SRC) *.hpp 35 | $(CXX) $(CXXFLAGS) -c $< 36 | 37 | %.o: %.c $(AUTOGEN_SRC) *.h 38 | $(CXX) $(CCFLAGS) -c $< 39 | 40 | clean: 41 | rm -f $(AUTOGEN_SRC) 42 | rm -f $(ALL_OBJECTS) 43 | rm -f $(BINARY) 44 | rm -f txt2bin.cpp txt2bin bin2txt bin2dot simtest 45 | rm -f cmdline_txt2bin.* 46 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | Redistribution and use in source and binary forms, with or without 2 | modification, are permitted provided that the following conditions are met: 3 | 4 | 1. Redistributions of source code must retain the above copyright notice, this 5 | list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, 8 | this list of conditions and the following disclaimer in the documentation 9 | and/or other materials provided with the distribution. 10 | 11 | 3. Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software without 13 | specific prior written permission. 14 | 15 | 4. Redistributions of any form whatsoever must retain the following 16 | acknowledgment: 'This product includes software developed by SPCL @ ETH Zurich 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /src/LogGOPSim/loggopsim_cmdline.ggo: -------------------------------------------------------------------------------- 1 | package "goalsim" 2 | version "0.1" 3 | 4 | 5 | option "filename" f "Prefix for the filenames which contain the schedules" string 6 | option "save-mem" - "Map the schedules as MAP_SHARED to enable processing of large schedule (larger than main memory). This will invalidate the schedules during simulation." 
flag off 7 | option "LogGOPS_L" L "The latency parameter L in the LogGP model" int default="2500" optional 8 | option "LogGOPS_o" o "The overhead parameter o in the LogGP model" int default="1500" optional 9 | option "LogGOPS_g" g "The gap per message parameter g in the LogGP model" int default="1000" optional 10 | option "LogGOPS_G" G "The gap per byte parameter G in the LogGP model" int default="6" optional 11 | option "LogGOPS_S" S "Datasize at which we change from eager to rendezvous protocol" int default="65535" optional 12 | option "LogGOPS_O" O "The overhead per byte in LogGOP" int default="0" optional 13 | #option "starttimes" t "Name of an output file from a previous run, the end times of the old simulation will be the starttime of this one" string optional 14 | option "vizfile" V "Name of the output file for visualization data" string optional 15 | option "verbose" v "Enable more verbose output" optional 16 | option "progress" - "print progress" optional 17 | option "batchmode" b "enable batchmode (never print detailed host info)" optional 18 | #option "collnoise" - "Enable noise in collective operations" optional 19 | option "noise-trace" - "Read Noise from trace " string optional 20 | option "noise-cosched" - "Co-schedule noise (use same starttime on all processes)" flag off 21 | option "network-type" n "Network type (LogGP=no network congestion; simple=simple linear model)" values="LogGP","simple" default="LogGP" string optional 22 | option "network-file" - "Input file for network (annotated dot format)" string optional 23 | option "qstat" - "Enable PQ and UQ statistics. 
Argument is output filename prefix" default="Unknown" string optional 24 | 25 | -------------------------------------------------------------------------------- /src/Schedgen/goal_comm.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | class KeyRankPair { 4 | int key; 5 | int rank; 6 | 7 | public: 8 | KeyRankPair(int key, int rank) { 9 | this->key = key; 10 | this->rank = rank; 11 | } 12 | bool operator<(const KeyRankPair &rhs) const { 13 | return ((key == rhs.key) && (rank < rhs.rank)) || (key < rhs.key); 14 | } 15 | }; 16 | 17 | class Comm { 18 | 19 | /* The GOAL base API assumes only a single communicator, aka MPI_COMM_WORLD. 20 | * This class provides communicator support */ 21 | 22 | private: 23 | Comm *base_comm; // pointer to root of the comm tree 24 | int id; // unique ID of this communicator, id=0 means this is MPI_COMM_WORLD 25 | int color; // if this comm was created by comm_split, this is his color 26 | std::vector key2rank; // key, world_rank, pos is new rank 27 | std::set children; 28 | int next_free_id; // only used at base comm for now 29 | 30 | Comm *find_comm_rec(int comm_id) { 31 | if (this->id == comm_id) 32 | return this; 33 | for (auto c : this->children) { 34 | Comm *r = c->find_comm_rec(comm_id); 35 | if (r != NULL) 36 | return r; 37 | } 38 | return NULL; 39 | } 40 | 41 | public: 42 | Comm() { 43 | this->base_comm = this; 44 | this->id = 0; 45 | this->next_free_id = 1; 46 | } 47 | 48 | Comm *find_comm(int comm_id) { 49 | auto r = this->base_comm->find_comm_rec(comm_id); 50 | if (r == NULL) 51 | fprintf(stderr, "Did not find comm %i\n", comm_id); 52 | return r; 53 | } 54 | 55 | int getId(void) { return this->id; } 56 | 57 | int nextId() { return this->base_comm->next_free_id++; } 58 | 59 | Comm *find_or_create_child_comm(int color) { 60 | for (auto c : this->children) { 61 | if (c->color == color) 62 | return c; 63 | } 64 | Comm *c = new Comm; 65 | c->base_comm = this->base_comm; 66 | 
c->id = this->base_comm->nextId(); 67 | c->color = color; 68 | return c; 69 | } 70 | 71 | void add_rank_key(int world_rank, int key) { 72 | auto p = KeyRankPair(key, world_rank); 73 | this->key2rank.push_back(p); 74 | std::sort( 75 | this->key2rank.begin(), 76 | this->key2rank 77 | .end()); // we could add a "close_comm" method and sort only once 78 | } 79 | }; -------------------------------------------------------------------------------- /src/LogGOPSim/LogGOPSim.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef GRAPH_NODE_PROPERTIES 4 | #define GRAPH_NODE_PROPERTIES 1 5 | 6 | typedef uint64_t btime_t; 7 | 8 | /* this class is CRITICAL -- keep it as SMALL as possible! 9 | * 10 | * current size: 39 bytes 11 | * 12 | */ 13 | class graph_node_properties { 14 | public: 15 | btime_t time; 16 | btime_t starttime; // only used for MSGs to identify start times 17 | #ifdef HOSTSYNC 18 | btime_t syncstart; 19 | #endif 20 | #ifdef STRICT_ORDER 21 | btime_t ts; /* this is a timestamp that determines the (original) insertion order of 22 | elements in the queue, it is increased for every new element, not for 23 | re-insertions! Needed for correctness. 
*/ 24 | #endif 25 | uint64_t size; // number of bytes to send, recv, or time to spend in loclop 26 | uint32_t target; // partner for send/recv 27 | uint32_t host; // owning host 28 | uint32_t offset; // for Parser (to identify schedule element) 29 | uint32_t tag; // tag for send/recv 30 | uint32_t handle; // handle for network layer :-/ 31 | uint8_t proc; // processing element for this operation 32 | uint8_t nic; // network interface for this operation 33 | char type; // see below 34 | }; 35 | 36 | /* this is a comparison functor that can be used to compare and sort 37 | * operation types of graph_node_properties */ 38 | class gnp_op_comp_func { 39 | public: 40 | bool operator()(graph_node_properties x, graph_node_properties y) { 41 | if(x.type < y.type) return true; 42 | return false; 43 | } 44 | }; 45 | 46 | /* this is a comparison functor that can be used to compare and sort 47 | * graph_node_properties by time */ 48 | class aqcompare_func { 49 | public: 50 | bool operator()(graph_node_properties x, graph_node_properties y) { 51 | if(x.time > y.time) return true; 52 | #ifdef STRICT_ORDER 53 | if(x.time == y.time && x.ts > y.ts) return true; 54 | #endif 55 | return false; 56 | } 57 | }; 58 | 59 | 60 | // mnemonic defines for op type 61 | static const int OP_SEND = 1; 62 | static const int OP_RECV = 2; 63 | static const int OP_LOCOP = 3; 64 | static const int OP_MSG = 4; 65 | 66 | static const uint32_t ANY_SOURCE = ~0; 67 | static const uint32_t ANY_TAG = ~0; 68 | 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /cmake/FindUnwind.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find libunwind 2 | # Once done this will define 3 | # 4 | # Unwind_FOUND - system has libunwind 5 | # unwind::unwind - cmake target for libunwind 6 | 7 | include (FindPackageHandleStandardArgs) 8 | 9 | find_path (Unwind_INCLUDE_DIR NAMES unwind.h libunwind.h DOC "unwind include 
directory") 10 | find_library (Unwind_LIBRARY NAMES unwind DOC "unwind library") 11 | 12 | mark_as_advanced (Unwind_INCLUDE_DIR Unwind_LIBRARY) 13 | 14 | # Extract version information 15 | if (Unwind_LIBRARY) 16 | set (_Unwind_VERSION_HEADER ${Unwind_INCLUDE_DIR}/libunwind-common.h) 17 | 18 | if (EXISTS ${_Unwind_VERSION_HEADER}) 19 | file (READ ${_Unwind_VERSION_HEADER} _Unwind_VERSION_CONTENTS) 20 | 21 | string (REGEX REPLACE ".*#define UNW_VERSION_MAJOR[ \t]+([0-9]+).*" "\\1" 22 | Unwind_VERSION_MAJOR "${_Unwind_VERSION_CONTENTS}") 23 | string (REGEX REPLACE ".*#define UNW_VERSION_MINOR[ \t]+([0-9]+).*" "\\1" 24 | Unwind_VERSION_MINOR "${_Unwind_VERSION_CONTENTS}") 25 | string (REGEX REPLACE ".*#define UNW_VERSION_EXTRA[ \t]+([0-9]+).*" "\\1" 26 | Unwind_VERSION_PATCH "${_Unwind_VERSION_CONTENTS}") 27 | 28 | set (Unwind_VERSION ${Unwind_VERSION_MAJOR}.${Unwind_VERSION_MINOR}) 29 | 30 | if (CMAKE_MATCH_0) 31 | # Third version component may be empty 32 | set (Unwind_VERSION ${Unwind_VERSION}.${Unwind_VERSION_PATCH}) 33 | set (Unwind_VERSION_COMPONENTS 3) 34 | else (CMAKE_MATCH_0) 35 | set (Unwind_VERSION_COMPONENTS 2) 36 | endif (CMAKE_MATCH_0) 37 | endif (EXISTS ${_Unwind_VERSION_HEADER}) 38 | endif (Unwind_LIBRARY) 39 | 40 | # handle the QUIETLY and REQUIRED arguments and set Unwind_FOUND to TRUE 41 | # if all listed variables are TRUE 42 | find_package_handle_standard_args (Unwind 43 | REQUIRED_VARS Unwind_INCLUDE_DIR Unwind_LIBRARY 44 | VERSION_VAR Unwind_VERSION 45 | ) 46 | 47 | if (Unwind_FOUND) 48 | if (NOT TARGET unwind::unwind) 49 | add_library (unwind::unwind INTERFACE IMPORTED) 50 | 51 | set_property (TARGET unwind::unwind PROPERTY 52 | INTERFACE_INCLUDE_DIRECTORIES ${Unwind_INCLUDE_DIR} 53 | ) 54 | set_property (TARGET unwind::unwind PROPERTY 55 | INTERFACE_LINK_LIBRARIES ${Unwind_LIBRARY} 56 | ) 57 | set_property (TARGET unwind::unwind PROPERTY 58 | IMPORTED_CONFIGURATIONS RELEASE 59 | ) 60 | endif (NOT TARGET unwind::unwind) 61 | endif (Unwind_FOUND) 
62 | -------------------------------------------------------------------------------- /src/Schedgen/trace_reader.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 The Trustees of Indiana University and Indiana 3 | * University Research and Technology 4 | * Corporation. All rights reserved. 5 | * 6 | * Author(s): Torsten Hoefler 7 | * Timo Schneider 8 | * 9 | */ 10 | 11 | #include "schedgen.hpp" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | //#define HAVE_BOOST_IO 20 | 21 | #ifdef HAVE_BOOST_IO 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #endif 29 | 30 | class TraceReader { 31 | private: 32 | std::ifstream trace; 33 | enum {BZ2, NORM} type; 34 | 35 | #ifdef HAVE_BOOST_IO 36 | boost::iostreams::filtering_streambuf inbz2; 37 | #endif 38 | 39 | public: 40 | TraceReader(std::string fname) { 41 | trace.open(fname.c_str(),std::ios::in); 42 | 43 | //boost::cmatch m; 44 | //static const boost::regex e(".*\\.bz2$"); 45 | //if(regex_match(fname.c_str(), m, e)) type = BZ2; 46 | if(NULL!=strstr(fname.c_str(), ".bz2")) { 47 | #ifdef HAVE_BOOST_IO 48 | type = BZ2; 49 | #else 50 | std::cerr << "bz2 not supported (anymore)\n"; 51 | _exit(10); 52 | #endif 53 | } else type=NORM; 54 | 55 | #ifdef HAVE_BOOST_IO 56 | if(type == BZ2) { 57 | inbz2.push(boost::iostreams::bzip2_decompressor()); 58 | inbz2.push(trace); 59 | } 60 | #endif 61 | } 62 | 63 | bool is_open() { 64 | return trace.is_open(); 65 | } 66 | 67 | std::streampos tellg() { 68 | return trace.tellg(); 69 | } 70 | 71 | void seekg(std::streampos pos) { 72 | trace.seekg(pos); 73 | } 74 | 75 | bool getline(char* s, int n) { 76 | bool eof=0; 77 | if(type == BZ2) { 78 | #ifdef HAVE_BOOST_IO 79 | int pos = 0; 80 | while(1) { 81 | std::string line; 82 | char z = boost::iostreams::get(inbz2); 83 | if(z == '\n') break; 84 | if(z == EOF) { eof=1; break; } 85 | s[pos++] = z; 86 | 
} 87 | s[pos]='\0'; 88 | #endif 89 | } else { 90 | trace.getline(s,n); 91 | eof = trace.eof(); 92 | } 93 | //std::cout << "getline " << s << "\n"; 94 | return eof; 95 | } 96 | }; 97 | -------------------------------------------------------------------------------- /src/Schedgen/schedgen_cmdline.ggo: -------------------------------------------------------------------------------- 1 | package "schedgen" 2 | version "0.1" 3 | 4 | option "ptrn" p "Name of the communication pattern that should be used to generate a schedule" 5 | values="binomialtreereduce","binarytreebcast","binomialtreebcast","nwaydissemination","pipelinedring","pipelinedringdep","doublering","gather","scatter","linbarrier","trace","dissemination","random_bisect","random_bisect_fd_sym","linear_alltoall","linear_alltoallv","allreduce_recdoub","allreduce_ring","resnet","chained_dissem" 6 | default="binomialtreebcast" string optional 7 | option "commsize" s "Number of nodes that should be used in the communication pattern" default="8" int optional 8 | option "timemult" - "Time multiplier, relative to microsecond (e.g., nanoseconds -> 1000)" default="1000" int optional 9 | option "datasize" d "Number of bytes that are transmitted in the communication patttern in a basic step" default="1" int optional 10 | option "filename" o "Filename for the name of the generated schedule" default="schedule.goal" string optional 11 | option "nway" - "Fanout for the n-way dissemination pattern" default="1" int optional 12 | option "root" - "Root node for certain patterns" default="0" int optional 13 | option "segmentsize" - "Segment size for pipelined pattern" default="1" int optional 14 | option "nb" - "nonblocking execution of collectives (provide length of local operation)" default="0" int optional 15 | option "nb-poll" - "polling interval for nonblocking execution" default="0" int optional 16 | option "cpu" - "select CPU to execute computation" default="0" int optional 17 | option "rpl-dep-cmp" - "replaces dependencies 
with fixed computation (parameter represents computation time. If -1, does not replace dependencies.)" default="-1" int optional 18 | option "a2av-skew-ratio" - "In the alltoallv pattern, each chunk of data sent by each rank is of a random size. The size of the chunk with id 'root' is of size 'datasize'. All the other chunks are of random size, and a2av-skew-ratio times smaller." default="1" int optional 19 | option "outcast" - "Generates outcast in the alltoallv pattern." flag off 20 | option "traces" - "Tracefile for rank 0 (others are autodetected)" string optional 21 | option "traces-start" - "file with start lines in trace files (is updated after run if trace-nops is given)" string optional 22 | option "traces-nops" - "number of operations to write to file (0=all)" default="0" int optional 23 | option "traces-extr" - "extrapolation factor for traces" default="1" int optional 24 | option "traces-print" - "print each operation" default="0" int optional 25 | option "traces-nop2p" - "do not consider point-to-point communication" flag off 26 | option "traces-nocolls" - "do not consider collective communication" flag off 27 | option "traces-nbcify" - "turn blocking collectives into non-blocking colls, time to pre-post NBCs" default="0" int optional 28 | 29 | -------------------------------------------------------------------------------- /src/Drawviz/TimelineDrawing.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 The Trustees of Indiana University and Indiana 3 | * University Research and Technology 4 | * Corporation. All rights reserved. 
5 | * 6 | * Author(s): Torsten Hoefler 7 | * Timo Schneider 8 | * 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "cmdline.h" 19 | 20 | class overh { 21 | public: 22 | int type; // 1 = osend, 2 = orecv 23 | int rank; 24 | int cpu; 25 | uint64_t start; 26 | uint64_t end; 27 | float r; 28 | float g; 29 | float b; 30 | }; 31 | 32 | class trans { 33 | public: 34 | int source; 35 | int dest; 36 | uint64_t starttime; 37 | uint64_t endtime; 38 | int size; 39 | int G; 40 | int r; 41 | int g; 42 | int b; 43 | }; 44 | 45 | class TimelineDrawing { 46 | 47 | private: 48 | gengetopt_args_info args_info; 49 | 50 | PSDoc *psdoc; 51 | int psfont; 52 | int fontsize; 53 | 54 | int numranks; 55 | double ranksep; 56 | int numcpus; 57 | double cpusep; 58 | double timesep; 59 | 60 | int width; 61 | int height; 62 | int leftmargin; 63 | 64 | std::string content; 65 | 66 | std::vector overheads; 67 | std::vector transmissions; 68 | 69 | void calc_arrowhead_coords(int sx, int sy, int dx, int dy, int *x1, int *y1, int *x2, int *y2); 70 | void add_ranknum(int); 71 | public: 72 | 73 | TimelineDrawing(gengetopt_args_info _args_info) : args_info(_args_info) {}; 74 | 75 | void init_graph(int numranks, int numcpus, int width, int height, std::string filename); 76 | void close_graph(); 77 | void draw_everything(int maxtime); 78 | 79 | void draw_ranklines(); 80 | void draw_osend(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b); 81 | void draw_orecv(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b); 82 | void draw_transmission(int source, int dest, uint64_t starttime, uint64_t endtime, int size, int G, float r, float g, float b); 83 | void draw_loclop(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b); 84 | void draw_noise(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b); 85 | void draw_seperator(int rank, int cpu, int 
pos); 86 | 87 | void add_osend(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b); 88 | void add_orecv(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b); 89 | void add_transmission(int source, int dest, uint64_t starttime, uint64_t endtime, int size, int G, float r, float g, float b); 90 | void add_loclop(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b); 91 | void add_noise(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b); 92 | }; 93 | 94 | 95 | -------------------------------------------------------------------------------- /cmake/FindGraphviz.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Graphviz 2 | # Once done this will define 3 | # 4 | # GRAPHVIZ_FOUND - system has Graphviz 5 | # GRAPHVIZ_INCLUDE_DIRS - Graphviz include directories 6 | # GRAPHVIZ_CGRAPH_LIBRARY - Graphviz CGRAPH library 7 | # GRAPHVIZ_VERSION - Graphviz version 8 | # 9 | # This module reads hints about search locations from the following cmake variables: 10 | # GRAPHVIZ_ROOT - Graphviz installation prefix 11 | # (containing bin/, include/, etc.) 12 | 13 | # Copyright (c) 2009, Adrien Bustany, 14 | # Copyright (c) 2013-2014 Kevin Funk 15 | 16 | # Version computation and some cleanups by Allen Winter 17 | # Copyright (c) 2012-2014 Klarälvdalens Datakonsult AB, a KDAB Group company 18 | 19 | # Simplified script by Dogan Can 20 | # Copyright (c) 2014 University of Southern California 21 | 22 | # Redistribution and use is allowed according to the terms of the GPLv3+ license. 
23 | 24 | 25 | if(GRAPHVIZ_ROOT) 26 | set(_GRAPHVIZ_INCLUDE_DIR ${GRAPHVIZ_ROOT}/include) 27 | set(_GRAPHVIZ_LIBRARY_DIR ${GRAPHVIZ_ROOT}/lib) 28 | endif() 29 | 30 | find_path(GRAPHVIZ_INCLUDE_DIR NAMES graphviz/cgraph.h 31 | HINTS ${_GRAPHVIZ_INCLUDE_DIR}) 32 | find_library(GRAPHVIZ_CGRAPH_LIBRARY NAMES cgraph 33 | HINTS ${_GRAPHVIZ_LIBRARY_DIR}) 34 | 35 | if(GRAPHVIZ_INCLUDE_DIR AND GRAPHVIZ_CGRAPH_LIBRARY) 36 | set(GRAPHVIZ_FOUND TRUE) 37 | else() 38 | set(GRAPHVIZ_FOUND FALSE) 39 | endif() 40 | 41 | # Ok, now compute the version 42 | if(GRAPHVIZ_FOUND) 43 | set(FIND_GRAPHVIZ_VERSION_SOURCE 44 | "#include \n#include \n int main()\n {\n printf(\"%s\",PACKAGE_VERSION);return 1;\n }\n") 45 | set(FIND_GRAPHVIZ_VERSION_SOURCE_FILE ${CMAKE_BINARY_DIR}/CMakeTmp/FindGRAPHVIZ.cxx) 46 | file(WRITE "${FIND_GRAPHVIZ_VERSION_SOURCE_FILE}" "${FIND_GRAPHVIZ_VERSION_SOURCE}") 47 | 48 | set(FIND_GRAPHVIZ_VERSION_ADD_INCLUDES 49 | "-DINCLUDE_DIRECTORIES:STRING=${GRAPHVIZ_INCLUDE_DIR}") 50 | 51 | try_run(RUN_RESULT COMPILE_RESULT 52 | ${CMAKE_BINARY_DIR} 53 | ${FIND_GRAPHVIZ_VERSION_SOURCE_FILE} 54 | CMAKE_FLAGS "${FIND_GRAPHVIZ_VERSION_ADD_INCLUDES}" 55 | RUN_OUTPUT_VARIABLE GRAPHVIZ_VERSION) 56 | 57 | if(COMPILE_RESULT AND RUN_RESULT EQUAL 1) 58 | message(STATUS "Graphviz version: ${GRAPHVIZ_VERSION}") 59 | else() 60 | message(FATAL_ERROR "Unable to compile or run the graphviz version detection program.") 61 | endif() 62 | 63 | set(GRAPHVIZ_INCLUDE_DIRS ${GRAPHVIZ_INCLUDE_DIR} ${GRAPHVIZ_INCLUDE_DIR}/graphviz) 64 | 65 | if(NOT Graphviz_FIND_QUIETLY) 66 | message(STATUS "Graphviz include: ${GRAPHVIZ_INCLUDE_DIRS}") 67 | message(STATUS "Graphviz libraries: ${GRAPHVIZ_CGRAPH_LIBRARY}") 68 | endif() 69 | endif() 70 | 71 | if(Graphviz_FIND_REQUIRED AND NOT GRAPHVIZ_FOUND) 72 | message(FATAL_ERROR "Could not find GraphViz.") 73 | endif() 74 | -------------------------------------------------------------------------------- /src/LogGOPSim/Goal.hpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "Parser.hpp" 6 | 7 | typedef Node* goalop_t; 8 | 9 | class Goal { 10 | 11 | private: 12 | Graph graph; 13 | uint32_t rank; 14 | uint32_t num_ranks; 15 | 16 | uint8_t MaxCPU(uint8_t cpu = 0) { 17 | static uint8_t max_cpu = 0; 18 | if (cpu > max_cpu) max_cpu = cpu; 19 | return max_cpu; 20 | } 21 | 22 | uint8_t MaxNIC(uint8_t nic = 0) { 23 | static uint8_t max_nic = 0; 24 | if (nic > max_nic) max_nic = nic; 25 | return max_nic; 26 | } 27 | 28 | public: 29 | 30 | goalop_t Send(uint32_t src, uint32_t dest, uint64_t size, uint32_t tag, uint8_t cpu, uint8_t nic) { 31 | 32 | Node* n = graph.addNode(); 33 | 34 | n->Type = OPTYPE_SEND; 35 | n->Peer = dest; 36 | n->Tag = tag; 37 | n->Proc = cpu; 38 | n->Nic = nic; 39 | n->Size = size; 40 | 41 | MaxCPU(cpu); 42 | MaxNIC(nic); 43 | 44 | return n; 45 | } 46 | 47 | goalop_t Recv(uint32_t src, uint32_t dest, uint64_t size, uint32_t tag, uint8_t cpu, uint8_t nic) { 48 | 49 | Node* n = graph.addNode(); 50 | 51 | n->Type = OPTYPE_RECV; 52 | n->Peer = src; 53 | n->Tag = tag; 54 | n->Proc = cpu; 55 | n->Nic = nic; 56 | n->Size = size; 57 | 58 | MaxCPU(cpu); 59 | MaxNIC(nic); 60 | 61 | return n; 62 | } 63 | 64 | goalop_t Calc(uint32_t src, uint64_t size, uint8_t cpu, uint8_t nic) { 65 | 66 | Node* n = graph.addNode(); 67 | 68 | n->Type = OPTYPE_CALC; 69 | n->Peer = 0; // this optype has not real peer, i just set it so it is clearly defined 70 | n->Tag = 0; // this optype has not real tag, i just set it so it is clearly defined 71 | n->Proc = cpu; 72 | n->Nic = nic; 73 | n->Size = size; 74 | 75 | MaxCPU(cpu); 76 | MaxNIC(nic); 77 | 78 | return n; 79 | } 80 | 81 | void StartDependency(goalop_t src, goalop_t dest) { 82 | // a can not be executed before b is started 83 | graph.addStartDependency(src, dest); 84 | } 85 | 86 | void Dependency(goalop_t src, goalop_t dest) { 87 | //a can not be executed before 
b is finished 88 | graph.addDependency(src, dest); 89 | } 90 | 91 | void SerializeSchedule(char* filename) { 92 | 93 | static int fd; 94 | 95 | // create/open binary schedule if it is the first rank (rank 0) 96 | if (rank==0) { 97 | fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR); 98 | if (fd == -1) { 99 | fprintf(stderr, "Couldn't open %s for schedule serialization!\n", filename); 100 | perror("system error message:"); 101 | exit(EXIT_FAILURE); 102 | } 103 | } 104 | 105 | graph.serialize_mmap(fd, rank, num_ranks, MaxCPU(), MaxNIC()); 106 | 107 | // close the binary schedule if it is the last rank 108 | if (rank == num_ranks-1) { 109 | close(fd); 110 | sync(); 111 | } 112 | 113 | } 114 | 115 | void SetRank(uint32_t r) { 116 | rank=r; 117 | } 118 | 119 | void SetNumRanks(uint32_t nr) { 120 | num_ranks = nr; 121 | } 122 | 123 | }; 124 | -------------------------------------------------------------------------------- /doc/README: -------------------------------------------------------------------------------- 1 | # README file for the LogGOPSim simulator 2 | 3 | Installing 4 | ---------- 5 | 6 | * prerequisites to build: 7 | - C++ compiler (e.g., g++) 8 | - re2c - http://re2c.org/ 9 | - gengetopt - http://www.gnu.org/software/gengetopt/gengetopt.html 10 | - libagraph - http://www.graphviz.org/ 11 | 12 | * build: 13 | - optional: edit Makefile (change CXX and/or CXXFLAGS) 14 | - make 15 | 16 | Running 17 | ------- 18 | 19 | * write or generate a GOAL schedule or use one of the example 20 | schedules (e.g., dissemination_16.goal or binary_tree_16.goal) 21 | 22 | * convert schedule to binary format using txt2bin: 23 | - e.g., txt2bin -i dissemination_16.goal -o dissemination_16.bin 24 | 25 | * execute simulation with default parameters (see LogGOPSim --help for 26 | more options): 27 | - e.g., LogGOPSim -f dissemination_16.bin 28 | 29 | * interpret output: 30 | - for small simulations, each host end time is printed (22000ns for 31 | our example with 
default parameters) 32 | - for larger runs, only the maximum time is printed 33 | 34 | Visualization 35 | ------------- 36 | 37 | * run LogGOPSim with -V option: 38 | - e.g., LogGOPSim -f dissemination_16.bin -V viz.out 39 | 40 | * compile DrawViz (simple "make") 41 | 42 | * run DrawViz (only for smaller simulations): 43 | - e.g., drawviz -i viz.out -o viz.eps 44 | 45 | * view postscript output: 46 | - e.g., gv viz.eps 47 | 48 | MPI Matching Data 49 | ------------------ 50 | 51 | * run LogGOPSim with -qstat option: 52 | - e.g., LogGOPSim -f dissemination_16.bin -stat mpi-matching will produce several 53 | files containing MPI match queue data with names that have the form mpi-matching.*.data 54 | 55 | * additional information on the MPI matching data is available in README-mpi-matching 56 | 57 | Schedgen - automatic GOAL schedule generator 58 | -------------------------------------------- 59 | 60 | * compile SchedGen (simple "make") 61 | 62 | * run schedgen to generate schedules for collective operations: 63 | - e.g., schedgen -p binomialtreebcast -s 32 -o binary_tree_32.goal 64 | (generates a binomial tree broadcast pattern with 32 processes, 65 | the GOAL schedule can be converted to the binary simulator input 66 | with txt2bin) 67 | 68 | * run schedgen to generate schedules for application traces collected 69 | with liballprof-0.9: 70 | - traces need to be collected by linking liballprof as PMPI layer 71 | with an MPI application. Sample traces are included in the 72 | distribution in liballprof-samples 73 | - e.g., schedgen -p trace --traces liballprof-samples/sweep3d-2x2/pmpi-trace-rank-0.txt -o sweep-4.goal 74 | - convert and simulate: 75 | - e.g., txt2bin -i sweep-4.goal -o sweep-4.bin 76 | LogGOPSim -f sweep-4.bin 77 | 78 | Citation 79 | -------- 80 | 81 | Any published work which uses this software should include the following 82 | citation: 83 | ---------------------------------------------------------------------- 84 | T. Hoefler, T. Schneider, A. 
Lumsdaine: LogGOPSim ­ Simulating 85 | Large-Scale Applications in the LogGOPS Model 86 | ---------------------------------------------------------------------- 87 | -------------------------------------------------------------------------------- /src/LogGOPSim/TimelineVisualization.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class TimelineVisualization { 11 | 12 | private: 13 | std::string content; 14 | bool enable; 15 | std::string filename; 16 | 17 | void add_ranknum(int numranks) { 18 | 19 | std::stringstream os; 20 | os << "numranks " << numranks << ";\n"; 21 | this->content.append(os.str()); 22 | 23 | } 24 | 25 | void write_events(bool append) { 26 | 27 | std::ofstream myfile; 28 | if (append) myfile.open(filename.c_str(), std::ios::out | std::ios::app); 29 | else myfile.open(filename.c_str(), std::ios::out); 30 | if (myfile.is_open()) { 31 | myfile << this->content; 32 | myfile.close(); 33 | } 34 | else { 35 | fprintf(stderr, "Unable to open %s\n", filename.c_str()); 36 | } 37 | 38 | } 39 | 40 | 41 | 42 | public: 43 | 44 | TimelineVisualization(gengetopt_args_info *args_info, int p) { 45 | this->enable = args_info->vizfile_given; 46 | if(!enable) return; 47 | 48 | filename = args_info->vizfile_arg; 49 | add_ranknum(p); 50 | } 51 | 52 | ~TimelineVisualization() { 53 | if(!enable) return; 54 | 55 | write_events(false); 56 | } 57 | 58 | void add_osend(int rank, uint64_t start, uint64_t end, int cpu, float r=0.0, float g=0.0, float b=1.0) { 59 | if(!enable) return; 60 | 61 | std::stringstream outstream; 62 | outstream << "osend " << rank << " " << cpu << " " << start << " " << end << " " << r << " " << g << " " << b << ";\n"; 63 | this->content.append(outstream.str()); 64 | 65 | } 66 | 67 | void add_orecv(int rank, uint64_t start, uint64_t end, int cpu, float r=0.0, float g=0.0, float b=1.0) { 68 | if(!enable) 
return; 69 | 70 | std::stringstream os; 71 | os << "orecv " << rank << " " << cpu << " " << start << " " << end << " " << r << " " << g << " " << b << ";\n"; 72 | this->content.append(os.str()); 73 | 74 | } 75 | 76 | void add_loclop(int rank, uint64_t start, uint64_t end, int cpu, float r=1.0, float g=0.0, float b=0.0) { 77 | if(!enable) return; 78 | 79 | std::stringstream os; 80 | os << "loclop " << rank << " " << cpu << " " << start << " " << end << " " << r << " " << g << " " << b << ";\n"; 81 | this->content.append(os.str()); 82 | 83 | } 84 | 85 | void add_noise(int rank, uint64_t start, uint64_t end, int cpu, float r=0.0, float g=1.0, float b=0.0) { 86 | if(!enable) return; 87 | 88 | std::stringstream os; 89 | os << "noise " << rank << " " << cpu << " " << start << " " << end << " " << r << " " << g << " " << b << ";\n"; 90 | this->content.append(os.str()); 91 | 92 | } 93 | 94 | void add_transmission(int source, int dest, uint64_t starttime, uint64_t endtime, int size, int G, float r=0.0, float g=0.0, float b=1.0) { 95 | if(!enable) return; 96 | 97 | std::stringstream os; 98 | os << "transmission " << source << " " << dest << " " << starttime << " "; 99 | os << endtime << " " << size << " " << G << " " << r << " " << g << " " << b << ";\n"; 100 | this->content.append(os.str()); 101 | } 102 | }; 103 | -------------------------------------------------------------------------------- /src/LogGOPSim/binary_tree_32.goal: -------------------------------------------------------------------------------- 1 | num_ranks 32 2 | 3 | rank 0 { 4 | l1: send 1b to 1 tag 0 5 | l2: send 1b to 2 tag 0 6 | l3: send 1b to 4 tag 0 7 | l4: send 1b to 8 tag 0 8 | l5: send 1b to 16 tag 0 9 | } 10 | 11 | rank 1 { 12 | l1: recv 1b from 0 tag 0 13 | l2: send 1b to 3 tag 0 14 | l2 requires l1 15 | l3: send 1b to 5 tag 0 16 | l3 requires l1 17 | l4: send 1b to 9 tag 0 18 | l4 requires l1 19 | l5: send 1b to 17 tag 0 20 | l5 requires l1 21 | } 22 | 23 | rank 2 { 24 | l1: recv 1b from 0 tag 0 
25 | l2: send 1b to 6 tag 0 26 | l2 requires l1 27 | l3: send 1b to 10 tag 0 28 | l3 requires l1 29 | l4: send 1b to 18 tag 0 30 | l4 requires l1 31 | } 32 | 33 | rank 3 { 34 | l1: recv 1b from 1 tag 0 35 | l2: send 1b to 7 tag 0 36 | l2 requires l1 37 | l3: send 1b to 11 tag 0 38 | l3 requires l1 39 | l4: send 1b to 19 tag 0 40 | l4 requires l1 41 | } 42 | 43 | rank 4 { 44 | l1: recv 1b from 0 tag 0 45 | l2: send 1b to 12 tag 0 46 | l2 requires l1 47 | l3: send 1b to 20 tag 0 48 | l3 requires l1 49 | } 50 | 51 | rank 5 { 52 | l1: recv 1b from 1 tag 0 53 | l2: send 1b to 13 tag 0 54 | l2 requires l1 55 | l3: send 1b to 21 tag 0 56 | l3 requires l1 57 | } 58 | 59 | rank 6 { 60 | l1: recv 1b from 2 tag 0 61 | l2: send 1b to 14 tag 0 62 | l2 requires l1 63 | l3: send 1b to 22 tag 0 64 | l3 requires l1 65 | } 66 | 67 | rank 7 { 68 | l1: recv 1b from 3 tag 0 69 | l2: send 1b to 15 tag 0 70 | l2 requires l1 71 | l3: send 1b to 23 tag 0 72 | l3 requires l1 73 | } 74 | 75 | rank 8 { 76 | l1: recv 1b from 0 tag 0 77 | l2: send 1b to 24 tag 0 78 | l2 requires l1 79 | } 80 | 81 | rank 9 { 82 | l1: recv 1b from 1 tag 0 83 | l2: send 1b to 25 tag 0 84 | l2 requires l1 85 | } 86 | 87 | rank 10 { 88 | l1: recv 1b from 2 tag 0 89 | l2: send 1b to 26 tag 0 90 | l2 requires l1 91 | } 92 | 93 | rank 11 { 94 | l1: recv 1b from 3 tag 0 95 | l2: send 1b to 27 tag 0 96 | l2 requires l1 97 | } 98 | 99 | rank 12 { 100 | l1: recv 1b from 4 tag 0 101 | l2: send 1b to 28 tag 0 102 | l2 requires l1 103 | } 104 | 105 | rank 13 { 106 | l1: recv 1b from 5 tag 0 107 | l2: send 1b to 29 tag 0 108 | l2 requires l1 109 | } 110 | 111 | rank 14 { 112 | l1: recv 1b from 6 tag 0 113 | l2: send 1b to 30 tag 0 114 | l2 requires l1 115 | } 116 | 117 | rank 15 { 118 | l1: recv 1b from 7 tag 0 119 | l2: send 1b to 31 tag 0 120 | l2 requires l1 121 | } 122 | 123 | rank 16 { 124 | l1: recv 1b from 0 tag 0 125 | } 126 | 127 | rank 17 { 128 | l1: recv 1b from 1 tag 0 129 | } 130 | 131 | rank 18 { 132 | l1: recv 
1b from 2 tag 0 133 | } 134 | 135 | rank 19 { 136 | l1: recv 1b from 3 tag 0 137 | } 138 | 139 | rank 20 { 140 | l1: recv 1b from 4 tag 0 141 | } 142 | 143 | rank 21 { 144 | l1: recv 1b from 5 tag 0 145 | } 146 | 147 | rank 22 { 148 | l1: recv 1b from 6 tag 0 149 | } 150 | 151 | rank 23 { 152 | l1: recv 1b from 7 tag 0 153 | } 154 | 155 | rank 24 { 156 | l1: recv 1b from 8 tag 0 157 | } 158 | 159 | rank 25 { 160 | l1: recv 1b from 9 tag 0 161 | } 162 | 163 | rank 26 { 164 | l1: recv 1b from 10 tag 0 165 | } 166 | 167 | rank 27 { 168 | l1: recv 1b from 11 tag 0 169 | } 170 | 171 | rank 28 { 172 | l1: recv 1b from 12 tag 0 173 | } 174 | 175 | rank 29 { 176 | l1: recv 1b from 13 tag 0 177 | } 178 | 179 | rank 30 { 180 | l1: recv 1b from 14 tag 0 181 | } 182 | 183 | rank 31 { 184 | l1: recv 1b from 15 tag 0 185 | } 186 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | The LogGOPSim Toolchain 2 | ======================= 3 | 4 | The tools in this repository are centered around LogGOPSim, a network simulator 5 | based on the LogGP model. 6 | 7 | For a full explanation of this model, please see the referenced publications. But in 8 | short this model (as implemented in LogGOPSim) provides the following: 9 | 10 | * Matching semantics similar to MPI, i.e., a send matches a specific receive, thus both sender and receiver can influence matching, and dependencies between recv and send operations can be expressed, thus real-world applications can be simulated (unlike other simulators which rely on predefined traffic patterns). 11 | * Messages take a uniform amount of time between any pair of hosts, regardless of other traffic (there are extensions of LogGOPSim which change that), thus large-scale simulations can be performed relatively fast, compared to packet-based simulators. 
12 | 13 | Parts of the toolchain 14 | ====================== 15 | 16 | * LogGOPSim: The simulator itself. It consumes a GOAL binary file, which specifies the actions (send and receive) of each host in the simulated network and produces a timing report, i.e., the time at which each host finishes its execution (among other data). 17 | * Schedgen: While it is possible to write a GOAL file for LogGOPSim by hand, this is not advised. Instead, the Schedgen tool can be used to create such files. Schedgen can produce GOAL files for single MPI collective operations, but also allows to produce GOAL files which mimic the communication patterns observed in ML training workloads. It can also convert traces of MPI applications into the GOAL format. In case Schedgen does not offer the communication pattern you want to simulate, it can be extended using a C++ or Python API. 18 | * Schedgen2: An experimental re-implementation of Schedgen in Python - while this offers features that Schedgen lacks it misses many things still. 19 | * Txt2bin: The output of Schedgen is produced in a human-readable text format, which makes it easy to debug schedules, however, for large scale simulations the limiting resource is memory/cache, thus we convert the GOAL file into a space-efficient binary format before feeding it into LogGOPSim. The txt2bin tool performs this conversion. When invoking LogGOPSim, the user has the option of allowing "destructive reading" of the binary schedule, i.e., the input file is memory mapped and modified during the execution to further reduce the amount of memory required during large simulations. 20 | * liballprof: A wrapper library around MPI which records all MPI calls, including their non-data arguments, the MPI traces produced can be converted into the GOAL format by Schedgen. 
21 | 22 | 23 | Building the toolchain 24 | ====================== 25 | 26 | On a recent Debian-based distro such as Ubuntu you can install the build dependencies with something like 27 | ``` 28 | sudo apt-get install cmake gengetopt re2c libgraphviz-dev python3 libclang-15-dev llvm-15-dev python3-clang-15 openmpi-bin openmpi-common libopenmpi-dev libunwind-dev 29 | ``` 30 | YMMV, but this is what we use in our CI pipeline. 31 | 32 | 33 | This project uses cmake as its build tool: 34 | ``` 35 | git clone [This repo] 36 | cd LogGOPSim 37 | mkdir build 38 | cd build 39 | cmake ../src/CMakeLists.txt 40 | make 41 | ``` 42 | 43 | Simple usage example 44 | ==================== 45 | 46 | ``` 47 | # we assume we are in the build folder, i.e., completed the steps above 48 | ./schedgen --commsize 20 --datasize 1024 --ptrn binomialtreereduce -o example.goal # generate a GOAL text file for a simple pattern (a reduction using a binomial tree, for 20 hosts, each host contributing 1024 bytes) 49 | ./txt2bin -i example.goal -o example.bin # convert the GOAL text file into the binary format required by LogGOPSim 50 | ./LogGOPSim -f example.bin # run LogGOPSim with default parameters (see output below, try running with --help to see how to change them) 51 | LogGP network backend; size: 8 (1 CPUs, 1 NICs); L=2500, o=1500 g=1000, G=6, O=0, P=8, S=65535 52 | PERFORMANCE: Processes: 8 Events: 21 Time: 0 s Speed: inf ev/s 53 | Times: 54 | Host 0: 34914 55 | Host 1: 24776 56 | Host 2: 13138 57 | Host 3: 13138 58 | Host 4: 1500 59 | Host 5: 1500 60 | Host 6: 1500 61 | Host 7: 1500 62 | ``` 63 | -------------------------------------------------------------------------------- /src/Schedgen/schedgen.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 The Trustees of Indiana University and Indiana 3 | * University Research and Technology 4 | * Corporation. All rights reserved. 
5 | * 6 | * Author(s): Torsten Hoefler 7 | * Timo Schneider 8 | * 9 | */ 10 | 11 | #ifndef SCHEDGEN_HPP 12 | #define SCHEDGEN_HPP 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | //#include 20 | #include "buffer_element.hpp" 21 | #include "schedgen_cmdline.h" 22 | 23 | class Goal; 24 | 25 | class LocOp { 26 | private: 27 | double time_mult; // multiplier - relative to microseconds 28 | public: 29 | int cpu; // cpu to execute on 30 | enum t_type { 31 | IREQU, 32 | REQU 33 | }; // type of preceding op (I{send,recv} or {send,recv}) 34 | std::vector> prev, 35 | next; // preceding and next operations - pairs of id and type 36 | Goal *goal; // goal object 37 | double start; // start time for this local operation 38 | 39 | LocOp(Goal *_goal, double _time_mult, int cpu) 40 | : time_mult(_time_mult), cpu(cpu), goal(_goal), start(0) {} 41 | void NextOp(double time, double tend); 42 | }; 43 | 44 | #include "goal_comm.h" 45 | 46 | class Goal { 47 | 48 | public: 49 | Comm *comm; 50 | typedef int t_id; // identifier type 51 | static const t_id NO_ID; // invalid identifier 52 | 53 | typedef std::vector> 54 | locop; /* used to identify local operations for dependencies, it's a 55 | vector of pairs of < id , irequ | requ > */ 56 | 57 | Goal(gengetopt_args_info *args_info, int nranks); 58 | ~Goal(); 59 | 60 | void StartOp() { // this starts an operatio 61 | start.clear(); 62 | end.clear(); 63 | } 64 | 65 | int BuildComm_split(int base_comm, int rank_in_world_comm, int color, 66 | int key) { 67 | Comm *c = this->comm->find_comm(base_comm); 68 | Comm *nc = c->find_or_create_child_comm(color); 69 | nc->add_rank_key(rank_in_world_comm, key); 70 | return nc->getId(); 71 | } 72 | 73 | std::pair EndOp() { 74 | locop rstart, rend; 75 | std::set::iterator it; 76 | for (it = start.begin(); it != start.end(); it++) { 77 | rstart.push_back(std::make_pair(*it, LocOp::REQU)); 78 | } 79 | for (it = end.begin(); it != end.end(); it++) { 80 | 
rend.push_back(std::make_pair(*it, LocOp::REQU)); 81 | } 82 | return std::make_pair(rstart, rend); 83 | } 84 | 85 | void SetTag(uint64_t tag) { curtag = tag; } 86 | void StartRank(int rank); 87 | void Comment(std::string c); 88 | int Send(std::vector buf, int dest); 89 | int Send(int size, int dest); 90 | int Recv(std::vector, int src); 91 | int Recv(int size, int src); 92 | int Exec(std::string opname, btime_t size, int proc); 93 | int Exec(std::string opname, std::vector buf); 94 | int Exec(std::string opname, btime_t size); 95 | void Requires(int tail, int head); 96 | void Irequires(int tail, int head); 97 | void EndRank(); 98 | void Write(); 99 | void AppendString(std::string); 100 | 101 | private: 102 | std::set start, 103 | end; /* the operations which are independent at start and end */ 104 | std::string schedule; 105 | std::string filename; 106 | std::fstream myfile; 107 | 108 | /* nonblocking stuff */ 109 | bool nb; 110 | int poll_int; 111 | int nbfunc; 112 | int cpu; 113 | std::vector ranks_init; 114 | 115 | t_id id_counter; 116 | int dummynode; 117 | int sends, recvs, execs, ranks, reqs; 118 | uint64_t curtag; 119 | 120 | void read_schedule_from_file(); 121 | }; 122 | 123 | template std::vector make_vector(T x) { 124 | std::vector y; 125 | y.push_back(x); 126 | return y; 127 | }; 128 | 129 | // prototype 130 | void process_trace(gengetopt_args_info *args_info); 131 | void create_binomial_tree_bcast_rank(Goal *goal, int root, int comm_rank, 132 | int comm_size, int datasize); 133 | void create_binomial_tree_reduce_rank(Goal *goal, int root, int comm_rank, 134 | int comm_size, int datasize); 135 | void create_dissemination_rank(Goal *goal, int comm_rank, int comm_size, 136 | int datasize); 137 | void create_linear_alltoall_rank(Goal *goal, int src_rank, int comm_size, 138 | int datasize); 139 | 140 | #endif 141 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: 
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.12)
project(LogGOPSim LANGUAGES C CXX Fortran)

set(CMAKE_BUILD_TYPE RelWithDebInfo)

# If several versions of a package provide CMake packages, try to use the latest one.
set(CMAKE_FIND_PACKAGE_SORT_ORDER NATURAL)
set(CMAKE_FIND_PACKAGE_SORT_DIRECTION DEC)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/../cmake" ${CMAKE_MODULE_PATH})

include(${CMAKE_MODULE_PATH}/gengetopt.cmake)
include(${CMAKE_MODULE_PATH}/re2c.cmake)
find_gengetopt()
find_re2c()

# build loggopsim
find_package(Graphviz REQUIRED)
include_directories(${GRAPHVIZ_INCLUDE_DIRS})
add_gengetopt_files(LogGOPSim/loggopsim_cmdline)
add_executable(LogGOPSim LogGOPSim/loggopsim_cmdline.c LogGOPSim/LogGOPSim.cpp)
target_link_libraries(LogGOPSim ${GRAPHVIZ_CGRAPH_LIBRARY})

# build txt2bin
add_gengetopt_files(LogGOPSim/txt2bin_cmdline)
add_re2c_files(LogGOPSim/txt2bin)
add_executable(txt2bin LogGOPSim/txt2bin.cpp LogGOPSim/txt2bin_cmdline.c)

# build Schedgen1 (Schedgen2 is pure python)
add_gengetopt_files(Schedgen/schedgen_cmdline)
add_executable(schedgen Schedgen/buffer_element.cpp Schedgen/schedgen.cpp Schedgen/process_trace.cpp Schedgen/schedgen_cmdline.c)

# build liballprof1
find_package(MPI)
include(FortranCInterface)
FortranCInterface_VERIFY()
FortranCInterface_HEADER(fc_mangle.h)
include_directories(SYSTEM ${MPI_INCLUDE_PATH})
add_custom_command(OUTPUT lap1_mpi_c_wrapper.c COMMAND ${PROJECT_SOURCE_DIR}/liballprof/gencode.py c > lap1_mpi_c_wrapper.c DEPENDS ${PROJECT_SOURCE_DIR}/liballprof/mpi_header.h)
add_custom_command(OUTPUT lap1_mpi_f_wrapper.c COMMAND ${PROJECT_SOURCE_DIR}/liballprof/gencode.py f77 > lap1_mpi_f_wrapper.c DEPENDS ${PROJECT_SOURCE_DIR}/liballprof/mpi_header.h)
#include_directories(liballprof)
#add_library(mpipclog1 lap1_mpi_c_wrapper.c liballprof/sync.c)
#add_library(mpipflog1 lap1_mpi_f_wrapper.c liballprof/sync.c)


# build liballprof2
find_package(Clang)
find_package(Unwind)
add_custom_target(mpi_header_avail DEPENDS ${PROJECT_SOURCE_DIR}/liballprof2/mpi.h)
add_custom_target(mpi_semantics_avail DEPENDS mpi_sem.yml)
add_custom_target(mpi_wrapper_generated DEPENDS mpi_c_wrapper.c mpi_f_wrapper.c )
add_custom_command(OUTPUT mpi_c_wrapper.c mpi_f_wrapper.c COMMAND ${PROJECT_SOURCE_DIR}/liballprof2/gencode.py -s mpi_sem.yml DEPENDS mpi_semantics_avail)
add_custom_command(OUTPUT mpi_sem.yml COMMAND ${PROJECT_SOURCE_DIR}/liballprof2/gensem.py -l ${CLANG_INSTALL_PREFIX}/lib -m ${PROJECT_SOURCE_DIR}/liballprof2/mpi.h DEPENDS mpi_header_avail)
add_library(mpipclog SHARED mpi_c_wrapper.c)
add_library(mpipflog SHARED mpi_f_wrapper.c)
add_dependencies(mpipclog mpi_wrapper_generated)
add_dependencies(mpipflog mpi_wrapper_generated)
target_link_libraries(mpipclog ${MPI_C_LIBRARIES})
target_link_libraries(mpipflog ${MPI_Fortran_LIBRARIES})
if (Unwind_FOUND)
  target_link_libraries(mpipclog unwind::unwind)
  target_link_libraries(mpipflog unwind::unwind)
endif()

include(CTest)
# test the c++ toolchain for some pattern
add_test(NAME schedgen_binomialtreebcast COMMAND schedgen -o schedule.goal --commsize 8 --ptrn binomialtreebcast)
add_test(NAME txt2bin_binomialtreebcast COMMAND txt2bin -i schedule.goal -o schedule.bin)
add_test(NAME loggopsim_binomialtreebcast COMMAND LogGOPSim -f schedule.bin)
# FIX(review): DEPENDS is not a valid add_test() keyword; everything after
# COMMAND was being passed as extra command-line arguments to the test
# executable. Test ordering must be declared via the DEPENDS test property.
set_tests_properties(txt2bin_binomialtreebcast PROPERTIES DEPENDS schedgen_binomialtreebcast)
set_tests_properties(loggopsim_binomialtreebcast PROPERTIES DEPENDS txt2bin_binomialtreebcast)

# test the python toolchain for some pattern
# NOTE(review): ${PYTHON_EXECUTABLE} is used but no find_package(Python*)
# defines it in this file - confirm it is provided by the environment.
add_test(NAME schedgen2_bcast COMMAND ${PYTHON_EXECUTABLE} schedgen.py bcast --output schedule.goal WORKING_DIRECTORY
${PROJECT_SOURCE_DIR}/Schedgen2) 74 | add_test(NAME txt2bin_schedgen2_bcast COMMAND txt2bin -i ${PROJECT_SOURCE_DIR}/Schedgen2/schedule.goal -o schedule.bin) 75 | add_test(NAME loggopsim_schedgen2_bcast COMMAND LogGOPSim -f schedule.bin) 76 | 77 | # test liballprof 78 | 79 | # test liballprof2 80 | #c wrapper test 81 | add_executable(test_lap2_c ${PROJECT_SOURCE_DIR}/../tests/mpi_helloworld.c) 82 | target_link_libraries(test_lap2_c mpipclog) 83 | add_test(NAME trace_lap2_c COMMAND ${MPIEXEC_EXECUTABLE} --host localhost:4 ${MPIEXEC_NUMPROC_FLAG} 4 $) 84 | add_test(NAME lap2_c_trace_exists COMMAND ${CMAKE_COMMAND} -E cat lap2-trace-rank-1-of-4.txt) 85 | 86 | # fortran wrapper test 87 | add_executable(test_lap2_f ${PROJECT_SOURCE_DIR}/../tests/mpi_helloworld.f90) 88 | target_link_libraries(test_lap2_f mpipflog) 89 | add_test(NAME trace_lap2_f COMMAND ${MPIEXEC_EXECUTABLE} --host localhost:4 ${MPIEXEC_NUMPROC_FLAG} 4 $) 90 | add_test(NAME lap2_f_trace_exists COMMAND ${CMAKE_COMMAND} -E cat lap2-trace-rank-1-of-4.txt ) 91 | 92 | -------------------------------------------------------------------------------- /src/liballprof2/tracer_main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #define UNW_LOCAL_ONLY //we do not need to unwind frames in another process 7 | #include 8 | 9 | #define LAP2_TRANSFER_BUFFER_SIZE 1024 10 | #define LAP2_BACKTRACE_BUF_SIZE 4096 11 | #define WRITE_TRACE(fmt, args...) 
fprintf(lap_fptr, fmt, args) 12 | 13 | FILE* lap_fptr = NULL; 14 | char* lap_backtrace_buf = NULL; 15 | int lap_initialized = 0; 16 | int lap_mpi_initialized = 0; 17 | 18 | int lap_tracing_enabled = 1; 19 | int lap_backtrace_enabled = 1; 20 | int lap_elem_tracing_enabled = 1; 21 | 22 | 23 | static void init_back_trace(void) { 24 | 25 | } 26 | 27 | static void lap_get_full_backtrace(char* buf, size_t len) { 28 | size_t written = 0; 29 | unw_cursor_t cursor; 30 | unw_context_t context; 31 | 32 | // Initialize cursor to current frame for local unwinding. 33 | unw_getcontext(&context); 34 | unw_init_local(&cursor, &context); 35 | 36 | // Unwind frames one by one, going up the frame stack. 37 | while (unw_step(&cursor) > 0) { 38 | unw_word_t offset, pc; 39 | unw_get_reg(&cursor, UNW_REG_IP, &pc); 40 | if (pc == 0) { 41 | break; 42 | } 43 | written += snprintf(&buf[written], len-written, "0x%lx:", pc); 44 | 45 | char sym[256]; 46 | if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset) == 0) { 47 | written += snprintf(&buf[written], len-written, " (%s+0x%lx) <- ", sym, offset); 48 | } else { 49 | written += snprintf(&buf[written], len-written, "NO_SYMBOL "); 50 | } 51 | } 52 | if (written>0) written -= 4; 53 | buf[written] = '\0'; 54 | } 55 | 56 | static void lap_check(void) { 57 | if (lap_mpi_initialized == 0) PMPI_Initialized(&lap_mpi_initialized); 58 | if (lap_initialized) return; 59 | lap_fptr = tmpfile(); //write to a tmpfile, we don't know our rank yet, until MPI is initialized 60 | lap_backtrace_buf = malloc(LAP2_BACKTRACE_BUF_SIZE); 61 | assert(lap_backtrace_buf); 62 | assert(lap_fptr); 63 | init_back_trace(); 64 | lap_initialized = 1; 65 | } 66 | 67 | 68 | static void lap_collect_traces(void) { 69 | int comm_rank, comm_size; 70 | PMPI_Comm_rank(MPI_COMM_WORLD, &comm_rank); 71 | PMPI_Comm_size(MPI_COMM_WORLD, &comm_size); 72 | int trace_size = ftell(lap_fptr); 73 | fseek(lap_fptr, 0, SEEK_SET); 74 | int* trace_sizes = malloc(comm_size * sizeof(int)); 75 | if 
(trace_sizes == NULL) { 76 | fprintf(stderr, "lap2 ran out of memory when collecting traces :(\n"); 77 | return; 78 | } 79 | void* chunkbuf = malloc(LAP2_TRANSFER_BUFFER_SIZE); 80 | if (chunkbuf == NULL) { 81 | fprintf(stderr, "lap2 ran out of memory when collecting traces, decrease LAP2_TRANSFER_BUFFER_SIZE=%i :(\n", LAP2_TRANSFER_BUFFER_SIZE); 82 | return; 83 | } 84 | PMPI_Gather(&trace_size, 1, MPI_INT, trace_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD); 85 | if (comm_rank == 0) { 86 | for (int r=0; r"), LogGOPSim will produce the following set of files: 34 | 35 | * -rq-max.data : this file contains one line per rank, each line contains the maximum 36 | number of elements observed in the RQ 37 | 38 | * -rq-hit.data : this file contains one line per rank, each line contains list of 39 | space-separated pairs. Each pair has the form: 40 | 41 | , 42 | 43 | Each successful search of the RQ results in the creation of a new pair 44 | (i.e., the number of pairs for a given rank corresponds to the number of 45 | successful searches, i.e., hits, of the RQ). The value of 46 | represents how many elements were searched before a match was found. The 47 | value of represents the point in simulated time (in 48 | nanoseconds since the start of the simulation) at which the search 49 | occurred. 50 | 51 | * -rq-miss.data : this file contains one line per rank, each line contains list of 52 | space-separated pairs. Each pair has the form: 53 | 54 | , 55 | 56 | Each unsuccessful search of the RQ results in the creation of a new pair 57 | (i.e., the number of pairs for a given rank corresponds to the number of 58 | unsuccessful searches, i.e., misses, of the RQ). The value of 59 | represents the size of the RQ when the search failed, i.e., 60 | how many elements were searched trying to find a match. The value of 61 | represents the point in simulated time (in nanoseconds 62 | since the start of the simulation) at which the search occurred. 
63 | 64 | * -uq-max.data : this file contains one line per rank, each line contains the maximum 65 | number of elements observed in the UQ 66 | 67 | * -uq-hit.data : this file contains one line per rank, each line contains list of 68 | space-separated pairs. Each pair has the form: 69 | 70 | , 71 | 72 | Each successful search of the UQ results in the creation of a new pair 73 | (i.e., the number of pairs for a given rank corresponds to the number of 74 | successful searches, i.e., hits, of the UQ). The value of 75 | represents how many elements were searched before a match was found. The 76 | value of represents the point in simulated time (in 77 | nanoseconds since the start of the simulation) at which the search occurred. 78 | 79 | * -uq-miss.data : this file contains one line per rank, each line contains list of 80 | space-separated pairs. Each pair has the form: 81 | 82 | , 83 | 84 | Each unsuccessful search of the UQ results in the creation of a new pair 85 | (i.e., the number of pairs for a given rank corresponds to the number of 86 | unsuccessful searches, i.e., misses, of the UQ). The value of 87 | represents the size of the RQ when the search failed, i.e., 88 | how many elements were searched trying to find a match. The value of 89 | represents the point in simulated time (in nanoseconds 90 | since the start of the simulation) at which the search occurred. 91 | 92 | Example 93 | ------- 94 | For an example of the data that can be collected using this option, see: 95 | 96 | Ferreira, Levy, Pedretti and Grant. "Characterizing MPI matching via trace-based simulation", 97 | Parallel Computing, volume 77, pages 57-83 (2018). 
98 | 99 | Questions 100 | --------- 101 | Questions regarding this feature may be directed to: 102 | 103 | Scott Levy (sllevy@sandia.gov) 104 | Kurt Ferreira (kbferre@sandia.gov) 105 | -------------------------------------------------------------------------------- /src/LogGOPSim/Noise.hpp: -------------------------------------------------------------------------------- 1 | #include "loggopsim_cmdline.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | class Noise { 10 | private: 11 | int p; 12 | std::vector > trcnoise; // read NG noise 13 | uint64_t trctime; // how long is the trace 14 | 15 | std::vector ranktime; // time in trcnoise for each rank 16 | std::vector injected_noise; // counts total injected noise per node 17 | static const int max_report=64; // maximum number of nodes to report for 18 | 19 | public: 20 | 21 | Noise(gengetopt_args_info *args_info, int p) : p(p) { 22 | 23 | if(args_info->noise_trace_given) { 24 | const int size=1024; 25 | char buffer[size]; 26 | std::ifstream trace; 27 | trace.open(args_info->noise_trace_arg); 28 | if(!trace.is_open()) { 29 | std::cerr << "couldn't read noise trace file: " << args_info->noise_trace_arg << " - exiting\n"; 30 | throw(10); 31 | } 32 | 33 | bool eof=false; 34 | int line=0; 35 | while(!eof) { 36 | line++; 37 | trace.getline(buffer,size); 38 | 39 | if(buffer[0] == '#') continue; 40 | 41 | double offset, duration; 42 | // format: line ::= \t - all times in nanoseconds 43 | sscanf(buffer, "%lf\t%lf", &offset, &duration); 44 | 45 | //std::cout << offset << " " << duration << "\n"; 46 | trcnoise.push_back(std::make_pair((uint64_t)round(offset), (uint64_t)round(duration))); 47 | 48 | eof = trace.eof(); 49 | } 50 | 51 | if(((trcnoise.end()-1)->first-trcnoise.begin()->first) > (double)std::numeric_limits::max()) { 52 | std::cerr << " the length of the noise-trace ("<<(trcnoise.end()-1)->first-trcnoise.begin()->first<<" ns) is can not be saved in 'uint64_t' (max: 
"<<(double)std::numeric_limits::max()<<") - exiting\n"; 53 | throw(11); 54 | } 55 | 56 | //trctime = ((trcnoise.end()-1)->first-trcnoise.begin()->first); 57 | trctime = (trcnoise.end()-1)->first; 58 | std::cout << "Noisegen: read " << trcnoise.size() << " noise events spanning " << trctime/1e9 << "s "; 59 | if(args_info->noise_cosched_given) 60 | std::cout << "(coscheduling)\n"; 61 | else 62 | std::cout << "(independent)\n"; 63 | 64 | std::mt19937 mtrand(time(0)); 65 | double cosched_starttime = ((double)mtrand()/mtrand.max())*((double)trctime); 66 | for(int i=0; inoise_cosched_given) { 68 | ranktime.push_back((uint64_t)cosched_starttime); 69 | } else { 70 | double starttime = ((double)mtrand()/mtrand.max())*((double)trctime); 71 | ranktime.push_back((uint64_t)starttime); 72 | //printf("%i %llu %llu\n", i, (uint64_t)starttime, trctime); 73 | } 74 | if (p<=max_report) injected_noise.push_back(0); 75 | } 76 | } 77 | } 78 | 79 | ~Noise() { 80 | // if we have trace data 81 | if(trcnoise.size()) { 82 | // only print noise for small runs 83 | if (p<=max_report) { 84 | std::cout << "noise per rank: "; 85 | for(int i=0; i trcnoise[0].first) { 109 | // do binary search for pos where trcnoise[pos].first is the 110 | // biggest element that is smaller than trcstart 111 | unsigned int min=0, max=trcnoise.size()-1; 112 | do { 113 | pos=(min+max) / 2; 114 | if(trcstart > trcnoise[pos].first) { 115 | min = pos+1; 116 | } else { 117 | max = pos-1; 118 | } 119 | } while((trcstart != trcnoise[pos].first) && (min < max)); 120 | 121 | // the binary search doesn't necessarily find the right interval, 122 | // however, it brings us close 123 | while( !( // we loop until we have: 124 | (trcnoise[pos].first <= trcstart) && // pos is smaller or equal than trcstart 125 | (trcnoise[pos+1].first > trcstart) // pos+1 is larger than trcstart 126 | ) ) { 127 | if(trcnoise[pos].first > trcstart) pos--; 128 | else pos++; 129 | }; 130 | // compute the endtime of the last event 131 | endlastevent = 
trcnoise[pos].first+trcnoise[pos].second; 132 | } 133 | 134 | // if last event reached into starttime - then it influenced me :) 135 | if(endlastevent>trcstart) { 136 | noise += endlastevent-trcstart; 137 | } 138 | 139 | //if(noise > 100000) std::cout << trcnoise[pos].first << " " << trcstart << " " << trcnoise[pos+1].first << "\n"; 140 | 141 | btime_t end = trcstart+oplength; 142 | 143 | // if we're at the end of samples - wrap around 144 | if(pos == trcnoise.size()-1) { 145 | end -= trctime; // adjust end time 146 | pos = 0; // set position to first 147 | } 148 | 149 | // if we reach into next sample - then add the whole time of sample 150 | while(end > trcnoise[pos+1].first) { 151 | pos++; 152 | noise += trcnoise[pos].second; 153 | /*if(noise > 100000) { 154 | std::cout << "inner " << trcnoise[pos].first << " pos: " << pos << " start-end: " << trcstart << "-" << end << " end: " << end << " noise: " << noise << " trctime: " << trctime << "\n"; 155 | return 0;}*/ 156 | 157 | // if we're at the end of samples - wrap around 158 | if(pos == trcnoise.size()-1) { 159 | end -= trctime; // adjust end time 160 | pos = 0; // set position to first 161 | } 162 | } 163 | 164 | // do *NOT* update ranktime because starttime is absolut 165 | 166 | //if (noise > 100000) std::cout << "injected " << noise << " ns noise in " << endtime-starttime << "ns\n"; 167 | if (p<=max_report) injected_noise[r] += noise; 168 | } 169 | 170 | assert(noise >= 0); 171 | return noise; 172 | } 173 | }; 174 | -------------------------------------------------------------------------------- /src/Drawviz/Drawviz.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 The Trustees of Indiana University and Indiana 3 | * University Research and Technology 4 | * Corporation. All rights reserved. 
5 | * 6 | * Author(s): Torsten Hoefler 7 | * Timo Schneider 8 | * 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "TimelineDrawing.hpp" 19 | #include "cmdline.h" 20 | 21 | 22 | int main(int argc, char **argv) { 23 | 24 | gengetopt_args_info args_info; 25 | 26 | if (cmdline_parser(argc, argv, &args_info) != 0) { 27 | fprintf(stderr, "Couldn't parse command line arguments!\n"); 28 | exit(EXIT_FAILURE); 29 | } 30 | 31 | std::string line; 32 | std::ifstream myfile(args_info.inputfile_arg); 33 | 34 | int rank_num = 0; 35 | int maxtime = 0; 36 | int maxcpu = 0; 37 | bool interval = false; 38 | 39 | if (args_info.endtime_arg > 0) { 40 | interval = true; 41 | } 42 | 43 | if (myfile.is_open()) { 44 | 45 | TimelineDrawing TLViz(args_info); 46 | 47 | while (!myfile.eof()) { 48 | 49 | boost::cmatch matches; 50 | 51 | getline (myfile,line); 52 | 53 | boost::regex ranknum("numranks (\\d+);"); 54 | boost::regex osend("osend (\\d+) (\\d+) (\\d+) (\\d+) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?);"); 55 | boost::regex orecv("orecv (\\d+) (\\d+) (\\d+) (\\d+) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?);"); 56 | boost::regex loclop("loclop (\\d+) (\\d+) (\\d+) (\\d+) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?);"); 57 | boost::regex noise("noise (\\d+) (\\d+) (\\d+) (\\d+) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?);"); 58 | boost::regex transmission("transmission (\\d+) (\\d+) (\\d+) (\\d+) (\\d+) (\\d+) (\\d+(?:.\\d+)?) (\\d+(?:.\\d+)?) 
(\\d+(?:.\\d+)?);"); 59 | boost::regex whitespace("\\w*"); 60 | 61 | if (boost::regex_match(line.c_str(), matches, osend)) { 62 | 63 | std::string ranks = matches[1]; 64 | std::string cpus = matches[2]; 65 | std::string starts = matches[3]; 66 | std::string ends = matches[4]; 67 | std::string reds = matches[5]; 68 | std::string greens = matches[6]; 69 | std::string blues = matches[7]; 70 | 71 | if ((interval==false) or ((atoi(starts.c_str()) >= args_info.starttime_arg) && (atoi(ends.c_str()) < args_info.endtime_arg))) { 72 | TLViz.add_osend(atoi(ranks.c_str()), atoi(starts.c_str())-args_info.starttime_arg, atoi(ends.c_str())-args_info.starttime_arg, atoi(cpus.c_str()), 73 | atof(reds.c_str()), atof(greens.c_str()), atof(blues.c_str()) ); 74 | if (maxtime < atoi(ends.c_str())) maxtime = atoi(ends.c_str()); 75 | if (maxcpu < atoi(cpus.c_str())) maxcpu = atoi(cpus.c_str()); 76 | } 77 | } 78 | 79 | else if (boost::regex_match(line.c_str(), matches, orecv)) { 80 | 81 | std::string ranks = matches[1]; 82 | std::string cpus = matches[2]; 83 | std::string starts = matches[3]; 84 | std::string ends = matches[4]; 85 | std::string reds = matches[5]; 86 | std::string greens = matches[6]; 87 | std::string blues = matches[7]; 88 | 89 | if ((interval==false) or ((atoi(starts.c_str()) >= args_info.starttime_arg) && (atoi(ends.c_str()) < args_info.endtime_arg))) { 90 | TLViz.add_orecv(atoi(ranks.c_str()), atoi(starts.c_str())-args_info.starttime_arg, atoi(ends.c_str())-args_info.starttime_arg, atoi(cpus.c_str()), 91 | atof(reds.c_str()), atof(greens.c_str()), atof(blues.c_str()) ); 92 | if (maxtime < atoi(ends.c_str())) maxtime = atoi(ends.c_str()); 93 | if (maxcpu < atoi(cpus.c_str())) maxcpu = atoi(cpus.c_str()); 94 | } 95 | } 96 | 97 | else if (boost::regex_match(line.c_str(), matches, loclop)) { 98 | 99 | std::string ranks = matches[1]; 100 | std::string cpus = matches[2]; 101 | std::string starts = matches[3]; 102 | std::string ends = matches[4]; 103 | std::string reds = 
matches[5]; 104 | std::string greens = matches[6]; 105 | std::string blues = matches[7]; 106 | 107 | if ((interval==false) or ((atoi(starts.c_str()) >= args_info.starttime_arg) && (atoi(ends.c_str()) < args_info.endtime_arg))) { 108 | TLViz.add_loclop(atoi(ranks.c_str()), atoi(starts.c_str())-args_info.starttime_arg , atoi(ends.c_str())-args_info.starttime_arg, atoi(cpus.c_str()), 109 | atof(reds.c_str()), atof(greens.c_str()), atof(blues.c_str()) ); 110 | if (maxtime < atoi(ends.c_str())) maxtime = atoi(ends.c_str()); 111 | if (maxcpu < atoi(cpus.c_str())) maxcpu = atoi(cpus.c_str()); 112 | } 113 | } 114 | 115 | else if (boost::regex_match(line.c_str(), matches, noise)) { 116 | 117 | std::string ranks = matches[1]; 118 | std::string cpus = matches[2]; 119 | std::string starts = matches[3]; 120 | std::string ends = matches[4]; 121 | std::string reds = matches[5]; 122 | std::string greens = matches[6]; 123 | std::string blues = matches[7]; 124 | 125 | if ((interval==false) or ((atoi(starts.c_str()) >= args_info.starttime_arg) && (atoi(ends.c_str()) < args_info.endtime_arg))) { 126 | TLViz.add_noise(atoi(ranks.c_str()), atoi(starts.c_str())-args_info.starttime_arg , atoi(ends.c_str())-args_info.starttime_arg, atoi(cpus.c_str()), 127 | atof(reds.c_str()), atof(greens.c_str()), atof(blues.c_str()) ); 128 | if (maxtime < atoi(ends.c_str())) maxtime = atoi(ends.c_str()); 129 | if (maxcpu < atoi(cpus.c_str())) maxcpu = atoi(cpus.c_str()); 130 | } 131 | } 132 | 133 | else if (boost::regex_match(line.c_str(), matches, transmission)) { 134 | 135 | std::string src = matches[1]; 136 | std::string dest = matches[2]; 137 | std::string start = matches[3]; 138 | std::string end = matches[4]; 139 | std::string size = matches[5]; 140 | std::string G = matches[6]; 141 | std::string reds = matches[7]; 142 | std::string greens = matches[8]; 143 | std::string blues = matches[9]; 144 | 145 | if ((interval==false) or ((atoi(start.c_str()) >= args_info.starttime_arg) && (atoi(end.c_str()) 
< args_info.endtime_arg))) { 146 | TLViz.add_transmission(atoi(src.c_str()), atoi(dest.c_str()), atoi(start.c_str()) - args_info.starttime_arg, 147 | atoi(end.c_str()) - args_info.starttime_arg, atoi(size.c_str()), atoi(G.c_str()), 148 | atof(reds.c_str()), atof(greens.c_str()), atof(blues.c_str()) ); 149 | 150 | int endtime = atoi(end.c_str())+atoi(G.c_str())*atoi(size.c_str()); 151 | if (maxtime < endtime ) maxtime = endtime; 152 | } 153 | } 154 | else if (boost::regex_match(line.c_str(), matches, ranknum)) { 155 | std::string ranknum = matches[1]; 156 | if (atoi(ranknum.c_str()) > rank_num) rank_num = atoi(ranknum.c_str()); 157 | } 158 | else if (boost::regex_match(line.c_str(), matches, whitespace)) { 159 | } 160 | else { 161 | std::cout << "Unamtched line: [" << line << "]" << std::endl; 162 | } 163 | 164 | } 165 | myfile.close(); 166 | 167 | TLViz.init_graph(rank_num, maxcpu+1, 800, 800, args_info.outputfile_arg); 168 | TLViz.draw_ranklines(); 169 | maxtime -= args_info.starttime_arg; 170 | TLViz.draw_everything(maxtime); 171 | TLViz.close_graph(); 172 | } 173 | else { 174 | fprintf(stderr, "Unable to open file with starttimes (%s)\n", args_info.inputfile_arg); 175 | exit(EXIT_FAILURE); 176 | } 177 | 178 | exit(EXIT_SUCCESS); 179 | } 180 | 181 | 182 | -------------------------------------------------------------------------------- /src/Schedgen2/schedgen.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | import sys 4 | import json 5 | import tempfile 6 | import subprocess 7 | import argparse 8 | from mpi_colls import * 9 | from additional_microbenchmarks import * 10 | 11 | parser = argparse.ArgumentParser(description="Generate GOAL Schedules.") 12 | 13 | subparsers = parser.add_subparsers( 14 | help="Communication to generate", dest="comm", required=True 15 | ) 16 | mpi = [] 17 | additional_microbenchmarks = [] 18 | 19 | incast_parser = subparsers.add_parser("incast") 20 | additional_microbenchmarks.append(incast_parser) 21 | 22 | outcast_parser = subparsers.add_parser("outcast") 23 | additional_microbenchmarks.append(outcast_parser) 24 | 25 | dissemination_parser = subparsers.add_parser("dissemination") 26 | mpi.append(dissemination_parser) 27 | 28 | reduce_parser = subparsers.add_parser("reduce") 29 | mpi.append(reduce_parser) 30 | 31 | bcast_parser = subparsers.add_parser("bcast") 32 | mpi.append(bcast_parser) 33 | 34 | scatter_parser = subparsers.add_parser("scatter") 35 | mpi.append(scatter_parser) 36 | 37 | allreduce_parser = subparsers.add_parser("allreduce") 38 | mpi.append(allreduce_parser) 39 | 40 | alltoall_parser = subparsers.add_parser("alltoall") 41 | mpi.append(alltoall_parser) 42 | 43 | alltoallv_parser = subparsers.add_parser("alltoallv") 44 | mpi.append(alltoallv_parser) 45 | 46 | for p in additional_microbenchmarks: 47 | p.add_argument( 48 | "--randomized_data", 49 | dest="randomized_data", 50 | action="store_true", 51 | help="Use unbalanced data sizes", 52 | ) 53 | 54 | for p in [allreduce_parser, alltoall_parser, alltoallv_parser]: 55 | p.add_argument( 56 | "--num_comm_groups", 57 | dest="num_comm_groups", 58 | type=int, 59 | default=1, 60 | help="Number of communication groups, >1 for multi-allreduce and multi-alltoall(v)", 61 | ) 62 | 63 | for p in mpi + additional_microbenchmarks: 64 | p.add_argument( 65 | "--ptrn", 66 | dest="ptrn", 67 | choices=["datasize_based", "binomialtree", "recdoub", "ring", "linear"], 68 
| default="datasize_based", 69 | help="Pattern to use for communication, note that not all patterns are available for all communication types", 70 | ) 71 | p.add_argument( 72 | "--ptrn-config", 73 | dest="ptrn_config", 74 | help="Configuration file for the pattern to use with data size based selection to override the default configuration", 75 | ) 76 | p.add_argument( 77 | "--comm_size", 78 | dest="comm_size", 79 | type=int, 80 | default=8, 81 | help="Size of the communicator", 82 | ) 83 | p.add_argument( 84 | "--datasize", 85 | dest="datasize", 86 | type=int, 87 | default=8, 88 | help="Size of the data, i.e., for reduce operations", 89 | ) 90 | p.add_argument( 91 | "--window_size", 92 | dest="window_size", 93 | type=int, 94 | default=0, 95 | help="Window size for windowed linear communication patterns", 96 | ) 97 | p.add_argument( 98 | "--compute_time_dependency", 99 | dest="compute_time_dependency", 100 | type=int, 101 | default=0, 102 | help="Compute time that is to be inserted in between send operations", 103 | ) 104 | p.add_argument( 105 | "--output", 106 | dest="output", 107 | default="stdout", 108 | help="Output file", 109 | ) 110 | p.add_argument( 111 | "--ignore_verification", 112 | dest="ignore_verification", 113 | action="store_true", 114 | help="Ignore verification of parameters", 115 | ) 116 | p.add_argument( 117 | "--config", 118 | dest="config", 119 | help="Configuration file, takes precedence over other parameters", 120 | ) 121 | p.add_argument( 122 | "--txt2bin", 123 | dest="txt2bin", 124 | help="Path to txt2bin executable", 125 | ) 126 | 127 | 128 | def verify_params(args): 129 | if args.ignore_verification: 130 | return 131 | assert args.comm_size > 0, "Communicator size must be greater than 0." 132 | assert args.datasize > 0, "Data size must be greater than 0." 
133 | assert ( 134 | args.txt2bin is None or args.output != "stdout" 135 | ), "Cannot use txt2bin with stdout" 136 | assert ( 137 | args.ptrn != "recdoub" or args.comm_size & (args.comm_size - 1) == 0 138 | ), "Currently recdoub pattern requires a power of 2 communicator size." 139 | 140 | 141 | def comm_to_func(comm: str) -> callable: 142 | """ 143 | Convert a communication type to a function that generates the communication. 144 | 145 | :param comm: The communication type. 146 | :return: A function that generates the communication. 147 | """ 148 | 149 | if comm == "incast": 150 | return incast 151 | elif comm == "outcast": 152 | return outcast 153 | elif comm == "reduce": 154 | return reduce 155 | elif comm == "bcast": 156 | return bcast 157 | elif comm == "scatter": 158 | return scatter 159 | elif comm == "dissemination": 160 | return dissemination 161 | elif comm == "allreduce": 162 | return allreduce 163 | elif comm == "alltoall": 164 | return alltoall 165 | elif comm == "alltoallv": 166 | return alltoallv 167 | else: 168 | raise ValueError(f"Communication type {comm} not implemented") 169 | 170 | 171 | def multi(collective: callable, num_comm_groups: int, comm_size: int, **kwargs): 172 | comm = GoalComm(comm_size * num_comm_groups) 173 | comms = comm.CommSplit( 174 | color=[i // comm_size for i in range(comm_size * num_comm_groups)], 175 | key=[i % comm_size for i in range(comm_size * num_comm_groups)], 176 | ) 177 | for comm_split in comms: 178 | comm_collective = collective(comm_size=comm_size, **kwargs) 179 | comm_split.Append(comm_collective) 180 | return comm 181 | 182 | 183 | args = parser.parse_args() 184 | if args.config is not None: 185 | with open(args.config, "r") as f: 186 | config = json.load(f) 187 | for k, v in config.items(): 188 | setattr(args, k, v) 189 | 190 | if args.ptrn == "datasize_based": 191 | if args.comm in [p.prog.split()[-1] for p in mpi]: 192 | args.ptrn = mpi_communication_pattern_selection( 193 | args.comm, args.comm_size, 
args.datasize 194 | ) 195 | elif args.comm in [p.prog.split()[-1] for p in additional_microbenchmarks]: 196 | args.ptrn = "linear" 197 | else: 198 | raise ValueError( 199 | f"Communication type {args.comm} does not currently support data size based pattern selection" 200 | ) 201 | 202 | verify_params(args) 203 | args.tag = 42 204 | 205 | if ( 206 | "num_comm_groups" not in vars(args) 207 | or args.num_comm_groups is None 208 | or args.num_comm_groups <= 1 209 | ): 210 | g = comm_to_func(args.comm)(**vars(args)) 211 | else: 212 | g = multi( 213 | comm_to_func(args.comm), **vars(args) 214 | ) 215 | 216 | if args.txt2bin is not None: 217 | assert args.output != "stdout", "Cannot use txt2bin with stdout" 218 | with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: 219 | g.write_goal(fh=f) 220 | tmp_goal_file = f.name 221 | subprocess.run( 222 | [args.txt2bin, "-i", tmp_goal_file, "-o", args.output, "-p"], 223 | check=True, 224 | ) 225 | subprocess.run(["rm", tmp_goal_file], check=True) 226 | else: 227 | if args.output == "stdout": 228 | args.output = sys.stdout 229 | else: 230 | args.output = open(args.output, "w") 231 | 232 | g.write_goal(fh=args.output) 233 | if args.output != sys.stdout: 234 | args.output.close() 235 | -------------------------------------------------------------------------------- /src/Drawviz/cmdline.h: -------------------------------------------------------------------------------- 1 | /** @file cmdline.h 2 | * @brief The header file for the command line option parser 3 | * generated by GNU Gengetopt version 2.23 4 | * http://www.gnu.org/software/gengetopt. 5 | * DO NOT modify this file, since it can be overwritten 6 | * @author GNU Gengetopt */ 7 | 8 | #ifndef CMDLINE_H 9 | #define CMDLINE_H 10 | 11 | /* If we use autoconf. 
*/ 12 | #ifdef HAVE_CONFIG_H 13 | #include "config.h" 14 | #endif 15 | 16 | #include /* for FILE */ 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif /* __cplusplus */ 21 | 22 | #ifndef CMDLINE_PARSER_PACKAGE 23 | /** @brief the program name (used for printing errors) */ 24 | #define CMDLINE_PARSER_PACKAGE "drawviz" 25 | #endif 26 | 27 | #ifndef CMDLINE_PARSER_PACKAGE_NAME 28 | /** @brief the complete program name (used for help and version) */ 29 | #define CMDLINE_PARSER_PACKAGE_NAME "drawviz" 30 | #endif 31 | 32 | #ifndef CMDLINE_PARSER_VERSION 33 | /** @brief the program version */ 34 | #define CMDLINE_PARSER_VERSION "0.1" 35 | #endif 36 | 37 | /** @brief Where the command line options are stored */ 38 | struct gengetopt_args_info 39 | { 40 | const char *help_help; /**< @brief Print help and exit help description. */ 41 | const char *version_help; /**< @brief Print version and exit help description. */ 42 | char * inputfile_arg; /**< @brief Name of the inputfile (event data). */ 43 | char * inputfile_orig; /**< @brief Name of the inputfile (event data) original value given at command line. */ 44 | const char *inputfile_help; /**< @brief Name of the inputfile (event data) help description. */ 45 | char * outputfile_arg; /**< @brief Name of the output file (postscript) (default='timeline.ps'). */ 46 | char * outputfile_orig; /**< @brief Name of the output file (postscript) original value given at command line. */ 47 | const char *outputfile_help; /**< @brief Name of the output file (postscript) help description. */ 48 | int linethickness_arg; /**< @brief Thickness of lines (default='1'). */ 49 | char * linethickness_orig; /**< @brief Thickness of lines original value given at command line. */ 50 | const char *linethickness_help; /**< @brief Thickness of lines help description. */ 51 | int starttime_arg; /**< @brief Starttime, if only a interval should be drawn (default='0'). 
*/ 52 | char * starttime_orig; /**< @brief Starttime, if only a interval should be drawn original value given at command line. */ 53 | const char *starttime_help; /**< @brief Starttime, if only a interval should be drawn help description. */ 54 | int endtime_arg; /**< @brief Endtime, if only a interval should be drawn (default='0'). */ 55 | char * endtime_orig; /**< @brief Endtime, if only a interval should be drawn original value given at command line. */ 56 | const char *endtime_help; /**< @brief Endtime, if only a interval should be drawn help description. */ 57 | int arrowheads_flag; /**< @brief If this flag is given, arrowheads will be drawn (default=off). */ 58 | const char *arrowheads_help; /**< @brief If this flag is given, arrowheads will be drawn help description. */ 59 | int descrtext_flag; /**< @brief If this flag is given, text will be written below o_send and o_recv (default=off). */ 60 | const char *descrtext_help; /**< @brief If this flag is given, text will be written below o_send and o_recv help description. */ 61 | 62 | unsigned int help_given ; /**< @brief Whether help was given. */ 63 | unsigned int version_given ; /**< @brief Whether version was given. */ 64 | unsigned int inputfile_given ; /**< @brief Whether inputfile was given. */ 65 | unsigned int outputfile_given ; /**< @brief Whether outputfile was given. */ 66 | unsigned int linethickness_given ; /**< @brief Whether linethickness was given. */ 67 | unsigned int starttime_given ; /**< @brief Whether starttime was given. */ 68 | unsigned int endtime_given ; /**< @brief Whether endtime was given. */ 69 | unsigned int arrowheads_given ; /**< @brief Whether arrowheads was given. */ 70 | unsigned int descrtext_given ; /**< @brief Whether descrtext was given. 
*/ 71 | 72 | } ; 73 | 74 | /** @brief The additional parameters to pass to parser functions */ 75 | struct cmdline_parser_params 76 | { 77 | int override; /**< @brief whether to override possibly already present options (default 0) */ 78 | int initialize; /**< @brief whether to initialize the option structure gengetopt_args_info (default 1) */ 79 | int check_required; /**< @brief whether to check that all required options were provided (default 1) */ 80 | int check_ambiguity; /**< @brief whether to check for options already specified in the option structure gengetopt_args_info (default 0) */ 81 | int print_errors; /**< @brief whether getopt_long should print an error message for a bad option (default 1) */ 82 | } ; 83 | 84 | /** @brief the purpose string of the program */ 85 | extern const char *gengetopt_args_info_purpose; 86 | /** @brief the usage string of the program */ 87 | extern const char *gengetopt_args_info_usage; 88 | /** @brief the description string of the program */ 89 | extern const char *gengetopt_args_info_description; 90 | /** @brief all the lines making the help output */ 91 | extern const char *gengetopt_args_info_help[]; 92 | 93 | /** 94 | * The command line parser 95 | * @param argc the number of command line options 96 | * @param argv the command line options 97 | * @param args_info the structure where option information will be stored 98 | * @return 0 if everything went fine, NON 0 if an error took place 99 | */ 100 | int cmdline_parser (int argc, char **argv, 101 | struct gengetopt_args_info *args_info); 102 | 103 | /** 104 | * The command line parser (version with additional parameters - deprecated) 105 | * @param argc the number of command line options 106 | * @param argv the command line options 107 | * @param args_info the structure where option information will be stored 108 | * @param override whether to override possibly already present options 109 | * @param initialize whether to initialize the option structure my_args_info 110 | * 
@param check_required whether to check that all required options were provided 111 | * @return 0 if everything went fine, NON 0 if an error took place 112 | * @deprecated use cmdline_parser_ext() instead 113 | */ 114 | int cmdline_parser2 (int argc, char **argv, 115 | struct gengetopt_args_info *args_info, 116 | int override, int initialize, int check_required); 117 | 118 | /** 119 | * The command line parser (version with additional parameters) 120 | * @param argc the number of command line options 121 | * @param argv the command line options 122 | * @param args_info the structure where option information will be stored 123 | * @param params additional parameters for the parser 124 | * @return 0 if everything went fine, NON 0 if an error took place 125 | */ 126 | int cmdline_parser_ext (int argc, char **argv, 127 | struct gengetopt_args_info *args_info, 128 | struct cmdline_parser_params *params); 129 | 130 | /** 131 | * Save the contents of the option struct into an already open FILE stream. 132 | * @param outfile the stream where to dump options 133 | * @param args_info the option struct to dump 134 | * @return 0 if everything went fine, NON 0 if an error took place 135 | */ 136 | int cmdline_parser_dump(FILE *outfile, 137 | struct gengetopt_args_info *args_info); 138 | 139 | /** 140 | * Save the contents of the option struct into a (text) file. 
141 | * This file can be read by the config file parser (if generated by gengetopt) 142 | * @param filename the file where to save 143 | * @param args_info the option struct to save 144 | * @return 0 if everything went fine, NON 0 if an error took place 145 | */ 146 | int cmdline_parser_file_save(const char *filename, 147 | struct gengetopt_args_info *args_info); 148 | 149 | /** 150 | * Print the help 151 | */ 152 | void cmdline_parser_print_help(void); 153 | /** 154 | * Print the version 155 | */ 156 | void cmdline_parser_print_version(void); 157 | 158 | /** 159 | * Initializes all the fields a cmdline_parser_params structure 160 | * to their default values 161 | * @param params the structure to initialize 162 | */ 163 | void cmdline_parser_params_init(struct cmdline_parser_params *params); 164 | 165 | /** 166 | * Allocates dynamically a cmdline_parser_params structure and initializes 167 | * all its fields to their default values 168 | * @return the created and initialized cmdline_parser_params structure 169 | */ 170 | struct cmdline_parser_params *cmdline_parser_params_create(void); 171 | 172 | /** 173 | * Initializes the passed gengetopt_args_info structure's fields 174 | * (also set default values for options that have a default) 175 | * @param args_info the structure to initialize 176 | */ 177 | void cmdline_parser_init (struct gengetopt_args_info *args_info); 178 | /** 179 | * Deallocates the string fields of the gengetopt_args_info structure 180 | * (but does not deallocate the structure itself) 181 | * @param args_info the structure to deallocate 182 | */ 183 | void cmdline_parser_free (struct gengetopt_args_info *args_info); 184 | 185 | /** 186 | * Checks that all the required options were specified 187 | * @param args_info the structure to check 188 | * @param prog_name the name of the program that will be used to print 189 | * possible errors 190 | * @return 191 | */ 192 | int cmdline_parser_required (struct gengetopt_args_info *args_info, 193 | const 
char *prog_name); 194 | 195 | 196 | #ifdef __cplusplus 197 | } 198 | #endif /* __cplusplus */ 199 | #endif /* CMDLINE_H */ 200 | -------------------------------------------------------------------------------- /src/liballprof/template.c: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * liballprof MPIP Wrapper 3 | * 4 | * Copyright: Indiana University 5 | * Author: Torsten Hoefler 6 | * 7 | *************************************************************************/ 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "fc_mangle.h" 20 | #define F77_FUNC FortranCInterface_GLOBAL 21 | 22 | #include "allprof.h" 23 | #include "numbers.h" 24 | #include "sync.h" 25 | 26 | #define true 1 27 | #define false 0 28 | 29 | #ifdef HAVE_NBC 30 | #include 31 | #endif 32 | 33 | #ifdef WRITER_THREAD 34 | #include 35 | #include 36 | #endif 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | 42 | 43 | 44 | 45 | 46 | #ifdef WRITER_THREAD 47 | #define VOLATILE volatile 48 | /* have a second buffer to swap */ 49 | static volatile char *buf1, *buf2, 50 | *curbuf, /* current buffer base address */ 51 | *bufptr; /* current position on buffer */ 52 | static volatile char exitflag=0; 53 | static sem_t threadsem, usersem; 54 | #else 55 | #define VOLATILE 56 | static char *buf1, 57 | *curbuf, /* current buffer base address */ 58 | *bufptr; /* position in current buf */ 59 | #endif 60 | static char buf_initialized = false; 61 | 62 | static int world_rank, world_size; 63 | 64 | static FILE *fp; 65 | static char mpi_initialized = false; 66 | 67 | static void resetbuffer(void *buffer) { 68 | memset(buffer, '\0', BUFSIZE); 69 | buf_initialized = true; 70 | } 71 | 72 | #ifdef WRITER_THREAD 73 | static void *writer_thread(void* arg) { 74 | /* loops infinitely - until exit 
/* get \ceil log_base(i) \ceil with integer arithmetic */
/* Counts how many times 1 can be scaled by `base` before exceeding x,
 * i.e. the number of digits of x in that base:
 *   logi(b, 0) == 0, logi(10, 9) == 1, logi(10, 10) == 2. */
int logi(int base, int x) {
  int digits;
  int scaled = 1;
  for (digits = 0; scaled <= x; ++digits) {
    scaled *= base;
  }
  return digits;
}
LOG_MPI_INT) else 282 | IFDTYPE(MPI_INTEGER, LOG_MPI_INTEGER) else 283 | IFDTYPE(MPI_CHARACTER, LOG_MPI_CHARACTER) else 284 | IFDTYPE(MPI_LONG, LOG_MPI_LONG) else 285 | IFDTYPE(MPI_SHORT, LOG_MPI_SHORT) else 286 | IFDTYPE(MPI_UNSIGNED, LOG_MPI_UNSIGNED) else 287 | IFDTYPE(MPI_UNSIGNED_LONG, LOG_MPI_UNSIGNED_LONG) else 288 | IFDTYPE(MPI_UNSIGNED_SHORT, LOG_MPI_UNSIGNED_SHORT) else 289 | IFDTYPE(MPI_FLOAT, LOG_MPI_FLOAT) else 290 | IFDTYPE(MPI_REAL, LOG_MPI_REAL) else 291 | IFDTYPE(MPI_DOUBLE, LOG_MPI_DOUBLE) else 292 | IFDTYPE(MPI_DOUBLE_PRECISION, LOG_MPI_DOUBLE_PRECISION) else 293 | IFDTYPE(MPI_LONG_DOUBLE, LOG_MPI_LONG_DOUBLE) else 294 | IFDTYPE(MPI_BYTE, LOG_MPI_BYTE) else 295 | IFDTYPE(MPI_FLOAT_INT, LOG_MPI_FLOAT_INT) else 296 | IFDTYPE(MPI_DOUBLE_INT, LOG_MPI_DOUBLE_INT) else 297 | IFDTYPE(MPI_LONG_INT, LOG_MPI_LONG_INT) else 298 | IFDTYPE(MPI_2INT, LOG_MPI_2INT) else 299 | IFDTYPE(MPI_SHORT_INT, LOG_MPI_SHORT_INT) else 300 | IFDTYPE(MPI_LONG_DOUBLE_INT, LOG_MPI_LONG_DOUBLE_INT) else 301 | IFDTYPE(MPI_LOGICAL, LOG_MPI_LOGICAL) else 302 | IFDTYPE(MPI_COMPLEX, LOG_MPI_COMPLEX) else 303 | IFDTYPE(MPI_DOUBLE_COMPLEX, LOG_MPI_DOUBLE_COMPLEX) else 304 | return snprintf(buffer, length, ":%lu", (unsigned long)type); 305 | } 306 | 307 | #define IFOP(OP, opnum) \ 308 | if(op == OP) { \ 309 | return snprintf(buffer, length, ":%i", opnum); \ 310 | } \ 311 | 312 | static int printop(MPI_Op op, char *buffer, int length) { 313 | 314 | IFOP(MPI_MIN, LOG_MPI_MIN) else 315 | IFOP(MPI_MAX, LOG_MPI_MAX) else 316 | IFOP(MPI_SUM, LOG_MPI_SUM) else 317 | IFOP(MPI_PROD, LOG_MPI_PROD) else 318 | IFOP(MPI_LAND, LOG_MPI_LAND) else 319 | IFOP(MPI_BAND, LOG_MPI_BAND) else 320 | IFOP(MPI_LOR, LOG_MPI_LOR) else 321 | IFOP(MPI_BOR, LOG_MPI_BOR) else 322 | IFOP(MPI_LXOR, LOG_MPI_LXOR) else 323 | IFOP(MPI_BXOR, LOG_MPI_BXOR) else 324 | IFOP(MPI_MINLOC, LOG_MPI_MINLOC) else 325 | IFOP(MPI_MAXLOC, LOG_MPI_MAXLOC) else 326 | return snprintf(buffer, length, ":%lu", (unsigned long)op); 327 | } 328 
def mpi_communication_pattern_selection(
    algorithm: str, comm_size: int, datasize: int, ptrn_config: str = None
):
    """Select a communication pattern name for the given collective.

    If ptrn_config names a JSON file, the first matching entry wins. Each
    entry has the shape (lower bounds inclusive, upper bounds exclusive):
        {
          "algorithm": "name",   # optional; empty/missing matches everything
          "ptrn": "pattern_name",
          "lower_bounds": {"comm_size": -1, "datasize": -1,
                           "combined": [[grad, intercept], ...] or null},
          "upper_bounds": {"comm_size": -1, "datasize": -1,
                           "combined": [[grad, intercept], ...] or null}
        }
    where -1 means "no bound" and each combined pair constrains
    datasize against grad * comm_size + intercept.

    Without a config file, built-in heuristics are used.

    :param algorithm: collective name, e.g. "reduce", "bcast", "allreduce"
    :param comm_size: number of ranks in the communicator
    :param datasize: message size in bytes
    :param ptrn_config: optional path to a JSON selection config
    :return: pattern name string (None for "dissemination", which has no
             pattern choices yet)
    :raises ValueError: if no config entry matches, or the algorithm has no
                        built-in heuristic
    """
    if ptrn_config:
        import json  # local import: only needed on the config-file path

        with open(ptrn_config, "r") as f:
            config = json.load(f)
        for c in config:
            # Empty or missing "algorithm" matches any collective.
            if c.get("algorithm") and c["algorithm"] != algorithm:
                continue
            lower = c["lower_bounds"]
            upper = c["upper_bounds"]
            if lower["comm_size"] != -1 and comm_size < lower["comm_size"]:
                continue
            if upper["comm_size"] != -1 and comm_size >= upper["comm_size"]:
                continue
            if lower["datasize"] != -1 and datasize < lower["datasize"]:
                continue
            if upper["datasize"] != -1 and datasize >= upper["datasize"]:
                continue
            # BUG FIX: the original used `continue` inside the inner
            # `for grad, intercept` loops, which only skipped the inner
            # iteration and never rejected the config entry, so combined
            # bounds were effectively ignored. Use any() so a single
            # violated combined bound rejects this entry.
            if lower["combined"] is not None and any(
                datasize < grad * comm_size + intercept
                for grad, intercept in lower["combined"]
            ):
                continue
            if upper["combined"] is not None and any(
                datasize >= grad * comm_size + intercept
                for grad, intercept in upper["combined"]
            ):
                continue
            return c["ptrn"]
        raise ValueError(
            f"Cannot find a pattern for comm_size={comm_size} and datasize={datasize} according to the config file"
        )
    else:
        if algorithm == "reduce":
            # use binomial tree for large data size and when the communicator size is a power of 2
            if datasize > 4096 and comm_size & (comm_size - 1) == 0:
                return "binomialtree"
            else:
                return "linear"
        elif algorithm == "bcast":
            # use binomial tree for small data size and when the communicator size is a power of 2
            if datasize <= 4096 and comm_size & (comm_size - 1) == 0:
                return "binomialtree"
            else:
                return "linear"
        elif algorithm == "dissemination":
            # TODO currently not implemented to support different patterns
            pass
        elif algorithm == "allreduce":
            # Use recdoub for power of 2 communicator size and small data sizes
            if datasize <= 4096 and comm_size & (comm_size - 1) == 0:
                return "recdoub"
            else:
                return "ring"
        elif algorithm == "alltoall" or algorithm == "alltoallv":
            return "linear"
        else:
            raise ValueError(f"Communication type {algorithm} not implemented")
def allreduce(
    comm_size: int,
    datasize: int,
    tag: int = 42,
    ptrn: str = "recdoub",
    **kwargs,
):
    """Build an allreduce as a reduce-scatter phase followed by an allgather.

    The allgather phase uses a tag offset by comm_size so its messages do
    not collide with the reduce-scatter messages.

    :param comm_size: number of ranks
    :param datasize: message size in bytes
    :param tag: base message tag
    :param ptrn: "recdoub" or "ring"
    :return: the reduce-scatter GoalComm with the allgather appended
    :raises ValueError: for an unsupported pattern
    """
    phases = []
    for phase_algorithm, phase_tag in (
        ("reduce-scatter", tag),
        ("allgather", tag + comm_size),
    ):
        if ptrn == "recdoub":
            phases.append(
                recdoub(
                    comm_size=comm_size,
                    datasize=datasize,
                    tag=phase_tag,
                    algorithm=phase_algorithm,
                    **kwargs,
                )
            )
        elif ptrn == "ring":
            phases.append(
                ring(
                    comm_size=comm_size,
                    datasize=datasize,
                    tag=phase_tag,
                    algorithm=phase_algorithm,
                    rounds=comm_size - 1,
                    **kwargs,
                )
            )
        else:
            raise ValueError(f"allreduce with pattern {ptrn} not implemented")
    first, second = phases
    first.Append(second)
    return first
def alltoallv(
    comm_size: int,
    datasize: int,
    tag: int = 42,
    ptrn: str = "linear",
    window_size: int = 0,
    **kwargs,
):
    """Build an alltoallv schedule.

    Only the linear pattern is implemented; with window_size == 0 all
    exchanges are issued in parallel, otherwise a sliding window of that
    size is used.

    :raises ValueError: for an unsupported pattern
    """
    if ptrn != "linear":
        raise ValueError(f"alltoallv with pattern {ptrn} not implemented")
    # TODO: currently data is only randomized, add support for custom data sizes
    return linear(
        comm_size=comm_size,
        datasize=datasize,
        tag=tag,
        algorithm="alltoallv",
        parallel=(window_size == 0),
        randomized_data=True,
        window_size=window_size,
        **kwargs,
    )
30 | if re.match("#.*\n", allprof_line): 31 | return 32 | # if the line is whitespace ignore it 33 | if re.match("\s*\n", allprof_line): 34 | return 35 | # check if it matches any of the defined MPI functions 36 | m = re.match("(MPI_.+?):(.+:(\d+|-))\n", allprof_line) 37 | if m: 38 | name = m.group(1) 39 | args = m.group(2) 40 | if hasattr(self, name): 41 | args = args.strip().split(":") 42 | # turn args into ints where possible (ddts, comms, ... are not ints!) 43 | newargs = [] 44 | for arg in args: 45 | newarg = 0 46 | try: 47 | newarg = int(arg) 48 | except: 49 | newarg = arg 50 | newargs.append(newarg) 51 | args = newargs 52 | args.append(rank) 53 | if self.verbose: 54 | print("Parsing "+name+" with args "+str(args)) 55 | # for each line we get its start and end time (first and last elem in args) 56 | # we add a calc of the size of the difference between the endtime of the last 57 | # operation on rank and the starttime to account for any computation that might 58 | # have happened between calls - we init last_op in MPI_Init, so it might be None 59 | if self.getLastOp(rank) is not None: 60 | tstart = int(args[0]) 61 | tend = int(args[-1]) 62 | last_op, last_endtime = self.getLastOp(rank) 63 | newCalc = self.comm[rank].Calc(tstart - last_endtime) 64 | newCalc.requires(last_op) 65 | self.setLastOp(rank, newCalc, tend) 66 | newcomm = getattr(self, name)(*args) 67 | if newcomm is not None: 68 | # append rank of newcomm to self.comm, however all independent ops in newcomm depend on last_op 69 | # and the new last_op becomes the last op in newcomm (if there is only one, otherwise we make a calc of size 0) 70 | self.comm[rank].Append(newcomm[rank], dependOn=self.getLastOp(rank)[0]) 71 | lastop = None 72 | l = newcomm[rank].LastOps() 73 | if len(l) == 1: 74 | lastop = l[0] 75 | else: 76 | lastop = self.comm[rank].Calc(0) 77 | lastop.requires(self.getLastOp(rank)[0]) # just to be save in case newcomm is empty 78 | for o in l: 79 | lastop.requires(o) 80 | 
self.setLastOp(rank, lastop, args[-1]) 81 | else: 82 | raise NotImplementedError("Parsing of "+allprof_line.strip()+" is not implemented yet.") 83 | else: 84 | raise ValueError("The line "+allprof_line+" doesn't look like anything allprof should output!") 85 | 86 | def MPI_Initialized(self, tstart, flagptr, tend, rank): 87 | return None # this doesn't modify the goal schedule 88 | 89 | def MPI_Init(self, tstart, argcptr, argvptr, tend, rank: int): 90 | self.setLastOp(rank, self.comm[rank].Calc(tend), tend) 91 | return None 92 | 93 | def MPI_Comm_size(self, tstart, comm, sizeptr, tend, rank): 94 | return None 95 | 96 | def MPI_Comm_rank(self, tstart, comm, rankptr, tend, rank): 97 | return None 98 | 99 | def MPI_Irecv(self, tstart, buf, count, datatype, src, tag, comm, req, tend, rank): 100 | g = GoalComm(self.comm.CommSize()) 101 | ddtsize = self.getDDTSize(datatype) 102 | op = g[rank].Recv(int(src), int(tag), int(count)*ddtsize) 103 | self.addRequest(rank, req, op) 104 | return g #TODO handle splitted comms 105 | 106 | def MPI_Isend(self, tstart, buf, count, datatype, dst, tag, comm, req, tend, rank): 107 | g = GoalComm(self.comm.CommSize()) 108 | ddtsize = self.getDDTSize(datatype) 109 | op = g[rank].Send(int(dst), int(tag), int(count)*ddtsize) 110 | self.addRequest(rank, req, op) 111 | return g #TODO handle splitted comms 112 | 113 | def MPI_Waitall(self, tstart, count, requestptr, statusptr, tend, rank): 114 | calc = None 115 | for ridx in range(0, int(count)): 116 | request = int(requestptr)+ridx*self.REQUEST_SIZE 117 | op = self.findRequest(rank, request) 118 | if op is None: 119 | print("Waitall on a request we didn't see before - might be ok if the user initialized it to MPI_REQUEST_NULL, but also might mean request size is set to the wrong constant! 
-- check the code of the trace app!") 120 | continue 121 | if calc is None: 122 | calc = self.comm[rank].Calc(0) 123 | calc.requires(op) 124 | # Waitall directly modifies self.comm, thus returns None and we need to handle deps from/on last op in here 125 | calc.requires(self.getLastOp(rank)[0]) 126 | self.setLastOp(rank, calc, tend) 127 | return None 128 | 129 | def MPI_Wait(self, tstart, requestptr, statusptr, tend, rank): 130 | calc = None 131 | op = self.findRequest(rank, requestptr) 132 | if op is None: 133 | print("Wait on a request we didn't see before - might be ok if the user initialized it to MPI_REQUEST_NULL, but also might mean request size is set to the wrong constant! -- check the code of the trace app!") 134 | return 135 | calc = self.comm[rank].Calc(0) 136 | calc.requires(op) 137 | # Wait directly modifies self.comm, thus returns None and we need to handle deps from/on last op in here 138 | calc.requires(self.getLastOp(rank)[0]) 139 | self.setLastOp(rank, calc, tend) 140 | return None 141 | 142 | def MPI_Barrier(self, tstart, comm, tend, rank): 143 | return alltoall(datasize=0, comm_size=self.comm.CommSize()) 144 | 145 | def MPI_Wtime(self, tstart, tend, rank): 146 | return None #this does not modify the goal schedule 147 | 148 | def MPI_Allreduce(self, tstart, sendbuf, recvbuf, count, datatype, op, comm, tend, rank): 149 | datasize = self.getDDTSize(datatype) * count 150 | return allreduce(datasize, self.comm.CommSize()) 151 | 152 | def MPI_Finalize(self, tstart, tend, rank): 153 | return None #this does not modify the goal schedule 154 | 155 | def addRequest(self, rank, req, op): 156 | self.requests[rank][int(req)] = op 157 | 158 | def findRequest(self, rank, req): 159 | if int(req) in self.requests[rank]: 160 | op = self.requests[rank][int(req)] 161 | return op 162 | return None 163 | 164 | def deleteRequest(self, rank, req): 165 | if int(req) in self.requests[rank]: 166 | self.requests[rank].pop(int(req)) 167 | 168 | def getDDTSize(self, ddtstr): 
169 | return int(ddtstr.split(",")[1]) 170 | 171 | def parseDir(self, tracepath, nameptrn="pmpi-trace-rank-*.txt", abortonerror=False): 172 | self.tracepath = tracepath 173 | searchpath = os.path.join(tracepath, nameptrn) 174 | files = glob.glob(searchpath) 175 | if len(files) < 1: 176 | raise ValueError("No tracefiles found at path "+str(searchpath)) 177 | self.comm = GoalComm(len(files)) 178 | for rank in range(0, self.comm.CommSize()): 179 | self.requests.append({}) 180 | for rank in range(0, self.comm.CommSize()): 181 | file_name = str(rank).join(nameptrn.split("*")) 182 | fh = open(os.path.join(tracepath, file_name), "r") 183 | while True: 184 | line = fh.readline() 185 | if not line: 186 | if self.verbose: 187 | print("Finished parsing ranks "+str(rank)+" trace.") 188 | break 189 | else: 190 | try: 191 | self.parseLine(rank, line) 192 | except Exception as e: 193 | if abortonerror: 194 | raise e 195 | sys.exit(1) 196 | else: 197 | if self.verbose: 198 | print("There was a problem but we attempt to carry on: "+str(e)) 199 | fh.close() 200 | return self.comm 201 | 202 | 203 | 204 | if __name__ == "__main__": 205 | parser = argparse.ArgumentParser( 206 | prog='Schedgen2 Trace Parser', 207 | description='Reads an MPI trace in liballprof format and outputs a GOAL schedule (or a graphical representation of it).') 208 | parser.add_argument('-v', '--verbose', action='store_true', help="Be more verbose, i.e., print progress info.") 209 | parser.add_argument('-i', '--tracedir', required=True, help="Path to the directory containing the individual traces, each tracefile name follows nameptrn.") 210 | parser.add_argument('-n', '--nameptrn', default="pmpi-trace-rank-*.txt", help="Filename of traces, use * to indicate rank id (in MPI_COMM_WORLD), defaults to pmpi-trace-rank*.txt") 211 | parser.add_argument('-f', '--output-format', default="goal", choices=["goal", "graphviz"], help="Output format, either goal or graphviz, defaults to goal") 212 | parser.add_argument('-o', 
'--outfile', default="-", help="Output file name, use - for stdout (if verbose mode is on progress will be printed to stdout), defaults to -.") 213 | parser.add_argument('-r', '--requestsize', default=8, help="Size of an MPI_REQUEST in bytes, defaults to 8.") 214 | parser.add_argument('-a', '--abortonerror', action='store_true', help="By default we ignore errors such as not implemented MPI functions. Use this flag to abort on such errors.") 215 | args = parser.parse_args() 216 | p = AllprofParser(requestsize=args.requestsize, verbose=args.verbose) 217 | comm = p.parseDir(args.tracedir, nameptrn=args.nameptrn, abortonerror=args.abortonerror) 218 | outfile = sys.stdout 219 | if args.outfile != "-": 220 | outfile = open(args.outfile, "w") 221 | comm.write_goal(fh=outfile, format=args.output_format) 222 | outfile.close() 223 | 224 | -------------------------------------------------------------------------------- /src/liballprof/sync.c: -------------------------------------------------------------------------------- 1 | #include "sync.h" 2 | #include 3 | 4 | #define MAX_DOUBLE 1e100 5 | #define NUMBER_SMALLER 100 6 | static double *diffs=NULL; /* global array of all diffs to all ranks - only 7 | completely valid on rank 0 */ 8 | static double gdiff; 9 | 10 | double sync_peer(int client, int peer, MPI_Comm comm) { 11 | const double ABORT_VAL = 9999999.0; 12 | int notsmaller = 0; /* count number of RTTs that are *not* smaller than 13 | the current smallest one */ 14 | int server=0; 15 | double tstart, /* local start time */ 16 | tend, /* local end time */ 17 | trem, /* remote time */ 18 | tmpdiff, /* temporary difference to remote clock */ 19 | diff; /* difference to remote clock */ 20 | int res, r; 21 | res = PMPI_Comm_rank(comm, &r); 22 | 23 | if(!client) server = 1; 24 | 25 | double smallest = MAX_DOUBLE; /* the current smallest time */ 26 | do { 27 | /* the client sends a ping to the server and waits for a pong (and 28 | * takes the RTT time). 
It repeats this procedure until the last 29 | * NUMBER_SMALLER RTTs have not been smaller than the smallest 30 | * (tries to find the smallest RTT). When the smallest RTT is 31 | * found, it sends a special flag (0d) to the server that it knows 32 | * that the benchmark is finished. The client computes the diff 33 | * with this smallest RTT with the scheme described in the paper. 34 | * */ 35 | if(client) { 36 | tstart = PMPI_Wtime(); 37 | res = PMPI_Send(&tstart, 1, MPI_DOUBLE, peer, 0, comm); 38 | res = PMPI_Recv(&trem, 1, MPI_DOUBLE, peer, 0, comm, MPI_STATUS_IGNORE); 39 | tend = PMPI_Wtime(); 40 | tmpdiff = tstart + (tend-tstart)/2 - trem; 41 | 42 | if(tend-tstart < smallest) { 43 | smallest = tend-tstart; 44 | notsmaller = 0; 45 | diff = tmpdiff; /* save new smallest diff-time */ 46 | } else { 47 | if(++notsmaller == NUMBER_SMALLER) { 48 | /* send abort flag to client */ 49 | trem = ABORT_VAL; 50 | res = PMPI_Send(&trem, 1, MPI_DOUBLE, peer, 0, comm); 51 | /*printf("[%i] diff to %i: %lf\n", r, peer, diff*1e6);*/ 52 | break; 53 | } 54 | } 55 | /*printf("[%i] notsmaller: %i\n", r, notsmaller);*/ 56 | } 57 | 58 | /* The server just replies with the local time to the client 59 | * requests and aborts the benchmark if the abort flag (0d) is 60 | * received in any of the requests. 
*/ 61 | if(server) { 62 | /* printf("[%i] server: waiting for ping from %i\n", r, peer); */ 63 | res = PMPI_Recv(&tstart, 1, MPI_DOUBLE, peer, 0, comm, MPI_STATUS_IGNORE); 64 | if(tstart == ABORT_VAL) {break;} /* this is the signal from the client to stop */ 65 | trem = PMPI_Wtime(); /* fill in local time on server */ 66 | /* printf("[%i] server: got ping from %i (%lf) \n", r, peer, tstart); */ 67 | res = PMPI_Send(&trem, 1, MPI_DOUBLE, peer, 0, comm); 68 | } 69 | /* this loop is only left with a break */ 70 | } while(1); 71 | return diff; 72 | } 73 | 74 | 75 | /* tree-based synchronization mechanism 76 | * - */ 77 | double sync_tree(MPI_Comm comm) { 78 | int p, r, res, dist, round; 79 | int power; /* biggest power of two value that is smaller or equal to p */ 80 | int peer; /* synchronization peer */ 81 | double diff; 82 | 83 | res = PMPI_Comm_rank(comm, &r); 84 | res = PMPI_Comm_size(comm, &p); 85 | 86 | /* reallocate tha diffs array with the right size */ 87 | if(diffs != NULL) free(diffs); 88 | diffs = (double*)calloc(1, p*sizeof(double)); 89 | 90 | /* check if p is power of 2 91 | { int i=1; 92 | while((i = i << 1) < p) {}; 93 | if(i != p) { 94 | printf("communicator size (%i) must be power of 2 (%i)!\n", p, i); 95 | MPI_Abort(MPI_COMM_WORLD, 1); 96 | } 97 | }*/ 98 | 99 | { /* get the maximum power of 2 that is smaller than p */ 100 | int num=1; 101 | do { 102 | num *= 2; 103 | } while(num*2 <= p); 104 | power = num; 105 | } 106 | 107 | /* if I am in the powers-of two group? 
*/ 108 | if(r < power) { 109 | dist = 1; /* this gets left-shifted (<<) every round and is after 110 | $\lceil log_2(p) \rceil$ rounds >= p */ 111 | round = 1; /* fun and printf round counter - not really needed */ 112 | do { 113 | int client, server; 114 | 115 | client = 0; server = 0; 116 | client = ((r % (dist << 1)) == 0); 117 | server = ((r % (dist << 1)) == dist); 118 | 119 | if(server) { 120 | peer = r - dist; 121 | if(peer < 0) server = 0; /* disable yourself if there is no peer*/ 122 | /*if(server) printf("(%i) %i <- %i\n", round, r, peer);*/ 123 | } 124 | if(client) { 125 | peer = r + dist; 126 | if(peer >= p) client = 0; /* disable yourself if there is no peer*/ 127 | /*if(client) printf("(%i) %i -> %i\n", round, peer, r);*/ 128 | } 129 | if(!client && !server) break; /* TODO: leave loop if no peer left - 130 | works only for power of two process 131 | groups */ 132 | 133 | diff = sync_peer(client, peer, comm); 134 | 135 | /* diff is the time difference between client and server. This is 136 | * only valid on the client, and is derived with the following 137 | * formula: diff = tstart + (tend-tstart)/2 - trem; 138 | * example: 139 | * Client Server 140 | * tstart = 100 200 (those are local times, but at the same moment) 141 | * send message (L=10) 142 | * 110 trem = 210 143 | * send message back (L=10) 144 | * tend = 120 220 145 | * 146 | * diff = 100 + (120-100)/2 - 210 147 | * = 100 + 10 - 210 = 100 148 | * 149 | * now, to get the local time on a server on a client: 150 | * t_s = r_c - diff 151 | */ 152 | 153 | /* the client measured the time difference to his peer-server of the 154 | * current round. Since rank 0 is the global synchronization point, 155 | * rank 0's array has to be up to date and the other clients have to 156 | * communicate all their knowledge to rank 0 as described in the 157 | * paper. 
*/ 158 | 159 | if(client) { 160 | /* all clients just measured the time difference to node r + diff 161 | * (=peer) */ 162 | diffs[peer] = diff; 163 | 164 | /* we are a client - we need to receive all the knowledge 165 | * (differences) that the server we just synchronized with holds! 166 | * Our server has been "round-1" times client and measures 167 | * "round-1" diffs */ 168 | if(round > 1) { 169 | double *recvbuf; /* receive the server's data */ 170 | int items, i; 171 | 172 | items = (1 << (round-1))-1; 173 | recvbuf = (double*)malloc(items*sizeof(double)); 174 | 175 | res = PMPI_Recv(recvbuf, items, MPI_DOUBLE, peer, 0, comm, MPI_STATUS_IGNORE); 176 | 177 | /*printf("[%i] round: %i, client merges %i items\n", r, round, items);*/ 178 | /* merge data into my own field */ 179 | for(i=0; i 1) { 194 | int i, tmpdist, tmppeer, items; 195 | double *sendbuf; 196 | 197 | items = (1 << (round-1))-1; 198 | sendbuf = (double*)malloc(items*sizeof(double)); 199 | 200 | /*printf("[%i] round: %i, server sends %i items\n", r, round, items);*/ 201 | 202 | /* fill buffer - every server holds the $2^(round-1)-1$ next 203 | * diffs */ 204 | for(i=0; i power=4 219 | * rank 0..3 are synched at this stage and rank 4 and 5 have to sync 220 | * with 0 and 1 respectively */ 221 | if(r < power) { 222 | /* check if I have a partner in the non power group */ 223 | if(p - power > r) { /* I have a partner */ 224 | peer = power + r; /* that's my partner */ 225 | /*printf("[%i] server for %i\n", r, peer);*/ 226 | sync_peer(0, peer, comm); /* I am the server */ 227 | } 228 | } else { 229 | peer = r - power; /* that's my partner */ 230 | /*printf("[%i] client for %i\n", r, peer);*/ 231 | diff = sync_peer(1, peer, comm); /* I am the client */ 232 | res = PMPI_Send(&diff, 1, MPI_DOUBLE, 0, 1, comm); 233 | } 234 | 235 | if(0 == r) { 236 | int syncpeer; 237 | MPI_Request *reqs; 238 | double *tmpdiffs; 239 | 240 | reqs = (MPI_Request*)malloc((p-power)*sizeof(MPI_Request)); 241 | tmpdiffs = 
class GoalLabeller:
    """Hands out stable, consecutive integer labels for GOAL ops and ids for
    communicators.

    The same op (or comm) always maps to the same number; a new op/comm gets
    the next free number, starting at 1.
    """

    def __init__(self):
        self.next_label = 1  # next unused op label
        self.next_comm = 1   # next unused communicator id
        self.op_dict = {}    # op object -> label
        self.comm_dict = {}  # comm object -> id

    def GetLabel(self, op):
        """Return the label of op, assigning the next free label on first use."""
        # Idiom fix: direct negative membership test instead of
        # `if op in ...: pass / else:`.
        if op not in self.op_dict:
            self.op_dict[op] = self.next_label
            self.next_label += 1
        return self.op_dict[op]

    def GetCommID(self, comm):
        """Return the id of comm, assigning the next free id on first use."""
        if comm not in self.comm_dict:
            self.comm_dict[comm] = self.next_comm
            self.next_comm += 1
        return self.comm_dict[comm]

    def MakeTag(self, tag, comm):
        """Combine the user tag and the comm tag portion.

        NOTE(review): tags collide once a comm id reaches 1000 -- verify the
        number of communicators stays below that.
        """
        return tag * 1000 + comm
class GoalCalc(GoalOp):
    """A local computation of the given size (GOAL 'calc' op)."""

    def __init__(self, size):
        super().__init__()
        self.size = size

    def write_goal(self, labeller, fh, comm, basecomm, format="goal"):
        """Emit this calc op in the requested output format."""
        label = labeller.GetLabel(self)
        if format == "goal":
            fh.write("l{0}: calc {1}\n".format(label, self.size))
        elif format == "graphviz":
            fh.write("\"l{0}\" [label=\"calc {1}\"]\n".format(label, self.size))
        else:
            raise NotImplementedError("Requested output format "+str(format)+" not implemented!")
return op 147 | 148 | def Merge(self, mrank): 149 | self.ops += mrank.ops 150 | 151 | def Append(self, arank, dependOn=None, allOpsDepend=False): 152 | """ Append arank to self. If dependOn is None, all ops in self need to finish before we start executing aranks ops. If dependOn is given we only depend on that. 153 | By default (allOpsDepend) only independent ops in arank depend on self, however if allOpsDepend=True all ops do. """ 154 | if dependOn is None: 155 | c = self.Calc(0) 156 | for l in self.LastOps(): 157 | if l == c: 158 | pass 159 | else: 160 | c.requires(l) 161 | else: 162 | c = dependOn 163 | self.ops += arank.ops 164 | depops = arank.IndepOps() 165 | if allOpsDepend: 166 | depops = arank.ops 167 | for i in depops: 168 | i.requires(c) 169 | 170 | def IndepOps(self): 171 | res = [x for x in self.ops if (len(x.depends_on) == 0)] 172 | return res 173 | 174 | def LastOps(self): 175 | rem = [] 176 | for x in self.ops: 177 | for d in x.depends_on: 178 | rem.append(d) 179 | s = set(rem) 180 | res = [x for x in self.ops if x not in s] 181 | return res 182 | 183 | 184 | def write_goal(self, labeller, fh, rankid=True, basecomm=None, format="goal"): 185 | if basecomm is None: 186 | basecomm = ( 187 | self.comm 188 | ) # stupid python evals default args at method definition, not call time :( 189 | if rankid: 190 | if format == "goal": 191 | fh.write("rank " + str(self.rank) + " {\n") 192 | elif format == "graphviz": 193 | fh.write("subgraph cluster_" + str(self.rank) + " {\n") 194 | fh.write("style=filled; color=lightgrey; node [style=filled,color=white]; label=\"rank "+str(self.rank)+"\";") 195 | for op in self.ops: 196 | op.write_goal(labeller, fh, self.comm, basecomm, format=format) 197 | for op in self.ops: 198 | for req in op.depends_on: 199 | if format == "goal": 200 | fh.write( 201 | "l{label1} requires l{label2}\n".format( 202 | label1=labeller.GetLabel(op), label2=labeller.GetLabel(req) 203 | ) 204 | ) 205 | if format == "graphviz": 206 | # we "invert" 
dependencies in grphviz format, i.e, a->b means a is executed before b. 207 | # Where in goal it would be "b requires a" - but this would make graphs look upside down. 208 | fh.write( 209 | "l{label2} -> l{label1}\n".format( 210 | label1=labeller.GetLabel(op), label2=labeller.GetLabel(req) 211 | ) 212 | ) 213 | for sc in self.comm.subcomms: 214 | sc.write_goal_subcomm(labeller, fh, self.rank, basecomm, format=format) 215 | if rankid: 216 | fh.write("}\n\n") 217 | 218 | 219 | class GoalComm: 220 | def __init__(self, comm_size): 221 | self.base_comm = self 222 | self.comm_size = comm_size 223 | self.subcomms = [] 224 | self.ranks = [GoalRank(comm=self, rank=rank) for rank in range(comm_size)] 225 | 226 | def __getitem__(self, index): 227 | return self.ranks[index] 228 | 229 | def Append(self, comm): 230 | """Append comm to self, such that when all ops in self are finished, those in comm can start.""" 231 | if comm.CommSize() > self.CommSize(): 232 | raise ValueError("Cannot append a larger comm to a smaller one!") 233 | if len(comm.subcomms) > 0: 234 | raise ValueError("Cannot append a comm with subcomms, flatten first?") 235 | for idx, rank in enumerate(self.ranks): 236 | rank.Append(comm[idx]) 237 | 238 | def Merge(self, comm): 239 | """Merge comm into self, such that the ops in both run in parallel.""" 240 | if comm.CommSize() > self.CommSize(): 241 | raise "Cannot merge a larger comm to a smaller one!" 
242 | if len(comm.subcomms) > 0: 243 | raise ValueError("Cannot append a comm with subcomms, flatten first?") 244 | for idx, rank in enumerate(self.ranks): 245 | rank.Merge(comm[idx]) 246 | 247 | def Send(self, src, dst, tag, size): 248 | return self[src].Send(dst, tag, size) 249 | 250 | def Recv(self, dst, src, tag, size): 251 | return self[dst].Recv(src, tag, size) 252 | 253 | def Calc(self, host, size): 254 | return self[host].Calc(size) 255 | 256 | def CommSize(self): 257 | return self.comm_size 258 | 259 | def CommSplit(self, color, key): 260 | if len(list(color)) < self.comm_size or len(list(key)) < self.comm_size: 261 | raise ValueError( 262 | "The length of color and key array must match the communicator size." 263 | ) 264 | newcomms = [] 265 | order = [ 266 | (oldrank, color[oldrank], key[oldrank]) 267 | for oldrank in range(0, self.comm_size) 268 | ] 269 | color_buckets = {} 270 | for o in order: 271 | if o[1] in color_buckets: 272 | color_buckets[o[1]].append(o) 273 | else: 274 | color_buckets[o[1]] = [o] 275 | for c in color_buckets.keys(): 276 | c_list = sorted( 277 | color_buckets[c], key=lambda x: x[2] 278 | ) # sort by key within color 279 | nc = GoalComm(len(c_list)) 280 | nc.base_comm = self 281 | for idx, r in enumerate(nc): 282 | r.base_rank = c_list[idx][ 283 | 0 284 | ] # store the rank the new rank had in the comm it was splitted from 285 | newcomms.append(nc) 286 | self.subcomms += newcomms 287 | return newcomms 288 | 289 | def write_goal(self, labeller=None, fh=sys.stdout, format="goal"): 290 | if format == "goal": 291 | fh.write("num_ranks " + str(len(self.ranks)) + "\n\n") 292 | elif format == "graphviz": 293 | fh.write("digraph G {\n") 294 | if labeller is None: 295 | labeller = GoalLabeller() 296 | for r in self.ranks: 297 | r.write_goal(labeller, fh, rankid=True, basecomm=self, format=format) 298 | if format == "graphviz": 299 | fh.write("}\n") 300 | 301 | 302 | def write_goal_subcomm(self, labeller, fh, rank, basecomm, format="goal"): 
def binomialtree(
    comm_size: int,
    datasize: int,
    tag: int,
    algorithm: str = "reduce",
    compute_time_dependency: int = 0,
    **kwargs,
) -> GoalComm:
    """
    Create a binomial tree communication pattern.

    :param comm_size: number of ranks in the communicator
    :param datasize: size of data to send or receive
    :param tag: tag that is used for all send and receive operations
    :param algorithm: communication algorithm that uses this pattern; default is reduce
    :param compute_time_dependency: compute time dependency for each send operation; if 0 (default), no compute time is added
    :param kwargs: additional arguments that are ignored
    :return: GoalComm object that represents the communication pattern
    """
    assert algorithm in [
        "reduce",
        "bcast",
        "scatter",
    ], "direction must be reduce, bcast, or scatter"
    comm = GoalComm(comm_size)
    num_rounds = ceil(log2(comm_size))
    for rank in range(comm_size):
        send, recv = None, None
        for step in range(num_rounds):
            mask = 1 << step  # same as 2**step
            # "Upper" partner: rank + 2**step, only for ranks below 2**step.
            if rank < mask and rank + mask < comm_size:
                peer = rank + mask
                if algorithm == "reduce":
                    recv = comm.Recv(size=datasize, src=peer, dst=rank, tag=tag)
                elif algorithm in ["bcast", "scatter"]:
                    send = comm.Send(size=datasize, dst=peer, src=rank, tag=tag)
                else:
                    raise ValueError(
                        "direction "
                        + str(algorithm)
                        + " in binomialtree not implemented."
                    )
                # Once this rank both received and sent, order them (optionally
                # with a compute op in between).
                if (send is not None) and (recv is not None):
                    if compute_time_dependency > 0:
                        calc = comm.Calc(host=rank, size=compute_time_dependency)
                        calc.requires(recv)
                        send.requires(calc)
                    else:
                        send.requires(recv)
            # "Lower" partner: rank - 2**step, fires exactly once per rank.
            if mask <= rank < (mask << 1):
                peer = rank - mask
                if algorithm == "reduce":
                    send = comm.Send(size=datasize, dst=peer, src=rank, tag=tag)
                if algorithm in ["bcast", "scatter"]:
                    recv = comm.Recv(size=datasize, src=peer, dst=rank, tag=tag)

    return comm
f"the pattern does not currently support the {algorithm} algorithm" 104 | ) 105 | if dest < comm_size: 106 | send = comm.Send(size=message_size, src=rank, dst=dest, tag=tag + r) 107 | if dependencies[rank] is not None: 108 | send.requires(dependencies[rank]) 109 | dependencies[rank] = comm.Recv( 110 | size=message_size, src=dest, dst=rank, tag=tag + r 111 | ) 112 | if compute_time_dependency > 0: 113 | calc = comm.Calc(host=rank, size=compute_time_dependency) 114 | calc.requires(dependencies[rank]) 115 | dependencies[rank] = calc 116 | return comm 117 | 118 | 119 | def ring( 120 | comm_size: int, 121 | datasize: int, 122 | tag: int, 123 | algorithm: str = "reduce-scatter", 124 | rounds: int = 1, 125 | compute_time_dependency: int = 0, 126 | **kwargs, 127 | ) -> GoalComm: 128 | """ 129 | Create a ring communication pattern. 130 | 131 | :param comm_size: number of ranks in the communicator 132 | :param datasize: size of data to send in each round 133 | :param tag: base tag that is incremented for each round 134 | :param algorithm: communication algorithm that uses this pattern; default is reduce-scatter 135 | :param rounds: number of rounds to send data around the ring 136 | :param compute_time_dependency: compute time dependency for each send operation; if 0 (default), no compute time is added 137 | :param kwargs: additional arguments that are ignored 138 | :return: GoalComm object that represents the communication pattern 139 | """ 140 | comm = GoalComm(comm_size) 141 | dependencies = [None] * comm_size 142 | if algorithm in ["reduce-scatter", "allgather"]: 143 | datasize = datasize // comm_size 144 | for r in range(rounds): 145 | for rank in range(comm_size): 146 | send = comm.Send( 147 | size=datasize, src=rank, dst=(rank + 1) % comm_size, tag=tag + r 148 | ) 149 | if dependencies[rank] is not None: 150 | send.requires(dependencies[rank]) 151 | dependencies[rank] = comm.Recv( 152 | size=datasize, src=(rank - 1) % comm_size, dst=rank, tag=tag + r 153 | ) 154 | if 
def _single_source_or_destination_linear(
    comm: GoalComm,
    anchor: int,
    datasizes: Union[List[int], List[List[int]]],
    tag: int,
    algorithm: str = "bcast",
    parallel: bool = True,
    window_size: int = 0,
    compute_time_dependency: int = 0,
) -> GoalComm:
    """
    Create a single source or destination linear communication pattern.

    :param comm: GoalComm object that contains the ranks
    :param anchor: rank that is the source or destination
    :param datasizes: size(s) of data to send or receive
    :param tag: tag that is used for all send and receive operations
    :param algorithm: communication algorithm that uses this pattern; default is bcast (single source, multiple destinations)
    :param parallel: whether to send multiple messages in parallel; default is True (send messages in parallel)
    :param window_size: number of operations that can be in flight at once; default is 0 (no windowing)
    :param compute_time_dependency: compute time dependency for each send operation; default is 0 (no compute time)
    :return: GoalComm object that represents the communication pattern
    """
    assert algorithm in [
        "bcast",
        "reduce",
        "alltoall",
        "alltoallv",
        "scatter",
        "incast",
        "outcast",
    ], f"the pattern does not currently support the {algorithm} algorithm"
    assert (
        parallel and window_size == 0 and compute_time_dependency == 0
    ) or algorithm not in [
        "reduce",
        "incast",
    ], f"We do not introduce dependencies, windowing, or compute time for linear receives"

    dependency = None
    if window_size > 0:
        window = [None] * window_size
        next_slot = 0
    for rank in range(comm.comm_size):
        if rank == anchor:
            continue  # the anchor never messages itself
        if algorithm in ["bcast", "alltoall", "alltoallv", "scatter", "outcast"]:
            if algorithm in ["alltoall", "alltoallv"]:
                datasize = datasizes[anchor][rank]
            else:
                datasize = datasizes[rank]
            send = comm.Send(src=anchor, dst=rank, size=datasize, tag=tag)
            recv = comm.Recv(src=anchor, dst=rank, size=datasize, tag=tag)
            if not parallel:
                if window_size == 0:
                    # Fully serialized: each send waits for the previous one.
                    if dependency is not None:
                        send.requires(dependency)
                    dependency = send
                    if compute_time_dependency > 0:
                        calc = comm.Calc(host=anchor, size=compute_time_dependency)
                        calc.requires(dependency)
                        dependency = calc
                else:
                    # Windowed: at most window_size sends in flight at once.
                    if window[next_slot] is not None:
                        send.requires(window[next_slot])
                    window[next_slot] = send
                    next_slot = (next_slot + 1) % window_size
                    if compute_time_dependency > 0:
                        send.requires(
                            comm.Calc(host=anchor, size=compute_time_dependency)
                        )
            else:
                if compute_time_dependency > 0:
                    send.requires(comm.Calc(host=anchor, size=compute_time_dependency))
        elif algorithm in ["reduce", "incast"]:
            datasize = datasizes[rank]
            send = comm.Send(src=rank, dst=anchor, size=datasize, tag=tag)
            recv = comm.Recv(src=rank, dst=anchor, size=datasize, tag=tag)
            if compute_time_dependency > 0:
                send.requires(comm.Calc(host=rank, size=compute_time_dependency))
        else:
            raise ValueError(
                f"the pattern does not currently support the {algorithm} algorithm"
            )
    # BUG FIX: the function is annotated and documented to return a GoalComm
    # but previously fell off the end and returned None.
    return comm
260 | 261 | :param comm_size: number of ranks in the communicator 262 | :param datasize: size of data to send 263 | :param tag: tag that is used for all send and receive operations 264 | :param algorithm: communication algorithm that uses this pattern; default is bcast (single source, multiple destinations) 265 | :param parallel: whether to send multiple messages in parallel; default is True (send messages in parallel) 266 | :param randomized_data: whether to randomize the data sent or received; default is False (same size for all messages) 267 | :param window_size: number of operations that can be in flight at once; default is 0 (no windowing) 268 | :param compute_time_dependency: compute time dependency for each send operation; default is 0 (no compute time) 269 | :param kwargs: additional arguments that are ignored 270 | :return: GoalComm object that represents the communication pattern 271 | """ 272 | comm = GoalComm(comm_size) 273 | 274 | assert algorithm in [ 275 | "bcast", 276 | "reduce", 277 | "alltoall", 278 | "alltoallv", 279 | "scatter", 280 | "incast", 281 | "outcast", 282 | ], f"the pattern does not currently support the {algorithm} algorithm" 283 | 284 | if algorithm in ["alltoall", "alltoallv"]: 285 | datasizes = [ 286 | [ 287 | (datasize + int(0.1 * random.randint(-datasize, datasize))) 288 | if randomized_data 289 | else datasize 290 | for _ in range(comm_size) 291 | ] 292 | for _ in range(comm_size) 293 | ] 294 | 295 | for anchor in range(comm_size): 296 | _single_source_or_destination_linear( 297 | comm, 298 | anchor, 299 | datasizes, 300 | tag, 301 | algorithm, 302 | parallel, 303 | window_size, 304 | compute_time_dependency, 305 | ) 306 | else: 307 | datasizes = [ 308 | (datasize + int(0.1 * random.randint(-datasize, datasize))) 309 | if randomized_data 310 | else datasize 311 | for _ in range(comm_size) 312 | ] 313 | _single_source_or_destination_linear( 314 | comm, 315 | 0, 316 | datasizes, 317 | tag, 318 | algorithm, 319 | parallel, 320 | 
window_size, 321 | compute_time_dependency, 322 | ) 323 | 324 | return comm 325 | -------------------------------------------------------------------------------- /src/Drawviz/TimelineDrawing.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 The Trustees of Indiana University and Indiana 3 | * University Research and Technology 4 | * Corporation. All rights reserved. 5 | * 6 | * Author(s): Torsten Hoefler 7 | * Timo Schneider 8 | * 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "TimelineDrawing.hpp" 22 | 23 | 24 | void TimelineDrawing::init_graph(int numranks, int numcpus, int width = 800, int height = 800, std::string filename = "timeline.ps") { 25 | 26 | this->numranks = numranks; 27 | this->numcpus = numcpus; 28 | this->width = width; 29 | this->height = height; 30 | 31 | this->ranksep = height/(numranks+2); 32 | this->cpusep = (ranksep*0.75) / 5; // this means we assume 4 cpus at max 33 | this->timesep = width/100; 34 | this->fontsize = 10; 35 | this->leftmargin = 50; 36 | 37 | PS_boot(); 38 | this->psdoc = PS_new(); 39 | PS_open_file(this->psdoc, filename.c_str()); 40 | PS_begin_page(this->psdoc, (this->numranks+2)*this->ranksep, (this->numranks+2)*this->ranksep); 41 | this->psfont = PS_findfont(this->psdoc, "Helvetica", "", 0); 42 | PS_setfont(this->psdoc, this->psfont, this->fontsize); 43 | 44 | } 45 | 46 | void TimelineDrawing::close_graph() { 47 | 48 | PS_end_page(this->psdoc); 49 | PS_close(this->psdoc); 50 | PS_delete(this->psdoc); 51 | PS_shutdown(); 52 | } 53 | 54 | void TimelineDrawing::draw_everything(int maxtime) { 55 | 56 | this->timesep = ((double) (this->width - (this->leftmargin * 2))) / (double) maxtime; 57 | 58 | for (unsigned int i=0; ioverheads.size(); i++) { 59 | if (this->overheads.at(i).type == 1) { 60 | draw_osend(this->overheads.at(i).rank, 61 | 
this->overheads.at(i).cpu, 62 | this->overheads.at(i).start, 63 | this->overheads.at(i).end, 64 | this->overheads.at(i).r, 65 | this->overheads.at(i).g, 66 | this->overheads.at(i).b); 67 | } 68 | if (this->overheads.at(i).type == 2) { 69 | draw_orecv(this->overheads.at(i).rank, 70 | this->overheads.at(i).cpu, 71 | this->overheads.at(i).start, 72 | this->overheads.at(i).end, 73 | this->overheads.at(i).r, 74 | this->overheads.at(i).g, 75 | this->overheads.at(i).b 76 | ); 77 | 78 | } 79 | if (this->overheads.at(i).type == 3) { 80 | draw_loclop(this->overheads.at(i).rank, 81 | this->overheads.at(i).cpu, 82 | this->overheads.at(i).start, 83 | this->overheads.at(i).end, 84 | this->overheads.at(i).r, 85 | this->overheads.at(i).g, 86 | this->overheads.at(i).b 87 | ); 88 | 89 | } 90 | if (this->overheads.at(i).type == 4) { 91 | draw_noise(this->overheads.at(i).rank, 92 | this->overheads.at(i).cpu, 93 | this->overheads.at(i).start, 94 | this->overheads.at(i).end, 95 | this->overheads.at(i).r, 96 | this->overheads.at(i).g, 97 | this->overheads.at(i).b 98 | ); 99 | } 100 | 101 | } 102 | for (unsigned int i=0; itransmissions.size(); i++) { 103 | draw_transmission(this->transmissions.at(i).source, 104 | this->transmissions.at(i).dest, 105 | this->transmissions.at(i).starttime, 106 | this->transmissions.at(i).endtime, 107 | this->transmissions.at(i).size, 108 | this->transmissions.at(i).G, 109 | this->transmissions.at(i).r, 110 | this->transmissions.at(i).g, 111 | this->transmissions.at(i).b 112 | ); 113 | } 114 | } 115 | 116 | void TimelineDrawing::draw_ranklines() { 117 | 118 | for (int i=0; ileftmargin, (i+2)*ranksep); 121 | PS_lineto(psdoc, this->width - this->leftmargin , (i+2)*ranksep); 122 | PS_stroke(psdoc); 123 | char textbuffer[128]; 124 | snprintf(textbuffer, 128, "Rank %i", i); 125 | PS_setfont(psdoc, this->psfont, this->fontsize); 126 | PS_show_xy(psdoc, textbuffer, 5, (i+2)*ranksep); 127 | for (int j=1; jpsfont, this->fontsize/1.75); 129 | PS_setlinewidth(psdoc, 
0.05); 130 | PS_moveto(psdoc, this->leftmargin, (i+2)*ranksep - j*cpusep); 131 | PS_lineto(psdoc, this->width - this->leftmargin , (i+2)*ranksep - j*cpusep ); 132 | PS_stroke(psdoc); 133 | PS_setlinewidth(psdoc, 0.2); 134 | snprintf(textbuffer, 128, "CPU %i", j); 135 | PS_show_xy(psdoc, textbuffer, 7, (i+2)*ranksep - j*cpusep); 136 | } 137 | } 138 | 139 | PS_setfont(psdoc, this->psfont, this->fontsize); 140 | PS_show_xy(psdoc, "Time", width * 0.5, ranksep*0.3); 141 | 142 | } 143 | 144 | void TimelineDrawing::draw_seperator(int rank, int cpu, int pos) { 145 | 146 | PS_setlinewidth(psdoc, 0.1); 147 | PS_moveto(psdoc, 148 | this->leftmargin + pos * this->timesep, 149 | (rank+2) * this->ranksep - cpu * this->cpusep - 3 ); 150 | PS_lineto(psdoc, 151 | this->leftmargin + pos * this->timesep, 152 | (rank+2) * this->ranksep - cpu * this->cpusep + 3 ); 153 | PS_stroke(psdoc); 154 | } 155 | 156 | void TimelineDrawing::draw_osend(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b) { 157 | 158 | PS_setcolor(psdoc, "stroke", "rgb", r, g, b, 0.0); 159 | PS_setlinewidth(psdoc, args_info.linethickness_arg+1.0); 160 | PS_moveto(psdoc, 161 | this->leftmargin + start * this->timesep, 162 | (rank+2)*this->ranksep - cpu*this->cpusep); 163 | PS_lineto(psdoc, 164 | this->leftmargin + end * this->timesep, 165 | (rank+2)*this->ranksep - cpu*this->cpusep); 166 | PS_stroke(psdoc); 167 | 168 | this->draw_seperator(rank, cpu, start); 169 | this->draw_seperator(rank, cpu, end); 170 | 171 | if (args_info.descrtext_given) { 172 | PS_setfont(psdoc, this->psfont, this->fontsize/2); 173 | int xpos = this->leftmargin + (((end-start)/2 + start) * this->timesep); 174 | xpos -= (PS_stringwidth(psdoc, "o", this->psfont, this->fontsize/2) / 2); 175 | PS_show_xy(psdoc, "o", xpos, 176 | (rank+2)*ranksep - cpu*cpusep + ranksep * 0.1 ); 177 | 178 | xpos = this->leftmargin + (((end-start)/2 + start) * this->timesep); 179 | xpos -= (PS_stringwidth(psdoc, "send", this->psfont, 
this->fontsize/2) / 2); 180 | PS_show_xy(psdoc, "send", xpos, 181 | (rank+2)*ranksep - cpu*cpusep - ranksep * 0.1 ); 182 | } 183 | } 184 | 185 | void TimelineDrawing::draw_orecv(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b) { 186 | 187 | PS_setcolor(psdoc, "stroke", "rgb", r, g, b, 0.0); 188 | PS_setlinewidth(psdoc, args_info.linethickness_arg+1.0); 189 | PS_moveto(psdoc, 190 | this->leftmargin + start * this->timesep, 191 | (rank+2)*this->ranksep - cpu*this->cpusep); 192 | PS_lineto(psdoc, 193 | this->leftmargin + end * this->timesep, 194 | (rank+2)*this->ranksep - cpu*this->cpusep); 195 | PS_stroke(psdoc); 196 | 197 | this->draw_seperator(rank, cpu, start); 198 | this->draw_seperator(rank, cpu, end); 199 | 200 | if (args_info.descrtext_given) { 201 | PS_setfont(psdoc, this->psfont, this->fontsize/2); 202 | int xpos = this->leftmargin + (((end-start)/2 + start) * this->timesep); 203 | xpos -= (PS_stringwidth(psdoc, "o", this->psfont, this->fontsize/2) / 2); 204 | PS_show_xy(psdoc, "o", xpos, 205 | (rank+2)*ranksep - cpu*cpusep + ranksep * 0.1 ); 206 | 207 | xpos = this->leftmargin + (((end-start)/2 + start) * this->timesep); 208 | xpos -= (PS_stringwidth(psdoc, "recv", this->psfont, this->fontsize/2) / 2); 209 | PS_show_xy(psdoc, "recv", xpos, 210 | (rank+2)*ranksep - cpu*cpusep - ranksep * 0.1 ); 211 | } 212 | 213 | } 214 | 215 | void TimelineDrawing::draw_loclop(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b) { 216 | 217 | PS_setcolor(psdoc, "stroke", "rgb", r, g, b, 0.0); 218 | 219 | PS_setlinewidth(psdoc, args_info.linethickness_arg+1.0); 220 | PS_moveto(psdoc, 221 | this->leftmargin + start * this->timesep, 222 | (rank+2)*this->ranksep - cpu*this->cpusep); 223 | PS_lineto(psdoc, 224 | this->leftmargin + end * this->timesep, 225 | (rank+2)*this->ranksep - cpu*this->cpusep); 226 | PS_stroke(psdoc); 227 | 228 | this->draw_seperator(rank, cpu, start); 229 | this->draw_seperator(rank, cpu, end); 230 | 231 | 
if (args_info.descrtext_given) { 232 | PS_setfont(psdoc, this->psfont, this->fontsize/2); 233 | int xpos = this->leftmargin + (((end-start)/2 + start) * this->timesep); 234 | xpos -= (PS_stringwidth(psdoc, "calc", this->psfont, this->fontsize/2) / 2); 235 | PS_show_xy(psdoc, "calc", xpos, 236 | (rank+2)*ranksep - cpu*cpusep - ranksep * 0.1 ); 237 | } 238 | 239 | 240 | } 241 | 242 | void TimelineDrawing::draw_noise(int rank, int cpu, uint64_t start, uint64_t end, float r, float g, float b) { 243 | 244 | PS_setcolor(psdoc, "stroke", "rgb", r, g, b, 0.0); 245 | 246 | PS_setlinewidth(psdoc, args_info.linethickness_arg+1.0); 247 | PS_moveto(psdoc, 248 | this->leftmargin + start * this->timesep, 249 | (rank+2)*this->ranksep - cpu*this->cpusep); 250 | PS_lineto(psdoc, 251 | this->leftmargin + end * this->timesep, 252 | (rank+2)*this->ranksep - cpu*this->cpusep); 253 | PS_stroke(psdoc); 254 | 255 | this->draw_seperator(rank, cpu, start); 256 | this->draw_seperator(rank, cpu, end); 257 | 258 | if (args_info.descrtext_given) { 259 | PS_setfont(psdoc, this->psfont, this->fontsize/2); 260 | int xpos = this->leftmargin + (((end-start)/2 + start) * this->timesep); 261 | xpos -= (PS_stringwidth(psdoc, "calc", this->psfont, this->fontsize/2) / 2); 262 | PS_show_xy(psdoc, "calc", xpos, 263 | (rank+2)*ranksep - cpu*cpusep - ranksep * 0.1 ); 264 | } 265 | 266 | } 267 | 268 | void TimelineDrawing::draw_transmission(int source, int dest, uint64_t starttime, uint64_t endtime, int size, int G, float r, float g, float b) { 269 | 270 | PS_setcolor(psdoc, "stroke", "rgb", r, g, b, 0.0); 271 | PS_setlinewidth(psdoc, args_info.linethickness_arg); 272 | 273 | for (int i = 0; i <= size-1; i++) { 274 | PS_setdash(psdoc, 2.0, 2.0); 275 | PS_moveto(psdoc, this->leftmargin + (starttime + i * G) * this->timesep, (source+2)*ranksep); 276 | 277 | // store coordinates for drawing the arrowheads 278 | int sx = this->leftmargin + (starttime + i * G) * this->timesep; 279 | int sy = (source+2)*ranksep; 280 
| 281 | // the behaviour of the sim changed! oldsin: transmission "ends" with last byte, 282 | // newsim: transmission ends with first, so orecv can start earlier 283 | int L = endtime - starttime;// - (size-1)*G; 284 | //assert(L > 0); 285 | PS_lineto(psdoc, this->leftmargin + ((starttime + i * G) + L) * this->timesep, (dest+2)*ranksep); 286 | 287 | // store coordinates for drawing the arrowheads 288 | int dx = this->leftmargin + ((starttime + i * G) + L) * this->timesep; 289 | int dy = (dest+2)*ranksep; 290 | 291 | PS_stroke(psdoc); 292 | 293 | if (args_info.arrowheads_given) { 294 | // draw arrowhead 295 | int x1, y1, x2, y2; 296 | calc_arrowhead_coords(sx, sy, dx, dy, &x1, &y1, &x2, &y2); 297 | PS_setdash(psdoc, 0.0, 0.0); 298 | PS_moveto(psdoc, dx, dy); 299 | PS_lineto(psdoc, x1, y1); 300 | PS_stroke(psdoc); 301 | PS_moveto(psdoc, dx, dy); 302 | PS_lineto(psdoc, x2, y2); 303 | PS_stroke(psdoc); 304 | } 305 | } 306 | } 307 | 308 | void TimelineDrawing::add_osend(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b) { 309 | 310 | overh os; 311 | os.type = 1; 312 | os.rank = rank; 313 | os.cpu = cpu; 314 | os.start = start; 315 | os.end = end; 316 | os.r = r; 317 | os.g = g; 318 | os.b = b; 319 | 320 | this->overheads.push_back(os); 321 | 322 | } 323 | 324 | void TimelineDrawing::add_orecv(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b) { 325 | 326 | overh orecv; 327 | orecv.type = 2; 328 | orecv.rank = rank; 329 | orecv.cpu = cpu; 330 | orecv.start = start; 331 | orecv.end = end; 332 | orecv.r = r; 333 | orecv.g = g; 334 | orecv.b = b; 335 | 336 | this->overheads.push_back(orecv); 337 | 338 | } 339 | 340 | void TimelineDrawing::add_loclop(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b) { 341 | 342 | overh lop; 343 | lop.type = 3; 344 | lop.rank = rank; 345 | lop.cpu = cpu; 346 | lop.start = start; 347 | lop.end = end; 348 | lop.r = r; 349 | lop.g = g; 350 | lop.b = b; 351 | 352 | 
this->overheads.push_back(lop); 353 | 354 | } 355 | 356 | void TimelineDrawing::add_noise(int rank, uint64_t start, uint64_t end, int cpu, float r, float g, float b) { 357 | 358 | overh noise; 359 | noise.type = 4; 360 | noise.rank = rank; 361 | noise.cpu = cpu; 362 | noise.start = start; 363 | noise.end = end; 364 | noise.r = r; 365 | noise.g = g; 366 | noise.b = b; 367 | 368 | this->overheads.push_back(noise); 369 | 370 | } 371 | 372 | void TimelineDrawing::add_transmission(int source, int dest, uint64_t starttime, uint64_t endtime, int size, int G, float r, float g, float b) { 373 | 374 | trans tm; 375 | tm.source = source; 376 | tm.dest = dest; 377 | tm.starttime = starttime; 378 | tm.endtime = endtime; 379 | tm.size = size; 380 | tm.G = G; 381 | tm.r = r; 382 | tm.g = g; 383 | tm.b = b; 384 | 385 | this->transmissions.push_back(tm); 386 | 387 | std::stringstream os; 388 | os << "transmission " << source << " " << dest << " " << starttime << " "; 389 | os << endtime << " " << G << ";\n"; 390 | this->content.append(os.str()); 391 | } 392 | 393 | void TimelineDrawing::calc_arrowhead_coords(int sx, int sy, int dx, int dy, int *x1, int *y1, int *x2, int *y2) { 394 | 395 | double pi = 3.141592; 396 | double angle = atan2 (dy - sy, dx - sx) + pi; 397 | double arrowlength = 6*args_info.linethickness_arg; 398 | 399 | *x1 = dx + arrowlength * cos(angle - pi/12); 400 | *y1 = dy + arrowlength * sin(angle - pi/12); 401 | *x2 = dx + arrowlength * cos(angle + pi/12); 402 | *y2 = dy + arrowlength * sin(angle + pi/12); 403 | 404 | } 405 | 406 | -------------------------------------------------------------------------------- /src/liballprof2/gensem.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | import re 4 | import clang.cindex 5 | import argparse 6 | from collections import defaultdict 7 | import yaml 8 | 9 | class AllprofCodegen: 10 | 11 | def __init__(self, libclang_path): 12 | self.libclang_path=libclang_path 13 | self.nodes = [] 14 | self.semantics = {} 15 | self.types = defaultdict(list) 16 | self.BLACKLISTED_FUNCTIONS = [ 17 | 'MPI_Comm_c2f', # this might be a macro i.e., in mpich 18 | 'MPI_Comm_f2c', # this might be a macro i.e., in mpich 19 | 'MPI_Group_f2c', # this might be a macro i.e., in mpich 20 | 'MPI_Group_c2f', # this might be a macro i.e., in mpich 21 | 'MPI_Win_f2c', # this might be a macro i.e., in mpich 22 | 'MPI_Win_c2f', # this might be a macro i.e., in mpich 23 | 'MPI_Type_f2c', # this might be a macro i.e., in mpich 24 | 'MPI_Type_c2f', # this might be a macro i.e., in mpich 25 | 'MPI_Errhandler_f2c', # this might be a macro i.e., in mpich 26 | 'MPI_Errhandler_c2f', # this might be a macro i.e., in mpich 27 | 'MPI_Request_f2c', # this might be a macro i.e., in mpich 28 | 'MPI_Request_c2f', # this might be a macro i.e., in mpich 29 | 'MPI_File_f2c', # this might be a macro i.e., in mpich 30 | 'MPI_File_c2f', # this might be a macro i.e., in mpich 31 | 'MPI_Info_f2c', # this might be a macro i.e., in mpich 32 | 'MPI_Info_c2f', # this might be a macro i.e., in mpich 33 | 'MPI_Message_f2c', # this might be a macro i.e., in mpich 34 | 'MPI_Message_c2f', # this might be a macro i.e., in mpich 35 | 'MPI_Op_f2c', # this might be a macro i.e., in mpich 36 | 'MPI_Op_c2f', # this might be a macro i.e., in mpich 37 | ] 38 | 39 | def get_count_for_param_in_func(self, param, func): 40 | # TODO minimize this 41 | mapping = {} 42 | GET_NDIMS_CART_COMM = "int ndims; PMPI_Cartdim_get(comm, &ndims);" 43 | GET_COMM_SIZE = "int rank, size; PMPI_Comm_size(comm, &size); PMPI_Comm_rank(comm, &rank);" 44 | GET_NEIGH_GRAPH_COMM = "int ideg, odeg, wted; PMPI_Dist_graph_neighbors_count(comm, &ideg, &odeg, &wted);" 45 | 
mapping[("MPI_Cart_create", "dims")] = "ndims" 46 | mapping[("MPI_Cart_create", "periods")] = "ndims" 47 | mapping[("MPI_Cart_map", "dims")] = "ndims" 48 | mapping[("MPI_Cart_map", "periods")] = "ndims" 49 | mapping[("MPI_Cart_rank", "coords")] = (GET_NDIMS_CART_COMM, "ndims") 50 | mapping[("MPI_Cart_sub", "remain_dims")] = (GET_NDIMS_CART_COMM, "ndims") 51 | mapping[("MPI_Dist_graph_create", "nodes")] = "n" 52 | mapping[("MPI_Dist_graph_create", "degrees")] = "n" 53 | mapping[("MPI_Dist_graph_create", "targets")] = "n" 54 | mapping[("MPI_Dist_graph_create", "weights")] = "n" 55 | mapping[("MPI_Dist_graph_create_adjacent", "sources")] = "indegree" 56 | mapping[("MPI_Dist_graph_create_adjacent", "sourceweights")] = "indegree" 57 | mapping[("MPI_Dist_graph_create_adjacent", "destinations")] = "outdegree" 58 | mapping[("MPI_Dist_graph_create_adjacent", "destweights")] = "outdegree" 59 | mapping[("MPI_Comm_spawn_multiple", "array_of_maxprocs")] = "count" 60 | mapping[("MPI_Graph_create", "index")] = "nnodes" 61 | mapping[("MPI_Graph_create", "edges")] = "index[nnodes-1]" 62 | mapping[("MPI_Graph_map", "index")] = "nnodes" 63 | mapping[("MPI_Graph_map", "edges")] = "index[nnodes-1]" 64 | mapping[("MPI_Group_excl", "ranks")] = "n" 65 | mapping[("MPI_Group_incl", "ranks")] = "n" 66 | mapping[("MPI_Group_translate_ranks", "ranks1")] = "n" 67 | mapping[("MPI_Type_create_darray", "gsize_array")] = "ndims" 68 | mapping[("MPI_Type_create_darray", "distrib_array")] = "ndims" 69 | mapping[("MPI_Type_create_darray", "darg_array")] = "ndims" 70 | mapping[("MPI_Type_create_darray", "psize_array")] = "ndims" 71 | mapping[("MPI_Type_create_hindexed", "array_of_blocklengths")] = "count" 72 | mapping[("MPI_Type_create_indexed_block", "array_of_displacements")] = "count" 73 | mapping[("MPI_Type_create_struct", "array_of_block_lengths")] = "count" 74 | mapping[("MPI_Type_create_subarray", "size_array")] = "ndims" 75 | mapping[("MPI_Type_create_subarray", "subsize_array")] = "ndims" 76 | 
mapping[("MPI_Type_create_subarray", "start_array")] = "ndims" 77 | mapping[("MPI_Type_indexed", "array_of_blocklengths")] = "count" 78 | mapping[("MPI_Type_indexed", "array_of_displacements")] = "count" 79 | mapping[("MPI_Allgatherv", "recvcounts")] = (GET_COMM_SIZE, "size") 80 | mapping[("MPI_Allgatherv", "displs")] = (GET_COMM_SIZE, "size") 81 | mapping[("MPI_Iallgatherv", "recvcounts")] = (GET_COMM_SIZE, "size") 82 | mapping[("MPI_Iallgatherv", "displs")] = (GET_COMM_SIZE, "size") 83 | mapping[("MPI_Alltoallv", "sendcounts")] = (GET_COMM_SIZE, "size") 84 | mapping[("MPI_Alltoallv", "sdispls")] = (GET_COMM_SIZE, "size") 85 | mapping[("MPI_Alltoallv", "recvcounts")] = (GET_COMM_SIZE, "size") 86 | mapping[("MPI_Alltoallv", "rdispls")] = (GET_COMM_SIZE, "size") 87 | mapping[("MPI_Ialltoallv", "sendcounts")] = (GET_COMM_SIZE, "size") 88 | mapping[("MPI_Ialltoallv", "sdispls")] = (GET_COMM_SIZE, "size") 89 | mapping[("MPI_Ialltoallv", "recvcounts")] = (GET_COMM_SIZE, "size") 90 | mapping[("MPI_Ialltoallv", "rdispls")] = (GET_COMM_SIZE, "size") 91 | mapping[("MPI_Alltoallw", "sendcounts")] = (GET_COMM_SIZE, "size") 92 | mapping[("MPI_Alltoallw", "sdispls")] = (GET_COMM_SIZE, "size") 93 | mapping[("MPI_Alltoallw", "sendtypes")] = (GET_COMM_SIZE, "size") 94 | mapping[("MPI_Alltoallw", "recvcounts")] = (GET_COMM_SIZE, "size") 95 | mapping[("MPI_Alltoallw", "rdispls")] = (GET_COMM_SIZE, "size") 96 | mapping[("MPI_Alltoallw", "recvtypes")] = (GET_COMM_SIZE, "size") 97 | mapping[("MPI_Ialltoallw", "sendcounts")] = (GET_COMM_SIZE, "size") 98 | mapping[("MPI_Ialltoallw", "sdispls")] = (GET_COMM_SIZE, "size") 99 | mapping[("MPI_Ialltoallw", "sendtypes")] = (GET_COMM_SIZE, "size") 100 | mapping[("MPI_Ialltoallw", "recvcounts")] = (GET_COMM_SIZE, "size") 101 | mapping[("MPI_Ialltoallw", "rdispls")] = (GET_COMM_SIZE, "size") 102 | mapping[("MPI_Ialltoallw", "recvtypes")] = (GET_COMM_SIZE, "size") 103 | mapping[("MPI_Cart_coords", "coords")] = "maxdims" 104 | 
mapping[("MPI_Cart_get", "dims")] = "maxdims" 105 | mapping[("MPI_Cart_get", "periods")] = "maxdims" 106 | mapping[("MPI_Cart_get", "coords")] = "maxdims" 107 | mapping[("MPI_Dist_graph_neighbors", "sources")] = "maxindegree" 108 | mapping[("MPI_Dist_graph_neighbors", "sourceweights")] = "maxindegree" 109 | mapping[("MPI_Dist_graph_neighbors", "destinations")] = "maxoutdegree" 110 | mapping[("MPI_Dist_graph_neighbors", "destweights")] = "maxoutdegree" 111 | mapping[("MPI_Comm_spawn", "argv")] = None 112 | mapping[("MPI_Comm_spawn", "array_of_errcodes")] = "maxprocs" 113 | mapping[("MPI_Comm_spawn_multiple", "array_of_commands")] = "count" # only at root 114 | mapping[("MPI_Comm_spawn_multiple", "array_of_argv")] = "count" # only at root 115 | mapping[("MPI_Comm_spawn_multiple", "array_of_info")] = "count" # only at root 116 | mapping[("MPI_Comm_spawn_multiple", "array_of_errcodes")] = "count" # only at root 117 | mapping[("MPI_Dims_create", "dims")] = "ndims" 118 | mapping[("MPI_Gatherv", "recvcounts")] = (GET_COMM_SIZE, "size") 119 | mapping[("MPI_Gatherv", "displs")] = (GET_COMM_SIZE, "size") 120 | mapping[("MPI_Igatherv", "recvcounts")] = (GET_COMM_SIZE, "size") 121 | mapping[("MPI_Igatherv", "displs")] = (GET_COMM_SIZE, "size") 122 | mapping[("MPI_Graph_get", "index")] = "maxindex" 123 | mapping[("MPI_Graph_get", "edges")] = "maxedges" 124 | mapping[("MPI_Graph_neighbors", "neighbors")] = "maxneighbors" 125 | mapping[("MPI_Group_range_excl", "ranges")] = "n" 126 | mapping[("MPI_Group_range_incl", "ranges")] = "n" 127 | mapping[("MPI_Group_translate_ranks", "ranks2")] = "n" 128 | mapping[("MPI_Neighbor_allgatherv", "recvcounts")] = (GET_NEIGH_GRAPH_COMM, "ideg") 129 | mapping[("MPI_Neighbor_allgatherv", "displs")] = (GET_NEIGH_GRAPH_COMM, "ideg") 130 | mapping[("MPI_Ineighbor_allgatherv", "recvcounts")] = (GET_NEIGH_GRAPH_COMM, "ideg") 131 | mapping[("MPI_Ineighbor_allgatherv", "displs")] = (GET_NEIGH_GRAPH_COMM, "ideg") 132 | mapping[("MPI_Neighbor_alltoallv", 
"sendcounts")] = (GET_NEIGH_GRAPH_COMM, "odeg") 133 | mapping[("MPI_Neighbor_alltoallv", "sdispls")] = (GET_NEIGH_GRAPH_COMM, "odeg") 134 | mapping[("MPI_Neighbor_alltoallv", "recvcounts")] = (GET_NEIGH_GRAPH_COMM, "ideg") 135 | mapping[("MPI_Neighbor_alltoallv", "rdispls")] = (GET_NEIGH_GRAPH_COMM, "ideg") 136 | mapping[("MPI_Ineighbor_alltoallv", "sendcounts")] = (GET_NEIGH_GRAPH_COMM, "odeg") 137 | mapping[("MPI_Ineighbor_alltoallv", "sdispls")] = (GET_NEIGH_GRAPH_COMM, "odeg") 138 | mapping[("MPI_Ineighbor_alltoallv", "recvcounts")] = (GET_NEIGH_GRAPH_COMM, "ideg") 139 | mapping[("MPI_Ineighbor_alltoallv", "rdispls")] = (GET_NEIGH_GRAPH_COMM, "ideg") 140 | mapping[("MPI_Neighbor_alltoallw", "sendcounts")] = (GET_NEIGH_GRAPH_COMM, "odeg") 141 | mapping[("MPI_Neighbor_alltoallw", "sdispls")] = (GET_NEIGH_GRAPH_COMM, "odeg") 142 | mapping[("MPI_Neighbor_alltoallw", "sendtypes")] = (GET_NEIGH_GRAPH_COMM, "odeg") 143 | mapping[("MPI_Neighbor_alltoallw", "recvcounts")] = (GET_NEIGH_GRAPH_COMM, "ideg") 144 | mapping[("MPI_Neighbor_alltoallw", "rdispls")] = (GET_NEIGH_GRAPH_COMM, "ideg") 145 | mapping[("MPI_Neighbor_alltoallw", "recvtypes")] = (GET_NEIGH_GRAPH_COMM, "ideg") 146 | mapping[("MPI_Ineighbor_alltoallw", "sendcounts")] = (GET_NEIGH_GRAPH_COMM, "odeg") 147 | mapping[("MPI_Ineighbor_alltoallw", "sdispls")] = (GET_NEIGH_GRAPH_COMM, "odeg") 148 | mapping[("MPI_Ineighbor_alltoallw", "sendtypes")] = (GET_NEIGH_GRAPH_COMM, "odeg") 149 | mapping[("MPI_Ineighbor_alltoallw", "recvcounts")] = (GET_NEIGH_GRAPH_COMM, "ideg") 150 | mapping[("MPI_Ineighbor_alltoallw", "rdispls")] = (GET_NEIGH_GRAPH_COMM, "ideg") 151 | mapping[("MPI_Ineighbor_alltoallw", "recvtypes")] = (GET_NEIGH_GRAPH_COMM, "ideg") 152 | mapping[("MPI_Pack_external", "datarep")] = "strlen(datarep)" 153 | mapping[("MPI_Pack_external_size", "datarep")] = "strlen(datarep)" 154 | mapping[("MPI_Reduce_scatter", "recvcounts")] = (GET_COMM_SIZE, "size") 155 | mapping[("MPI_Ireduce_scatter", "recvcounts")] = 
(GET_COMM_SIZE, "size") 156 | mapping[("MPI_Scatterv", "sendcounts")] = (GET_COMM_SIZE, "(rank==root ? size : 0)") 157 | mapping[("MPI_Scatterv", "displs")] = (GET_COMM_SIZE, "(rank==root ? size : 0)") 158 | mapping[("MPI_Iscatterv", "sendcounts")] = (GET_COMM_SIZE, "(rank == root ? size : 0)") 159 | mapping[("MPI_Iscatterv", "displs")] = (GET_COMM_SIZE, "(rank == root ? size : 0)") 160 | mapping[("MPI_Startall", "array_of_requests")] = "count" 161 | mapping[("MPI_Testall", "array_of_requests")] = "count" 162 | mapping[("MPI_Testall", "array_of_statuses")] = "count" 163 | mapping[("MPI_Testany", "array_of_requests")] = "count" 164 | mapping[("MPI_Testsome", "array_of_requests")] = "incount" 165 | mapping[("MPI_Testsome", "array_of_indices")] = "*outcount" 166 | mapping[("MPI_Testsome", "array_of_statuses")] = "*outcount" 167 | mapping[("MPI_Type_create_hindexed_block", "array_of_displacements")] = "count" 168 | mapping[("MPI_Type_create_hindexed", "array_of_displacements")] = "count" 169 | mapping[("MPI_Type_create_struct", "array_of_displacements")] = "count" 170 | mapping[("MPI_Type_create_struct", "array_of_types")] = "count" 171 | mapping[("MPI_Type_get_contents", "array_of_integers")] = "max_integers" 172 | mapping[("MPI_Type_get_contents", "array_of_addresses")] = "max_addresses" 173 | mapping[("MPI_Type_get_contents", "array_of_datatypes")] = "max_datatypes" 174 | mapping[("MPI_Unpack_external", "datarep")] = "strlen(datarep)" 175 | mapping[("MPI_Waitall", "array_of_requests")] = "(array_of_statuses != MPI_STATUSES_IGNORE ? count : 0)" 176 | mapping[("MPI_Waitany", "array_of_requests")] = "count" 177 | mapping[("MPI_Waitsome", "array_of_requests")] = "incount" 178 | mapping[("MPI_Waitsome", "array_of_indices")] = "*outcount" 179 | mapping[("MPI_Waitsome", "array_of_statuses")] = "(array_of_statuses != MPI_STATUSES_IGNORE ? 
*outcount : 0)" 180 | if (func, param) not in mapping: 181 | print(f"Did not find mapping[(\"{func}\", \"{param}\")] = \"\"") 182 | return None 183 | else: 184 | r = mapping[(func, param)] 185 | if type(r) is tuple: 186 | return r 187 | else: 188 | return (None, r) 189 | 190 | def traverse_ast(self, node, depth=0, print_ast=False): 191 | if print_ast: 192 | print(' ' * depth + f'{node.kind} ({node.displayname})') 193 | if node.kind is clang.cindex.CursorKind.FUNCTION_DECL and re.match("MPI_.*", node.displayname) : 194 | self.nodes += [node] 195 | for child in node.get_children(): 196 | self.traverse_ast(child, depth + 1, print_ast) 197 | 198 | 199 | def semnatics_for_func(self, node): 200 | function_name = node.spelling 201 | return_type = node.result_type.spelling 202 | if function_name.startswith("MPI_T_") or (function_name in self.BLACKLISTED_FUNCTIONS): 203 | return 204 | self.semantics[function_name] = {} 205 | self.semantics[function_name]['return_type'] = return_type 206 | self.semantics[function_name]['params'] = [] 207 | 208 | for param_cursor in node.get_children(): 209 | param_dict = {} 210 | if param_cursor.kind != clang.cindex.CursorKind.PARM_DECL: 211 | continue 212 | param_type = param_cursor.type.spelling 213 | param_name = param_cursor.spelling 214 | param_dict['name'] = param_name 215 | param_dict['type'] = param_type 216 | if "[]" in param_type: 217 | prolog, varname = self.get_count_for_param_in_func(param=param_name, func=function_name) 218 | param_dict['elem_count'] = varname 219 | param_dict['prolog_elem_count'] = prolog 220 | param_dict['trace_each_elem'] = True 221 | self.semantics[function_name]['params'].append(param_dict) 222 | 223 | # clang doesn't work with varargs??? 224 | if function_name == "MPI_Pcontrol": 225 | param_dict = {} 226 | param_dict['name'] = "..." 
227 | param_dict['type'] = "" 228 | self.semantics[function_name]['params'].append(param_dict) 229 | 230 | def process_func(self, node, mode): 231 | if mode == 'semantics': 232 | self.semnatics_for_func(node) 233 | 234 | 235 | def process_header(self, filename, mode): 236 | clang.cindex.Config.set_library_path(self.libclang_path) 237 | index = clang.cindex.Index.create() 238 | translation_unit = index.parse(filename) 239 | if not translation_unit: 240 | print("Error parsing the file.") 241 | return 242 | root_cursor = translation_unit.cursor 243 | self.traverse_ast(root_cursor) 244 | for node in self.nodes: 245 | self.process_func(node, mode) 246 | 247 | 248 | 249 | if __name__ == "__main__": 250 | parser = argparse.ArgumentParser( 251 | prog='liballprof_gencode', 252 | description='Generates wrappers for the MPI functions present in the supplied MPI header file. The wrappers output in liballprof2 trace format.', 253 | epilog='') 254 | parser.add_argument('-m', '--mpi-header', default="mpi.h", help="MPI header file to use as input (default: mpi.h)") 255 | parser.add_argument('-s', '--semantics-file', default='mpi_sem.yml', help="Name of the file that specifies the tracer semantics (default: mpi-sem.yml)") 256 | parser.add_argument('-l', '--libclang-path', default="", help="Path to libclang, if empty let clang python module guess. (default=\"\")") 257 | args = parser.parse_args() 258 | 259 | codegen = AllprofCodegen(libclang_path=args.libclang_path) 260 | codegen.outfile = open(args.semantics_file, "w") 261 | codegen.process_header(args.mpi_header, mode='semantics') 262 | codegen.outfile.write(yaml.dump(codegen.semantics)) 263 | codegen.outfile.close() 264 | --------------------------------------------------------------------------------