├── NEWS ├── TODO ├── ChangeLog ├── utils ├── AUTHORS ├── NEWS ├── README ├── ChangeLog ├── Makefile.defs ├── mf │ └── common.defs ├── mxml-2.6.tar.gz ├── make_mxml ├── util.cc ├── .cvsignore ├── Makefile.am ├── configure.ac ├── Makefile ├── write_cube.h ├── write_html.h ├── cubew │ └── lib │ │ ├── cartesian.h │ │ ├── Makefile │ │ ├── machine.h │ │ ├── node.h │ │ ├── thread.h │ │ ├── process.h │ │ ├── region.h │ │ ├── cnode.h │ │ └── vector.h ├── getopts.cc └── ipm_join ├── VERSION ├── include ├── config.h ├── memusage.h ├── ipm_env.h ├── machtopo.h ├── mod_procctrl.h ├── utest.h ├── jobdata.h ├── mod_pmon.h ├── ipm_types.h ├── mod_mpiio.h ├── mod_papi.h ├── ipm.h ├── mod_cublas.h ├── mod_cufft.h ├── mod_omptracepoints.h ├── mod_clustering.h ├── ipm_time.h ├── mod_posixio.h ├── base64.h ├── ipm_debug.h ├── ipm_sizes.h ├── calltable.h ├── ipm_core.h ├── ipm_introspect.h ├── ipm_modules.h ├── mod_callpath.h └── regstack.h ├── test ├── test.pomp-standalone │ ├── test.dat │ ├── Makefile │ ├── opari.rc │ ├── main.c.opari.inc │ ├── README │ ├── Makefile.am │ ├── opari_omp.h │ └── main.c ├── test.alltoall │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.bcast │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.comm │ ├── Makefile │ └── main.c ├── test.fcomm │ ├── Makefile │ └── main.f90 ├── test.fhello │ ├── Makefile │ ├── Makefile.am │ └── main.f ├── test.flow1 │ ├── Makefile │ └── main.c ├── test.forever │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.fork │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.fring │ ├── Makefile │ ├── Makefile.am │ └── main.f ├── test.gather │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.gatherv │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.hello │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.jacobi │ ├── Makefile │ ├── kraken.pbs.sh │ ├── Makefile.am │ └── lawrencium.pbs.sh ├── test.keyhist │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.mpiio │ ├── Makefile │ └── main.c ├── 
test.pcontrol │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.printenv │ ├── Makefile │ └── main.c ├── test.recurse │ ├── Makefile │ └── main.c ├── test.allgather │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.allgatherv │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.allreduce │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.alltoallv │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.comm_split │ ├── Makefile │ └── main.c ├── test.introspect │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.mpi_in_place │ ├── Makefile │ └── main.c ├── test.posixio-read │ ├── Makefile │ ├── main.c │ └── Makefile.am ├── test.simple_mpi │ ├── Makefile │ └── Makefile.am ├── test.status_ignore │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.posixio-helloworld │ ├── Makefile │ ├── main.c │ └── Makefile.am ├── test.mpi_in_place_f │ ├── Makefile │ └── main.f ├── test.cufft │ ├── Makefile │ └── main.cu ├── test.cuda │ ├── Makefile │ └── main.cu ├── test.omp │ ├── Makefile │ ├── Makefile.am │ └── main.c ├── test.status_ignore_f │ ├── Makefile.am │ └── main.f ├── test.pmon │ ├── Makefile.am │ └── pmon.c ├── test.zgemm │ └── Makefile ├── Makefile.am ├── test.python │ └── main.py ├── Makefile_f90 ├── Makefile_c └── run-test-suite.sh ├── Makefile.clean-local ├── Makefile.orig ├── doc ├── mod_deps.txt ├── ipm_todo.txt └── initialization.txt ├── bin ├── Makefile.am ├── hpcenv.pl ├── mpirun └── ipm_expand.pl ├── src ├── calltable.c ├── ipm_modules.c ├── ipm_synopsis.c ├── machtopo.c ├── mpi_finalize.c ├── mod_mpiio.c ├── mod_cufft.c ├── perfdata.c ├── mod_cublas.c ├── mpi_pcontrol.c ├── mod_callpath_evtgraph.c ├── memusage.c ├── mod_posixio.c ├── hashkey.c ├── ipm_introspect.c └── jobdata.c ├── etc ├── fake_cufft_c.c ├── fake_posixio_c.c ├── fake_cuda_c.c ├── fake_cublas_c.c ├── wrap_mpiio_f.c ├── Makefile.am ├── ipm_key_mem ├── ipm_key_cufft ├── wrap_mpi_f.c ├── wrap_mpiio_c.c ├── wrap_cufft_c.c └── wrap_cublas_c.c ├── AUTHORS ├── Makefile.am ├── 
README ├── .gitignore ├── m4 ├── ipm_mpistatuscount.m4 ├── ipm_hostdetection.m4 └── ipm_underscore.m4 ├── bootstrap.sh └── COPYING /NEWS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/AUTHORS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/NEWS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/README: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 2.00 2 | -------------------------------------------------------------------------------- /utils/ChangeLog: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /include/config.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /utils/Makefile.defs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/mf/common.defs: 
-------------------------------------------------------------------------------- 1 | CCC=gcc -------------------------------------------------------------------------------- /test/test.pomp-standalone/test.dat: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test.alltoall/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.bcast/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.comm/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.fcomm/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_f90 -------------------------------------------------------------------------------- /test/test.fhello/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_f90 -------------------------------------------------------------------------------- /test/test.flow1/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.forever/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.fork/Makefile: -------------------------------------------------------------------------------- 1 
| include ../Makefile_c -------------------------------------------------------------------------------- /test/test.fring/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_f90 -------------------------------------------------------------------------------- /test/test.gather/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.gatherv/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.hello/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.jacobi/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.keyhist/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.mpiio/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.pcontrol/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.printenv/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c 
-------------------------------------------------------------------------------- /test/test.recurse/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.allgather/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.allgatherv/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.allreduce/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.alltoallv/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.comm_split/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.introspect/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.mpi_in_place/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.posixio-read/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c 
-------------------------------------------------------------------------------- /test/test.simple_mpi/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.status_ignore/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.pomp-standalone/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.posixio-helloworld/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.mpi_in_place_f/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile_f90 2 | -------------------------------------------------------------------------------- /test/test.pomp-standalone/opari.rc: -------------------------------------------------------------------------------- 1 | 2 | 1 3 | main.c 4 | 1 1 4 5 | -------------------------------------------------------------------------------- /utils/mxml-2.6.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerscadmin/IPM/HEAD/utils/mxml-2.6.tar.gz -------------------------------------------------------------------------------- /Makefile.clean-local: -------------------------------------------------------------------------------- 1 | 2 | clean-local: 3 | -rm -f *keyhist.*.txt 4 | -rm -f compat.xml.map.txt 5 | -------------------------------------------------------------------------------- /test/test.cufft/Makefile: 
-------------------------------------------------------------------------------- 1 | CCC = nvcc 2 | SOURCE = main.cu 3 | LFLAGS = -lcufft 4 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.cuda/Makefile: -------------------------------------------------------------------------------- 1 | #CCC = nvcc -arch sm_20 2 | CCC = nvcc 3 | SOURCE = main.cu 4 | include ../Makefile_c -------------------------------------------------------------------------------- /test/test.omp/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS+=-fopenmp 2 | LFLAGS+=-fopenmp 3 | 4 | include ../Makefile_c 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Makefile.orig: -------------------------------------------------------------------------------- 1 | 2 | all : 3 | make -C src 4 | 5 | clean : 6 | make -C src clean 7 | rm -f *~ linkwrap.txt 8 | -------------------------------------------------------------------------------- /doc/mod_deps.txt: -------------------------------------------------------------------------------- 1 | required: mpi|upc|omp|pthreads 2 | 3 | optional: dependencies in () 4 | 5 | machtopo 6 | hpm 7 | posixio 8 | mpiio (mpi) 9 | -------------------------------------------------------------------------------- /include/memusage.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MEMUSAGE_H_INCLUDED 3 | #define MEMUSAGE_H_INCLUDED 4 | 5 | int ipm_get_procmem(double *bytes); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /include/ipm_env.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef IPM_ENV_H_INCLUDED 3 | #define IPM_ENV_H_INCLUDED 4 | 5 | int ipm_get_env(void); 6 | 7 | #endif /* IPM_ENV_H_INCLUDED */ 8 | 
-------------------------------------------------------------------------------- /utils/make_mxml: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | tar -xzvf mxml-2.6.tar.gz 4 | cd mxml-2.6 5 | ./configure --prefix=$PWD/../mxml 6 | make 7 | make install 8 | cd .. 9 | 10 | -------------------------------------------------------------------------------- /include/machtopo.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MOD_MACHTOPO_H_INCLUDED 3 | #define MOD_MACHTOPO_H_INCLUDED 4 | 5 | void ipm_get_machtopo(); 6 | 7 | #endif /* MOD_MACHTOPO_H_INCLUDED */ 8 | -------------------------------------------------------------------------------- /bin/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | ipmbindir = $(bindir) 3 | 4 | ipmbin_SCRIPTS = \ 5 | ipm_parse 6 | 7 | 8 | #EXTRA_DIST = $(ipmbin_DATA) 9 | EXTRA_DIST = ipm_parse make_wrappers 10 | -------------------------------------------------------------------------------- /test/test.pomp-standalone/main.c.opari.inc: -------------------------------------------------------------------------------- 1 | #include "pomp_lib.h" 2 | 3 | struct ompregdescr omp_rd_1 = { 4 | "parallel", "", 0, "main.c", 7, 7, 10, 10 5 | }; 6 | 7 | #define POMP_DLIST_00001 shared(omp_rd_1) 8 | 9 | -------------------------------------------------------------------------------- /test/test.posixio-helloworld/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | int main() 5 | { 6 | FILE *f; 7 | 8 | f = fopen("test.dat", "w"); 9 | fprintf(f, "Hello world\n"); 10 | fclose(f); 11 | return 0; 12 | } 13 | 14 | -------------------------------------------------------------------------------- /include/mod_procctrl.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MOD_PROCCTRL_H_INCLUDED 3 | 
#define MOD_PROCCTRL_H_INCLUDED 4 | 5 | #include "ipm_modules.h" 6 | 7 | int mod_procctrl_init(ipm_mod_t* mod, int flags); 8 | 9 | #endif /* MOD_PROCCTRL_H_INCLUDED */ 10 | -------------------------------------------------------------------------------- /test/test.jacobi/kraken.pbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/csh 2 | #PBS -A TG-ASC070021N 3 | #PBS -l walltime=00:30:00,size=64 4 | #PBS -j oe 5 | #PBS -N jacobi 6 | #PBS -o jacobi-$PBS_JOBID.out 7 | 8 | set echo 9 | cd $PBS_O_WORKDIR 10 | 11 | time aprun -n 64 ./jacobi 6400 100 8 8 12 | -------------------------------------------------------------------------------- /src/calltable.c: -------------------------------------------------------------------------------- 1 | 2 | #include "ipm_sizes.h" 3 | #include "calltable.h" 4 | 5 | 6 | ipm_call_t ipm_calltable[MAXSIZE_CALLTABLE]; 7 | 8 | void init_calltable() 9 | { 10 | int i; 11 | for( i=0; i 3 | 4 | int main() 5 | { 6 | char buf[80]; 7 | FILE *f; 8 | 9 | f = fopen("/dev/urandom", "r"); 10 | 11 | fread( buf, 80, 1, f ); 12 | fgetc( f ); 13 | fread( buf, 80, 1, f ); 14 | fgetc( f ); 15 | 16 | fclose(f); 17 | return 0; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /test/test.jacobi/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = jacobi jacobi.ipm 3 | 4 | JACOBI_SOURCES = jacobi.c 5 | 6 | CC = $(MPICC) 7 | 8 | jacobi_ipm_SOURCES = $(JACOBI_SOURCES) 9 | jacobi_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | jacobi_SOURCES = $(JACOBI_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /test/test.status_ignore_f/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = fhello fhello.ipm 3 | 4 | HELLO_SOURCES = main.f 5 | 6 | F77 = 
$(MPIF77) 7 | 8 | fhello_ipm_SOURCES = $(HELLO_SOURCES) 9 | fhello_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | fhello_SOURCES = $(HELLO_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /test/test.forever/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = forever forever.ipm 3 | 4 | FOREVER_SOURCES = main.c 5 | 6 | CC = $(MPICC) 7 | 8 | forever_ipm_SOURCES = $(FOREVER_SOURCES) 9 | forever_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | forever_SOURCES = $(FOREVER_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /test/test.keyhist/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = keyhist keyhist.ipm 3 | 4 | KEYHIST_SOURCES = main.c 5 | 6 | CC = $(MPICC) 7 | 8 | keyhist_ipm_SOURCES = $(KEYHIST_SOURCES) 9 | keyhist_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | keyhist_SOURCES = $(KEYHIST_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /test/test.pmon/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = pmon pmon.ipm 3 | 4 | PMON_SOURCES = pmon.c 5 | 6 | CC = $(MPICC) 7 | AM_CFLAGS= -Wall -Wextra 8 | pmon_ipm_SOURCES = $(PMON_SOURCES) 9 | 10 | pmon_ipm_LDADD = -L$(top_builddir)/src/.libs/ -lipm 11 | 12 | pmon_SOURCES = $(PMON_SOURCES) 13 | 14 | include $(top_srcdir)/Makefile.clean-local 15 | -------------------------------------------------------------------------------- /utils/util.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include "ipm_parse.h" 6 | 7 | int IPM_DIAG(job_t 
*job, const char* format, ...) 8 | { 9 | int rv=0; 10 | va_list ap; 11 | 12 | if( !(job->quiet) ) { 13 | va_start (ap,format); 14 | vfprintf(stderr, format, ap); 15 | } 16 | 17 | return rv; 18 | } 19 | -------------------------------------------------------------------------------- /include/utest.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef UTEST_H_INCLUDED 3 | #define UTEST_H_INCLUDED 4 | 5 | #define VERIFY(myrank_, call_, bytes_, orank_, region_, count_) \ 6 | fprintf(stdout, "%03d.VERIFY: call=\"%s\" bytes=\"%d\" orank=\"%d\" region=\"%d\" count=\"%d\"\n", \ 7 | myrank_, call_, bytes_, orank_, region_, count_); 8 | 9 | 10 | #endif /* UTEST_H_INCLUDED */ 11 | -------------------------------------------------------------------------------- /test/test.fring/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = fring fring.ipm 3 | 4 | FRING_SOURCES = main.f 5 | 6 | F77 = $(MPIFC) 7 | 8 | fring_ipm_SOURCES = $(FRING_SOURCES) 9 | fring_ipm_LDADD = $(top_builddir)/src/.libs/libipmf.a $(top_builddir)/src/.libs/libipm.a 10 | 11 | fring_SOURCES = $(FRING_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /test/test.introspect/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = introspect.ipm 3 | 4 | INTROSPECT_SOURCES = main.c 5 | 6 | CC = $(MPICC) 7 | 8 | introspect_ipm_SOURCES = $(INTROSPECT_SOURCES) 9 | introspect_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | introspect_SOURCES = $(INTROSPECT_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /test/test.pcontrol/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = pcontrol 
pcontrol.ipm 3 | 4 | PCONTROL_SOURCES = main.c 5 | 6 | CC = $(MPICC) 7 | 8 | pcontrol_ipm_SOURCES = $(PCONTROL_SOURCES) 9 | pcontrol_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | pcontrol_SOURCES = $(PCONTROL_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | 15 | -------------------------------------------------------------------------------- /etc/fake_cufft_c.c: -------------------------------------------------------------------------------- 1 | 2 | /** HEADER_BEGIN **/ 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | 9 | #include "cuda.h" 10 | #include "cufft.h" 11 | 12 | /** HEADER_END **/ 13 | 14 | 15 | __CRET__ __real___CFNAME__(__CPARAMS__) 16 | { 17 | #if __RETURN_VALUE__ 18 | return 0; 19 | #endif 20 | } 21 | 22 | -------------------------------------------------------------------------------- /test/test.fhello/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = fhello fhello.ipm 3 | 4 | HELLO_SOURCES = main.f 5 | 6 | F77 = $(MPIF77) 7 | 8 | fhello_ipm_SOURCES = $(HELLO_SOURCES) 9 | fhello_ipm_LDADD = $(top_builddir)/src/.libs/libipmf.a $(top_builddir)/src/.libs/libipm.a 10 | 11 | fhello_SOURCES = $(HELLO_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /test/test.simple_mpi/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = simple_mpi simple_mpi.ipm 3 | 4 | SIMPLE_MPI_SOURCES = main.c 5 | 6 | CC = $(MPICC) 7 | 8 | simple_mpi_ipm_SOURCES = $(SIMPLE_MPI_SOURCES) 9 | simple_mpi_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | simple_mpi_SOURCES = $(SIMPLE_MPI_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | -------------------------------------------------------------------------------- /utils/.cvsignore: 
-------------------------------------------------------------------------------- 1 | ipm_path 2 | ipm_report 3 | ipm_parse 4 | *.o 5 | \#* 6 | *~ 7 | stamp-h1 8 | missing 9 | install-sh 10 | depcomp 11 | configure 12 | config.sub 13 | config.status 14 | config.log 15 | config.h.in 16 | config.h 17 | config.guess 18 | compile 19 | autom4te.cache 20 | aclocal.m4 21 | a.out.dSYM 22 | Makefile.in 23 | Makefile 24 | INSTALL 25 | .deps 26 | -------------------------------------------------------------------------------- /test/test.hello/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = hello hello.ipm 5 | 6 | HELLO_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | hello_ipm_SOURCES = $(HELLO_SOURCES) 11 | hello_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | 13 | hello_SOURCES = $(HELLO_SOURCES) 14 | 15 | include $(top_srcdir)/Makefile.clean-local 16 | -------------------------------------------------------------------------------- /test/test.posixio-read/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = posix-read posix-read.ipm 3 | 4 | POSIX_READ_SOURCES = main.c 5 | 6 | CC = $(MPICC) 7 | 8 | posix_read_ipm_SOURCES = $(POSIX_READ_SOURCES) 9 | posix_read_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | posix_read_SOURCES = $(POSIX_READ_SOURCES) 12 | 13 | 14 | include $(top_srcdir)/Makefile.clean-local 15 | -------------------------------------------------------------------------------- /test/test.posixio-helloworld/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | CC = $(MPICC) 3 | 4 | 5 | bin_PROGRAMS = posixio-helloworld posixio-helloworld.ipm 6 | 7 | SOURCES = main.c 8 | 9 | posixio_helloworld_ipm_SOURCES = $(SOURCES) 10 | posixio_helloworld_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 11 | 12 | posixio_helloworld_SOURCES = 
$(SOURCES) 13 | 14 | include $(top_srcdir)/Makefile.clean-local 15 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | IPM was originally developed and written by David Skinner 2 | 3 | Since version 0.810 significant coauthorship and contribution has been made by 4 | Nick Wright 5 | Noel Keen 6 | Karl Fuerlinger 7 | Sascha Hunold 8 | 9 | suggestions, contributions and collaborations welcome. 10 | -------------------------------------------------------------------------------- /etc/fake_posixio_c.c: -------------------------------------------------------------------------------- 1 | 2 | /** HEADER_BEGIN **/ 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define MPI3CONST const 12 | 13 | /** HEADER_END **/ 14 | 15 | 16 | __CRET__ __real___CFNAME__(__CPARAMS__) 17 | { 18 | #if __RETURN_VALUE__ 19 | return 0; 20 | #endif 21 | } 22 | 23 | -------------------------------------------------------------------------------- /test/test.status_ignore/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = status_ignore status_ignore.ipm 3 | 4 | STATUS_IGNORE_SOURCES = main.c 5 | 6 | CC = $(MPICC) 7 | 8 | status_ignore_ipm_SOURCES = $(STATUS_IGNORE_SOURCES) 9 | status_ignore_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | 11 | status_ignore_SOURCES = $(STATUS_IGNORE_SOURCES) 12 | 13 | include $(top_srcdir)/Makefile.clean-local 14 | 15 | -------------------------------------------------------------------------------- /etc/fake_cuda_c.c: -------------------------------------------------------------------------------- 1 | 2 | /** HEADER_BEGIN **/ 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | 9 | #include "cuda_runtime_api.h" 10 | #include "cuda.h" 11 | 12 | #define MPI3CONST const 13 | 14 | /** HEADER_END **/ 15 | 
16 | 17 | __CRET__ __real___CFNAME__(__CPARAMS__) 18 | { 19 | #if __RETURN_VALUE__ 20 | return 0; 21 | #endif 22 | } 23 | 24 | -------------------------------------------------------------------------------- /etc/fake_cublas_c.c: -------------------------------------------------------------------------------- 1 | 2 | /** HEADER_BEGIN **/ 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | 9 | #include "cuda.h" 10 | #include "cublas.h" 11 | 12 | #define MPI3CONST const 13 | 14 | /** HEADER_END **/ 15 | 16 | 17 | __CRET__ __real___CFNAME__(__CPARAMS__) 18 | { 19 | #if __RETURN_VALUE__ 20 | __CRET__ rv; 21 | return rv; 22 | #endif 23 | } 24 | 25 | -------------------------------------------------------------------------------- /include/jobdata.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | void ipm_get_job_id(char *id, int len); 4 | 5 | void ipm_get_job_user(char *user, int len); 6 | 7 | void ipm_get_job_allocation(char *allocation, int len); 8 | 9 | void ipm_get_mach_info(char *machi, int len); 10 | 11 | void ipm_get_mach_name(char *machn, int len); 12 | 13 | void ipm_get_exec_cmdline(char *cmdl, char *rpath); 14 | 15 | void ipm_get_exec_md5sum(char *md5sum, char *rpath); 16 | -------------------------------------------------------------------------------- /test/test.hello/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | int myrank, nprocs; 8 | 9 | MPI_Init( &argc, &argv ); 10 | 11 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 12 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 13 | 14 | fprintf(stderr, "Hello from rank %d of %d\n", 15 | myrank, nprocs ); 16 | 17 | MPI_Finalize(); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /src/ipm_modules.c: -------------------------------------------------------------------------------- 1 
| 2 | #include "ipm_core.h" 3 | #include "ipm_sizes.h" 4 | #include "ipm_modules.h" 5 | 6 | 7 | ipm_mod_t modules[MAXNUM_MODULES]; 8 | 9 | void ipm_module_init(struct ipm_module *mod) 10 | { 11 | mod->state=STATE_NOTINIT; 12 | mod->init=0; 13 | mod->output=0; 14 | mod->finalize=0; 15 | mod->xml=0; 16 | mod->regfunc=0; 17 | mod->name=0; 18 | mod->ct_offs=0; 19 | mod->ct_range=0; 20 | } 21 | -------------------------------------------------------------------------------- /test/test.bcast/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = bcast bcast.ipm 5 | 6 | BCAST_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | bcast_ipm_SOURCES = $(BCAST_SOURCES) $(top_srcdir)/include/utest.h 11 | bcast_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | bcast_ipm_CFLAGS = -DUTEST 13 | 14 | bcast_SOURCES = $(BCAST_SOURCES) 15 | 16 | 17 | include $(top_srcdir)/Makefile.clean-local 18 | -------------------------------------------------------------------------------- /test/test.gather/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = gather gather.ipm 5 | 6 | GATHER_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | gather_ipm_SOURCES = $(GATHER_SOURCES) $(top_srcdir)/include/utest.h 11 | gather_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | gather_ipm_CFLAGS = -DUTEST 13 | 14 | gather_SOURCES = $(GATHER_SOURCES) 15 | 16 | include $(top_srcdir)/Makefile.clean-local 17 | -------------------------------------------------------------------------------- /test/test.gatherv/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = gatherv gatherv.ipm 5 | 6 | GATHERV_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | gatherv_ipm_SOURCES = $(GATHERV_SOURCES) 
$(top_srcdir)/include/utest.h 11 | gatherv_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | gatherv_ipm_CFLAGS = -DUTEST 13 | 14 | gatherv_SOURCES = $(GATHERV_SOURCES) 15 | 16 | include $(top_srcdir)/Makefile.clean-local 17 | -------------------------------------------------------------------------------- /test/test.alltoall/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = alltoall alltoall.ipm 5 | 6 | ALLTOALL_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | alltoall_ipm_SOURCES = $(ALLTOALL_SOURCES) $(top_srcdir)/include/utest.h 11 | alltoall_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | alltoall_ipm_CFLAGS = -DUTEST 13 | 14 | alltoall_SOURCES = $(ALLTOALL_SOURCES) 15 | 16 | include $(top_srcdir)/Makefile.clean-local 17 | -------------------------------------------------------------------------------- /test/test.allgather/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = allgather allgather.ipm 5 | 6 | ALLGATHER_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | allgather_ipm_SOURCES = $(ALLGATHER_SOURCES) $(top_srcdir)/include/utest.h 11 | allgather_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | allgather_ipm_CFLAGS = -DUTEST 13 | 14 | allgather_SOURCES = $(ALLGATHER_SOURCES) 15 | 16 | include $(top_srcdir)/Makefile.clean-local 17 | -------------------------------------------------------------------------------- /test/test.allreduce/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = allreduce allreduce.ipm 5 | 6 | ALLREDUCE_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | allreduce_ipm_SOURCES = $(ALLREDUCE_SOURCES) $(top_srcdir)/include/utest.h 11 | allreduce_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | 
allreduce_ipm_CFLAGS = -DUTEST 13 | 14 | allreduce_SOURCES = $(ALLREDUCE_SOURCES) 15 | 16 | include $(top_srcdir)/Makefile.clean-local 17 | -------------------------------------------------------------------------------- /test/test.alltoallv/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = alltoallv alltoallv.ipm 5 | 6 | ALLTOALLV_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | alltoallv_ipm_SOURCES = $(ALLTOALLV_SOURCES) $(top_srcdir)/include/utest.h 11 | alltoallv_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | alltoallv_ipm_CFLAGS = -DUTEST 13 | 14 | alltoallv_SOURCES = $(ALLTOALLV_SOURCES) 15 | 16 | include $(top_srcdir)/Makefile.clean-local 17 | -------------------------------------------------------------------------------- /test/test.pomp-standalone/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | bin_PROGRAMS = pomp pomp.ipm 3 | 4 | POMP_SOURCES = main.c opari_omp.h pomp_lib.h 5 | 6 | CC = $(MPICC) 7 | 8 | pomp_ipm_SOURCES = $(POMP_SOURCES) 9 | pomp_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 10 | pomp_ipm_CFLAGS =-fopenmp 11 | pomp_ipm_LDFLAGS =-fopenmp 12 | 13 | pomp_SOURCES = $(POMP_SOURCES) 14 | pomp_CFLAGS =-fopenmp 15 | pomp_LDFLAGS =-fopenmp 16 | 17 | 18 | include $(top_srcdir)/Makefile.clean-local 19 | -------------------------------------------------------------------------------- /test/test.zgemm/Makefile: -------------------------------------------------------------------------------- 1 | 2 | EXTRADEP=fortran_thunking.o 3 | 4 | 5 | 6 | #LFLAGS = -L$(MKL_LIBDIR) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 7 | #LFLAGS = $(CUDA_LIB64) -lcublas 8 | LFLAGS = $(CUDA_LIB64) -lcublas -L$(MKL_LIBDIR) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 9 | 10 | MPIF90OPTS=-mp 11 | 12 | include ../Makefile_f90 13 | 14 | fortran_thunking.o : fortran_thunking.c fortran.h 15 | 
nvcc -c $(CUDA_INCLUDE) fortran_thunking.c 16 | 17 | -------------------------------------------------------------------------------- /test/test.allgatherv/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | AM_CPPFLAGS = -I$(top_srcdir)/include 3 | 4 | bin_PROGRAMS = allgatherv allgatherv.ipm 5 | 6 | ALLGATHERV_SOURCES = main.c 7 | 8 | CC = $(MPICC) 9 | 10 | allgatherv_ipm_SOURCES = $(ALLGATHERV_SOURCES) $(top_srcdir)/include/utest.h 11 | allgatherv_ipm_LDADD = $(top_builddir)/src/.libs/libipm.a 12 | allgatherv_ipm_CFLAGS = -DUTEST 13 | 14 | allgatherv_SOURCES = $(ALLGATHERV_SOURCES) 15 | 16 | include $(top_srcdir)/Makefile.clean-local 17 | 18 | -------------------------------------------------------------------------------- /test/test.jacobi/lawrencium.pbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # specify the queue: lr_debug, lr_batch 3 | #PBS -q lr_regular 4 | # specify the account your job will be charged to 5 | ##PBS -A 6 | # specify number of nodes, processors per node and node property 7 | #PBS -l nodes=4:ppn=4:lr 8 | # specify output for STDERR and STDOUT 9 | #PBS -e job.err 10 | #PBS -o job.out 11 | #executable commands... 
12 | # change to working directory 13 | 14 | cd $PBS_O_WORKDIR 15 | mpirun -np 64 ./jacobi.ipm 1000 1000 8 8 16 | -------------------------------------------------------------------------------- /include/mod_pmon.h: -------------------------------------------------------------------------------- 1 | #ifndef PMON_H 2 | #define PMON_H 3 | 4 | #include "ipm_modules.h" 5 | 6 | typedef struct ipm_pmon 7 | { 8 | double node_initial_energy; 9 | double node_final_energy; 10 | 11 | double cpu_initial_energy; 12 | double cpu_final_energy; 13 | 14 | double mem_initial_energy; 15 | double mem_final_energy; 16 | } ipm_pmon_t; 17 | 18 | int mod_pmon_init(ipm_mod_t* mod, int flags); 19 | double ipm_pmon_get_region_energy(int id); 20 | 21 | 22 | #endif //PMON_H 23 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I m4 2 | 3 | SUBDIRS = src bin etc utils 4 | 5 | tests : all 6 | cd test && $(MAKE) || exit 1 7 | 8 | .PHONY: tests 9 | 10 | tests-clean: 11 | cd test && $(MAKE) clean 12 | 13 | #clean-local: 14 | # cd test && $(MAKE) clean 15 | 16 | DIST_SUBDIRS = src bin etc test utils 17 | # EXTRA_DIST = utils/*.cc utils/configure* utils/*.h utils/Makefile.* 18 | EXTRA_DIST = utils/cubew/lib/*.c utils/cubew/lib/*.h utils/cubew/lib/Makefile utils/mf/common.defs utils/Makefile.defs 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /test/test.fhello/main.f: -------------------------------------------------------------------------------- 1 | program hello 2 | implicit none 3 | integer size, rank, ierr 4 | c integer request, provide 5 | include 'mpif.h' 6 | 7 | c request = MPI_THREAD_SINGLE 8 | 9 | c call pmpi_init_thread(request, provide, ierr) 10 | call mpi_init(ierr) 11 | call mpi_comm_size(MPI_COMM_WORLD, size, ierr) 12 | call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) 13 | 14 | write(6, 
"(2(a,i3))") " MPI: size = ", size, " rank = ", rank 15 | call mpi_finalize(ierr) 16 | end 17 | -------------------------------------------------------------------------------- /test/test.printenv/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | int main( int argc, char* argv[], char** envp ) 9 | { 10 | int myrank, nprocs; 11 | 12 | char **env; 13 | MPI_Init( &argc, &argv); 14 | 15 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 16 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 17 | 18 | for( env=envp; *env!=0; env++ ) 19 | { 20 | fprintf(stderr, "%d of %d:%s\n", myrank, nprocs, (*env)); 21 | } 22 | 23 | MPI_Finalize(); 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Integrated Performance Monitoring 2 | ================================= 3 | 4 | Overview: 5 | 6 | Scalable, low-overhead performance profiling of application codes 7 | 8 | Reporting and analysis of performance profiles 9 | 10 | Portable & Open Source. 11 | 12 | 13 | Parallel 14 | ======== 15 | 16 | interupt_handler() 17 | 18 | main() { 19 | 20 | ipm_init() 21 | ... 
22 | ipm_finalize() 23 | 24 | } 25 | 26 | Serial 27 | ====== 28 | 29 | atexit_handler() 30 | 31 | ipm_init() 32 | main() { 33 | code() 34 | } 35 | ipm_finalize() 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /test/test.pomp-standalone/opari_omp.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************/ 2 | /* OPARI Version 1.1 */ 3 | /* Copyright (C) 2001 */ 4 | /* Forschungszentrum Juelich, Zentralinstitut fuer Angewandte Mathematik */ 5 | /*************************************************************************/ 6 | 7 | #ifndef OPARI_OMP_H 8 | #define OPARI_OMP_H 9 | 10 | #ifdef _OPENMP 11 | #include 12 | #endif 13 | 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /utils/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | 3 | if ENABLE_PARSER 4 | 5 | AM_CPPFLAGS = -I../include -I./cubew/lib 6 | 7 | bin_PROGRAMS = ipm_parse 8 | 9 | ipm_parse_CXXFLAGS=-I./cubew/lib 10 | ipm_parse_LDFLAGS=-L./cubew/lib 11 | ipm_parse_LDADD=-lcubew3 12 | 13 | ipm_parse_SOURCES = ipm_parse.cc \ 14 | ipm_parse.h \ 15 | getopts.cc \ 16 | read_ipm.cc \ 17 | util.cc \ 18 | write_cube.cc \ 19 | write_cube.h \ 20 | write_html.cc \ 21 | write_html.h \ 22 | ../src/report_banner.c 23 | 24 | report_banner.o: ../src/report_banner.c 25 | $(CXX) $(CXXFLAGS) -I../include -c $< 26 | 27 | endif 28 | -------------------------------------------------------------------------------- /test/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | SUBDIRS = test.allgather \ 3 | test.allgatherv \ 4 | test.allreduce \ 5 | test.alltoall \ 6 | test.alltoallv \ 7 | test.bcast \ 8 | test.fhello \ 9 | test.forever \ 10 | test.fork \ 11 | test.fring \ 12 | test.gather \ 13 | test.gatherv \ 14 | test.hello \ 15 
| test.jacobi \ 16 | test.keyhist \ 17 | test.pmon \ 18 | test.pcontrol \ 19 | test.posixio-helloworld \ 20 | test.posixio-read \ 21 | test.simple_mpi \ 22 | test.status_ignore 23 | 24 | # test.pomp-standalone (sahu: disabled for now) 25 | 26 | -------------------------------------------------------------------------------- /etc/wrap_mpiio_f.c: -------------------------------------------------------------------------------- 1 | 2 | /* ---- wrapping FFNAME ---- */ 3 | 4 | /* 5 | * 6 | */ 7 | 8 | #if 0 9 | 10 | FRET FFNAME(FPARAMS) 11 | { 12 | #if HAVE_CREQ /* HAVE _CREQ */ 13 | MPI_Request creq; 14 | #endif 15 | #if HAVE_CSTAT /* HAVE _CSTAT */ 16 | MPI_Status cstat; 17 | #endif 18 | 19 | *info=CFNAME(F2CARGS); 20 | 21 | #if HAVE_CSTAT /* HAVE _CSTAT */ 22 | if (*info==MPI_SUCCESS) 23 | MPI_Status_c2f(&cstat, status); 24 | #endif 25 | 26 | #if HAVE_CREQ /* HAVE _CREQ */ 27 | if( *info==MPI_SUCCESS ) 28 | *req=MPI_Request_c2f(creq); 29 | #endif 30 | } 31 | 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /etc/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | WRAPPER_TEMPLATES = $(wildcard $(srcdir)/fake_*.c $(srcdir)/wrap_*.c) 3 | WRAPPER_HELPER = $(wildcard $(srcdir)/ipm_key_*) 4 | 5 | #WRAPPER_TEMPLATES = \ 6 | fake_cublas_c.c fake_cuda_c.c fake_cufft_c.c fake_posixio_c.c \ 7 | wrap_cublas_c.c wrap_cufft_c.c wrap_mpi_f.c wrap_mpiio_f.c \ 8 | wrap_cuda_c.c wrap_mpi_c.c wrap_mpiio_c.c wrap_posixio_c.c 9 | #WRAPPER_HELPER = \ 10 | ipm_key_cublas ipm_key_cufft ipm_key_mpi ipm_key_posixio \ 11 | ipm_key_cuda ipm_key_mem ipm_key_mpiio 12 | 13 | EXTRA_DIST = $(WRAPPER_HELPER) $(WRAPPER_TEMPLATES) 14 | sysconf_DATA = $(WRAPPER_HELPER) 15 | 16 | 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | 4 | # Libraries 
5 | *.lib 6 | *.a 7 | 8 | # Shared objects (inc. Windows DLLs) 9 | *.dll 10 | *.so 11 | *.so.* 12 | *.dylib 13 | 14 | # Executables 15 | *.exe 16 | *.out 17 | *.app 18 | 19 | # autotools 20 | Makefile.in 21 | m4/libtool.m4 22 | m4/libtool.m4_orig 23 | m4/ltoptions.m4 24 | m4/ltsugar.m4 25 | m4/ltversion.m4 26 | m4/lt~obsolete.m4 27 | missing 28 | aclocal.m4 29 | autom4te.cache 30 | compile 31 | config.guess 32 | config.h.in 33 | config.sub 34 | configure 35 | install-sh 36 | ltmain.sh 37 | INSTALL 38 | *~ 39 | 40 | # other generated files 41 | GEN.calltable_mpi.h 42 | GEN.fproto.mpi.h 43 | -------------------------------------------------------------------------------- /etc/ipm_key_mem: -------------------------------------------------------------------------------- 1 | # 2 | ##module MEM 3 | # 4 | #1|MEM_MALLOC_ID|void* malloc(size_t size)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE 5 | #2|MEM_CALLOC_ID|void* calloc(size_t nmemb, size_t size)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE 6 | #3|MEM_REALLOC_ID|void* realloc(void * ptr, size_t size)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE 7 | 4|MEM_FREE_ID|void free(void * ptr)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE 8 | #5|MEM_MMAP_ID|void* mmap(void * start, size_t length, int prot , int flags, int fd, off_t offset)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE 9 | #6|MEM_MUNMAP_ID|int munmap(void * start, size_t length)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE 10 | -------------------------------------------------------------------------------- /include/ipm_types.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TYPES_H_INCLUDED 3 | #define TYPES_H_INCLUDED 4 | 5 | #define IPM_ADDR_TYPE unsigned long long int 6 | #define IPM_ADDR_TYPEF "%p" 7 | 8 | /* 9 | #define IPM_COUNT_TYPE unsigned long long int 10 | #define IPM_COUNT_TYPEF "llu" 11 | #define IPM_COUNT_MPITYPE MPI_UNSIGNED_LONG_LONG 12 | */ 13 | 14 | #define IPM_COUNT_TYPE unsigned long int 15 | #define IPM_COUNT_TYPEF 
"lu" 16 | #define IPM_COUNT_MPITYPE MPI_UNSIGNED_LONG 17 | #define IPM_COUNT_MAX 4294967295UL 18 | 19 | #define IPM_RANK_TYPE int 20 | #define IPM_RANK_TYPEF "d" 21 | 22 | 23 | 24 | 25 | #endif /* TYPES_H_INCLUDED */ 26 | -------------------------------------------------------------------------------- /test/test.forever/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #define SIZE 10 7 | #define DATATYPE MPI_DOUBLE 8 | #define ROOT 0 9 | 10 | int main( int argc, char* argv[] ) 11 | { 12 | int i; 13 | int myrank, nprocs; 14 | char *buf; 15 | int dsize; 16 | 17 | MPI_Init( &argc, &argv ); 18 | 19 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 20 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 21 | PMPI_Type_size(DATATYPE, &dsize); 22 | 23 | buf=(char*)malloc(SIZE*dsize); 24 | 25 | while(1) /* forever */ 26 | { 27 | MPI_Bcast( buf, SIZE, DATATYPE, ROOT, MPI_COMM_WORLD ); 28 | } 29 | 30 | MPI_Finalize(); 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /test/test.pmon/pmon.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "mpi.h" 5 | 6 | int main(int argc, char* argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Pcontrol(1, "all_sleep_calls"); 10 | MPI_Pcontrol(1, "first_2_sleeps"); 11 | MPI_Pcontrol(1, "sleep_5"); 12 | sleep(5); 13 | MPI_Pcontrol(-1, "sleep_5"); 14 | MPI_Pcontrol(1, "sleep_10"); 15 | sleep(10); 16 | MPI_Pcontrol(-1, "sleep_10"); 17 | MPI_Pcontrol(-1, "first_2_sleeps"); 18 | MPI_Pcontrol(1, "sleep_15"); 19 | sleep(15); 20 | MPI_Pcontrol(-1, "sleep_15"); 21 | MPI_Pcontrol(-1, "all_sleep_calls"); 22 | 23 | MPI_Finalize(); 24 | return 0; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /include/mod_mpiio.h: 
-------------------------------------------------------------------------------- 1 | 2 | #ifndef MPIIO_H_INCLUDED 3 | #define MPIIO_H_INCLUDED 4 | 5 | #include 6 | #include "ipm_modules.h" 7 | 8 | int mod_mpiio_init(ipm_mod_t* mod, int flags); 9 | 10 | typedef struct mpiiodata 11 | { 12 | double iotime; 13 | double iotime_e; 14 | } mpiiodata_t; 15 | 16 | #define IPM_MPIIO_RANK_NONE_C(rank_) rank_=IPM_MPI_RANK_NORANK; 17 | #define IPM_MPIIO_RANK_NONE_F(rank_) rank_=IPM_MPI_RANK_NORANK; 18 | 19 | extern mpiiodata_t mpiiodata[MAXNUM_REGIONS]; 20 | 21 | #define IPM_MPIIO_BYTES_NONE_C( bytes_ ) \ 22 | bytes_=0; 23 | 24 | #define IPM_MPIIO_BYTES_COUNT_DATATYPE_C( bytes_ ) \ 25 | { \ 26 | PMPI_Type_size(datatype, &bytes_); \ 27 | bytes_ *= count; \ 28 | } 29 | 30 | #endif /* MPIIO_H_INCLUDED */ 31 | -------------------------------------------------------------------------------- /utils/configure.ac: -------------------------------------------------------------------------------- 1 | 2 | AC_INIT([ipmparse], [1.0.0], [ipm-dev@nersc.gov]) 3 | AC_CONFIG_SRCDIR([ipm_parse.cc]) 4 | 5 | AC_CANONICAL_TARGET 6 | AM_INIT_AUTOMAKE 7 | 8 | AC_PROG_CXX 9 | 10 | #if test "x$CC" != "xgcc"; then 11 | # echo "CC=gcc required... giving up" 12 | # exit 1 13 | #fi 14 | 15 | AM_PROG_CC_C_O 16 | AC_CONFIG_HEADERS(config.h) 17 | 18 | AC_PROG_INSTALL 19 | 20 | if test ! -f "cubew/lib/libcubew3.a"; then 21 | echo "build libcubew3 in cubew/lib first!" 22 | exit 1 23 | else 24 | CXXFLAGS="$CXXFLAGS -I./cubew/lib" 25 | LDFLAGS="$LDFLAGS -L./cubew/lib/" 26 | LIBS="$LIBS -lcubew3" 27 | fi 28 | 29 | AC_CHECK_LIB(mxml, mxmlLoadFile, [], 30 | [ echo "To compile ipm_parse you need to have libmxml installed." 
] 31 | ) 32 | 33 | AC_CONFIG_FILES([ 34 | Makefile 35 | ]) 36 | 37 | AC_OUTPUT 38 | 39 | 40 | -------------------------------------------------------------------------------- /test/test.allgather/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #define SIZE 10 7 | #define DATATYPE MPI_DOUBLE 8 | #define REPEAT 10 9 | 10 | int main( int argc, char* argv[] ) 11 | { 12 | int i, j; 13 | int myrank, nprocs; 14 | char *sbuf, *rbuf; 15 | int dsize; 16 | 17 | MPI_Init( &argc, &argv ); 18 | 19 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 20 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 21 | MPI_Type_size(DATATYPE, &dsize); 22 | 23 | sbuf=(char*)malloc(SIZE*dsize); 24 | rbuf=(char*)malloc(SIZE*dsize*nprocs); 25 | 26 | for( i=0; i 3 | #include 4 | #include 5 | 6 | #define SIZE 10 7 | #define DATATYPE MPI_DOUBLE 8 | #define REPEAT 5 9 | 10 | int main( int argc, char* argv[] ) 11 | { 12 | int i, j; 13 | int myrank, nprocs; 14 | char *sbuf, *rbuf; 15 | int dsize; 16 | 17 | MPI_Init( &argc, &argv ); 18 | 19 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 20 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 21 | MPI_Type_size(DATATYPE, &dsize); 22 | 23 | sbuf=(char*)malloc(SIZE*dsize*nprocs); 24 | rbuf=(char*)malloc(SIZE*dsize*nprocs); 25 | 26 | for( i=0; i 10 | #include 11 | 12 | 13 | int main( int argc, char* argv[]) 14 | { 15 | 16 | POMP_Parallel_fork(&omp_rd_1); 17 | #line 7 "main.c" 18 | #pragma omp parallel POMP_DLIST_00001 19 | { POMP_Parallel_begin(&omp_rd_1); 20 | #line 8 "main.c" 21 | { 22 | fprintf(stderr, "Thread %d of %d\n", 23 | omp_get_thread_num(), omp_get_num_threads()); 24 | if( omp_get_thread_num()==0 ) 25 | sleep(1); 26 | } 27 | POMP_Barrier_enter(&omp_rd_1); 28 | #pragma omp barrier 29 | POMP_Barrier_exit(&omp_rd_1); 30 | POMP_Parallel_end(&omp_rd_1); } 31 | POMP_Parallel_join(&omp_rd_1); 32 | #line 11 "main.c" 33 | 34 | return 0; 35 | } 36 | 
-------------------------------------------------------------------------------- /include/mod_papi.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef PAPI_H_INCLUDED 3 | #define PAPI_H_INCLUDED 4 | 5 | #include 6 | 7 | #include "ipm_modules.h" 8 | #include "ipm_sizes.h" 9 | 10 | #if PAPI_VERSION>=PAPI_VERSION_NUMBER(3,9,0,0) 11 | #else 12 | #define PAPI_COMPONENT_INDEX(evt_) 0 13 | #undef MAXNUM_PAPI_COMPONENTS 14 | #define MAXNUM_PAPI_COMPONENTS 1 15 | #endif 16 | 17 | typedef struct 18 | { 19 | int code; 20 | char name[MAXSIZE_PAPI_EVTNAME]; 21 | } ipm_papi_event_t; 22 | 23 | typedef struct 24 | { 25 | int evtset; 26 | int nevts; 27 | short ctr2evt[MAXNUM_PAPI_COUNTERS]; 28 | int domain; 29 | } ipm_papi_evtset_t; 30 | 31 | int mod_papi_init(ipm_mod_t *mod, int flags); 32 | 33 | int ipm_papi_read(long long *val); 34 | 35 | double ipm_papi_gflops(long long *ctr, double time); 36 | 37 | 38 | #endif /* PAPI_H_INCLUDED */ 39 | -------------------------------------------------------------------------------- /test/test.python/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from multiprocessing import Process 3 | import os 4 | import time 5 | 6 | def sleeper(name, seconds): 7 | print 'starting child process with id: ', os.getpid() 8 | print 'parent process:', os.getppid() 9 | print 'sleeping for %s ' % seconds 10 | time.sleep(seconds) 11 | print "Done sleeping" 12 | 13 | 14 | if __name__ == '__main__': 15 | print "in parent process (id %s)" % os.getpid() 16 | p = Process(target=sleeper, args=('bob', 5)) 17 | p.start() 18 | print "in parent process after child process start" 19 | print "parent process about to join child process" 20 | p.join() 21 | print "in parent process after child process join" 22 | print "parent process exiting with id ", os.getpid() 23 | print "The parent's parent process:", os.getppid() 24 | 25 | 
-------------------------------------------------------------------------------- /test/test.allreduce/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #define SIZE 100 7 | #define DATATYPE MPI_BYTE 8 | #define REPEAT 5 9 | 10 | int main( int argc, char* argv[] ) 11 | { 12 | int i, j; 13 | int myrank, nprocs; 14 | char *sbuf, *rbuf; 15 | char *svbuf, *rvbuf; 16 | int *scnt, *sdpl; 17 | int dsize; 18 | 19 | MPI_Init( &argc, &argv ); 20 | 21 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 22 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 23 | PMPI_Type_size(DATATYPE, &dsize); 24 | 25 | rbuf=(char*)malloc(SIZE*dsize); 26 | sbuf=(char*)malloc(SIZE*dsize); 27 | 28 | for( i=0; i 3 | #include 4 | #include 5 | 6 | #define REPEAT 10 7 | 8 | int main( int argc, char* argv[] ) 9 | { 10 | int i, res; 11 | int myrank, nprocs; 12 | MPI_Group g1; 13 | MPI_Comm com1, com2, com3; 14 | 15 | MPI_Init( &argc, &argv ); 16 | 17 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 18 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 19 | 20 | for( i=0; i 6 | #include 7 | #include 8 | #include 9 | 10 | #include "ipm_parse.h" 11 | 12 | using std::map; 13 | using std::list; 14 | using std::vector; 15 | 16 | typedef struct 17 | { 18 | cube_t *cube; 19 | cube_machine *machine; 20 | 21 | // root metrics 22 | cube_metric *time; 23 | cube_metric *calls; 24 | 25 | // metrics for each func 26 | std::map tmetrics; 27 | std::map cmetrics; 28 | 29 | // cnodes for each region 30 | std::map cnodes; 31 | 32 | // thread for each rank 33 | std::vector threads; 34 | 35 | } cubedata_t; 36 | 37 | 38 | void write_cube_defs(FILE *f, job_t *job, cubedata_t *cd); 39 | 40 | #endif /* IPM_CUBE_H_INCLUDED */ 41 | -------------------------------------------------------------------------------- /test/test.gather/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | 
7 | #define SIZE 10 8 | #define DATATYPE MPI_DOUBLE 9 | #define ROOT 1 10 | #define REPEAT 1 11 | 12 | int main( int argc, char* argv[] ) 13 | { 14 | int i, j; 15 | int myrank, nprocs; 16 | char *sbuf, *rbuf; 17 | int dsize; 18 | 19 | MPI_Init( &argc, &argv ); 20 | 21 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 22 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 23 | PMPI_Type_size(DATATYPE, &dsize); 24 | 25 | rbuf=0; 26 | if( myrank==ROOT ) { 27 | rbuf=(char*)malloc(SIZE*nprocs*dsize); 28 | } 29 | 30 | sbuf=(char*)malloc(SIZE*dsize); 31 | 32 | for( i=0; i 3 | #include 4 | #include 5 | 6 | 7 | #define SIZE 10 8 | #define DATATYPE MPI_DOUBLE 9 | #define ROOT 1 10 | #define REPEAT 1 11 | 12 | int main( int argc, char* argv[] ) 13 | { 14 | int i, j; 15 | int myrank, nprocs; 16 | char *sbuf, *rbuf; 17 | int dsize; 18 | 19 | MPI_Init( &argc, &argv ); 20 | 21 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 22 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 23 | PMPI_Type_size(DATATYPE, &dsize); 24 | 25 | rbuf=0; 26 | if( myrank==ROOT ) { 27 | rbuf=(char*)malloc(SIZE*nprocs*dsize); 28 | } 29 | 30 | sbuf=(char*)malloc(SIZE*dsize); 31 | 32 | for( i=0; i 3 | #include 4 | #include 5 | #include 6 | 7 | #define SIZE 10 8 | #define DATATYPE MPI_DOUBLE 9 | #define ROOT 0 10 | #define REPEAT 5 11 | 12 | int main( int argc, char* argv[] ) 13 | { 14 | int i; 15 | int myrank, nprocs; 16 | char *buf; 17 | int dsize; 18 | MPI_Status status; 19 | 20 | MPI_Init( &argc, &argv ); 21 | 22 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 23 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 24 | PMPI_Type_size(DATATYPE, &dsize); 25 | 26 | buf=(char*)malloc(SIZE*dsize); 27 | for( i=0; i 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | int main() 11 | { 12 | pid_t childpid; 13 | int retval; 14 | int status; 15 | 16 | childpid = fork(); 17 | 18 | fopen("/dev/null", "r"); 19 | 20 | if (childpid >= 0) /* fork succeeded */ 21 | { 22 | if (childpid == 0) /* fork() returns 0 to the child process 
*/ 23 | { 24 | fopen("/dev/null", "r"); 25 | 26 | fprintf(stderr, "CHILD: pid=%d\n", getpid()); 27 | abort(); 28 | sleep(1); /* sleep for 1 second */ 29 | 30 | 31 | } 32 | else /* fork() returns new pid to the parent process */ 33 | { 34 | fopen("/dev/null", "r"); 35 | fopen("/dev/null", "r"); 36 | fprintf(stderr, "PARENT: pid=%d\n", getpid()); 37 | sleep(1); 38 | } 39 | } 40 | else 41 | { 42 | perror("fork"); /* display error message */ 43 | exit(0); 44 | } 45 | return 0; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /utils/write_html.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef WRITE_HTML_H_INCLUDED 3 | #define WRITE_HTML_H_INCLUDED 4 | 5 | #include 6 | #include "ipm_parse.h" 7 | 8 | extern char buf1[128]; 9 | extern char buf2[128]; 10 | 11 | char *print_time(char *, int, struct timeval tv); 12 | void write_html(FILE *f, job_t *job, banner_t *b); 13 | 14 | 15 | #define JOBNAME(job) basename(job->cmdline.c_str()) 16 | #define CMDPATH(job) (job->cmdline.c_str()) 17 | #define CODENAME(job) basename(job->cmdline.c_str()) 18 | #define MAXMEM(job,gs) gs.dsum 19 | #define USERNAME(job) (job->username.c_str()) 20 | #define MPI_TASKS(job) (job->ntasks) 21 | #define HOST(job) (job->hostname.c_str()) 22 | #define WALLCLOCK(job) "wallclock" 23 | #define GROUP(job) "group" 24 | #define COMM(job) "comm" 25 | #define STATE(job) "completed" 26 | #define GFLOP_SEC(job) "gflops" 27 | #define START(job) print_time(buf1, 128, job->start) 28 | #define STOP(job) print_time(buf2, 128, job->final) 29 | 30 | 31 | #endif /* WRITE_HTML_H_INCLUDED */ 32 | -------------------------------------------------------------------------------- /src/ipm_synopsis.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | from stat mech var(x) = E[x^2] - E[x]^2 [single pass] 4 | http://en.wikipedia.org/wiki/Computational_formula_for_the_variance 5 | 
http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 6 | */ 7 | 8 | MPI_Op ipm_SUM_OP; 9 | MPI_Op ipm_MIN_OP; 10 | MPI_Op ipm_MAX_OP; 11 | 12 | 13 | /* globally synchronizing collective */ 14 | int ipm_synopsis(void *sendbuf, void *recvbuf, int dim, int *N, int *n, char *datatype, char *op, int root) { 15 | int rv; 16 | int size, rank; 17 | int W[3]; 18 | 19 | if(dim <= 0 || dim > 3) { 20 | printf("ipm_synopsis failed dim="%d,dim); exit(1); 21 | } 22 | 23 | for(i=0;i<3;i++) { 24 | if(dim >= 1) { 25 | if(n[i] <= i) { printf("ipm_synopsis failed n[%d]="%d,n[%d],i,i); exit(1); } 26 | W[i] = N[i]/n[i]; 27 | if(W[i]*n[i] != N[i]) { 28 | printf("ipm_synopsis failed N[%d]\%n[%d] != 0",i,i); exit(1); 29 | } 30 | } 31 | 32 | #ifdef HAVE_MPI 33 | PMPI_Comm_size( MPI_COMM_WORLD, &size); 34 | PMPI_Comm_rank( MPI_COMM_WORLD, &rank); 35 | if(!rank) { 36 | } else { 37 | PMPI_Send( 38 | } 39 | #endif 40 | 41 | return rv; 42 | } 43 | 44 | 45 | -------------------------------------------------------------------------------- /test/test.mpiio/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define SIZE 10 8 | #define DATATYPE MPI_DOUBLE 9 | #define ROOT 0 10 | #define REPEAT 5 11 | #define FILENAME "test.out" 12 | 13 | int main( int argc, char* argv[] ) 14 | { 15 | int i, myrank, nprocs; 16 | char *buf; 17 | int dsize; 18 | 19 | MPI_File fh; 20 | MPI_Status status; 21 | MPI_Info info; 22 | 23 | MPI_Init( &argc, &argv ); 24 | 25 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 26 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 27 | MPI_Type_size(DATATYPE, &dsize); 28 | 29 | buf=(char*)malloc(SIZE*dsize); 30 | for( i=0; i 3 | #include 4 | #include 5 | 6 | #define SIZE 10 7 | #define DATATYPE MPI_BYTE 8 | #define REPEAT 5 9 | 10 | int main( int argc, char* argv[] ) 11 | { 12 | int i, j; 13 | int myrank, nprocs; 14 | char *sbuf, *rbuf; 15 | int *rcnt, *rdpl, rsize; 16 | int dsize; 
17 | 18 | MPI_Init( &argc, &argv ); 19 | 20 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 21 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 22 | MPI_Type_size(DATATYPE, &dsize); 23 | 24 | rcnt = (int*) malloc(nprocs*(sizeof(int))); 25 | rdpl = (int*) malloc(nprocs*(sizeof(int))); 26 | 27 | for( i=0; i 3 | #include 4 | #include 5 | 6 | #define SIZE 10 7 | #define DATATYPE MPI_DOUBLE 8 | #define ROOT 1 9 | #define REPEAT 5 10 | 11 | int main( int argc, char* argv[] ) 12 | { 13 | int i, j; 14 | int myrank, nprocs; 15 | char *sbuf, *rbuf; 16 | int *rcnt, *rdpl; 17 | int dsize; 18 | 19 | MPI_Init( &argc, &argv ); 20 | 21 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 22 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 23 | PMPI_Type_size(DATATYPE, &dsize); 24 | 25 | rbuf=0; 26 | rcnt=0; rdpl=0; 27 | if( myrank==ROOT ) 28 | { 29 | rbuf=(char*)malloc(SIZE*dsize * ((nprocs*(nprocs+1))/2+nprocs) ); 30 | rcnt=(int*) malloc(sizeof(int)*nprocs); 31 | rdpl=(int*) malloc(sizeof(int)*nprocs); 32 | 33 | for( i=0; i 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../include/ipm_time.h" 8 | 9 | #define INNER 10 10 | #define OUTER 50 11 | 12 | int main( int argc, char* argv[] ) 13 | { 14 | int i, j; 15 | int myrank, nprocs; 16 | char buf[256]; 17 | 18 | MPI_Init( &argc, &argv); 19 | 20 | PMPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 21 | PMPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 22 | 23 | MPI_Bcast(buf, 1, MPI_BYTE, 0, MPI_COMM_WORLD); 24 | 25 | for( i=0; i> ./$TEST_NAME.c < 19 | #include 20 | #include 21 | int main(int argc, char **argv) { 22 | MPI_Status s; 23 | s.$tag = 0; 24 | return 0; 25 | } 26 | EOF 27 | 28 | cat >> ./run <> $CONFIG_LOG 40 | ./run >> $CONFIG_LOG 2>&1 41 | if test $? 
== 0 ; then 42 | # echo "yes" 43 | MPI_STATUS_COUNT=$tag 44 | break 45 | # else 46 | # echo "no" 47 | fi 48 | cd $BUILD_ROOT 49 | done 50 | 51 | if test "x$MPI_STATUS_COUNT" == "x" ; then 52 | AC_MSG_RESULT( unknown ) 53 | exit 1 54 | else 55 | AC_MSG_RESULT( $MPI_STATUS_COUNT ) 56 | AC_SUBST(IPM_MPISTATUSCOUNT, $MPI_STATUS_COUNT) 57 | fi 58 | 59 | cd $CWD 60 | 61 | ]) 62 | -------------------------------------------------------------------------------- /bin/hpcenv.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $hostname = `hostname`; 4 | $os = `uname -s`; 5 | $os =~ s/\n//g; 6 | 7 | if( $os eq "Linux" ) { 8 | $res = `grep CentOS /etc/issue`; 9 | if( $res =~ /CentOS/ ) { 10 | $os = "Linux-CentOS"; 11 | } 12 | } 13 | 14 | $system = "$os"; 15 | 16 | if( $hostname =~ /^nid\d\d\d\d\d/ ) { 17 | $system = "Franklin"; 18 | } 19 | if( $hostname =~ /^hopper/ ) { 20 | $system = "Hopper"; 21 | } 22 | if( $hostname =~ /^kraken.*/ ) { 23 | $system = "Kraken"; 24 | } 25 | if( $hostname =~ /ranger/ ) { 26 | $system = "Ranger"; 27 | } 28 | if( $hostname =~ /^n\d\d\d\d.scs\d\d/ ) { 29 | $system = "Lawrencium"; 30 | } 31 | if( $hostname =~ /^a01/ ) { 32 | $system = "hlrb2"; 33 | } 34 | if( $hostname =~ /ipm2dev/ ) { 35 | $system = "ipm2dev"; 36 | } 37 | if( $hostname =~ /turing/ ) { 38 | $system = "turing"; 39 | } 40 | if( $hostname =~ /tesla/ ) { 41 | $system = "tesla"; 42 | } 43 | if( $hostname =~ /cvrsvc/ ) { 44 | $system = "carver"; 45 | } 46 | if( $hostname =~ /salzburg/ ) { 47 | $system = "salzburg"; 48 | } 49 | if( $hostname =~ /^atv/ ) { 50 | $system = "atvcluster"; 51 | } 52 | if( $hostname =~ /^lx/ ) { 53 | $system = "LRZ_Linux_Cluster"; 54 | } 55 | if( $hostname =~ /cloud02/ ) { 56 | $system = "LMU"; 57 | } 58 | 59 | 60 | 61 | 62 | print "$system\n"; 63 | -------------------------------------------------------------------------------- /test/test.bcast/main.c: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../include/ipm_time.h" 8 | 9 | 10 | #define SIZE 10 11 | #define DATATYPE MPI_DOUBLE 12 | #define ROOT 0 13 | #define REPEAT 1 14 | 15 | int main( int argc, char* argv[] ) 16 | { 17 | int i; 18 | int myrank, nprocs; 19 | char *buf; 20 | int dsize; 21 | double t1, t2; 22 | 23 | int req, prov; 24 | req = MPI_THREAD_SINGLE; 25 | 26 | MPI_Init( &argc, &argv); 27 | 28 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 29 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 30 | PMPI_Type_size(DATATYPE, &dsize); 31 | 32 | buf=(char*)malloc(SIZE*dsize); 33 | 34 | IPM_TIMESTAMP(t1); 35 | for( i=0; i 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../include/ipm_time.h" 8 | 9 | void fibsender(int myrank, int n); 10 | void fibreceiver(int myrank, int n); 11 | 12 | 13 | #define REPEAT 1 14 | 15 | int main( int argc, char* argv[] ) 16 | { 17 | int myrank, nprocs; 18 | 19 | 20 | MPI_Init( &argc, &argv); 21 | 22 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 23 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 24 | 25 | if( nprocs%2 ) { 26 | fprintf(stderr, "Use even number of procs!\n"); 27 | exit(1); 28 | } 29 | /* pair ranks: even rank r sends to r+1, odd rank r receives from r-1, so no rank addresses a peer outside [0,nprocs) */ 30 | if( myrank%2==0 ) fibsender(myrank, 5); 31 | else fibreceiver(myrank, 5); 32 | 33 | MPI_Finalize(); 34 | return 0; 35 | } 36 | 37 | /* Recursively issues one 1-byte MPI_Send to rank myrank+1 for every n==1 leaf of the Fibonacci call tree of n. */ 38 | void fibsender(int myrank, int n) { 39 | char buf[1] = {0}; 40 | 41 | if( n==0 ) return; 42 | 43 | if( n==1 ) { 44 | MPI_Send( &buf, 1, MPI_BYTE, myrank+1, 33, MPI_COMM_WORLD ); 45 | } 46 | else { 47 | fibsender(myrank, n-1); 48 | fibsender(myrank, n-2); 49 | } 50 | } 51 | /* Mirror of fibsender: posts one 1-byte MPI_Recv from rank myrank-1 per n==1 leaf, so the receive count matches the sender's send count for the same n. */ 52 | void fibreceiver(int myrank, int n) { 53 | char buf[1]; 54 | MPI_Status stat; 55 | 56 | if( n==0 ) return; 57 | 58 | if( n==1 ) { 59 | MPI_Recv( &buf, 1, MPI_BYTE, myrank-1, 33, MPI_COMM_WORLD, &stat ); 60 | } 61 | else { /* recurse into the receiver (the original recursed into fibsender, so no receive was ever posted) */ 62 | fibreceiver(myrank, n-1); 63 | fibreceiver(myrank, n-2); 64 | } 65 | } 66 | 
-------------------------------------------------------------------------------- /test/test.comm_split/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #define SIZE 10 7 | #define DATATYPE MPI_DOUBLE 8 | #define REPEAT 10 9 | 10 | int main( int argc, char* argv[] ) 11 | { 12 | int i, j; 13 | int myrank, nprocs; 14 | char *sbuf, *rbuf; 15 | int dsize; 16 | MPI_Comm newcomm; 17 | MPI_Request request; 18 | int newrank, newnprocs; 19 | 20 | MPI_Init( &argc, &argv ); 21 | 22 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 23 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 24 | MPI_Type_size(DATATYPE, &dsize); 25 | 26 | sbuf=(char*)malloc(SIZE*dsize); 27 | rbuf=(char*)malloc(SIZE*dsize); 28 | 29 | /* Create a new communicator for odd and even ranks */ 30 | MPI_Comm_split( MPI_COMM_WORLD, myrank%2, myrank, &newcomm ); 31 | MPI_Comm_rank( newcomm, &newrank ); 32 | MPI_Comm_size( newcomm, &newnprocs ); 33 | 34 | /* 35 | fprintf(stderr, "Old: %d/%d, new: %d/%d\n", myrank, nprocs, 36 | newrank, newnprocs); 37 | */ 38 | 39 | 40 | if( (myrank%2) ) { 41 | MPI_Isend(sbuf, SIZE, DATATYPE, (newrank+1)%newnprocs, 33, newcomm, &request); 42 | MPI_Recv(rbuf, SIZE, DATATYPE, (newrank-1+newnprocs)%newnprocs, 33, newcomm, MPI_STATUS_IGNORE); 43 | MPI_Wait(&request, MPI_STATUS_IGNORE); 44 | } 45 | 46 | 47 | for( i=0; i verify.txt 61 | ipm_verify.pl verify.txt 62 | -------------------------------------------------------------------------------- /bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | usage () { 6 | echo "Usage: $0 [--verbose] [--help]" 7 | echo " --verbose : display autoreconf warnings" 8 | echo " --help : display this message" 9 | } 10 | 11 | # parse args 12 | warnings="--warnings=none" 13 | if [ $# -eq 1 ]; then 14 | if [ "$1" == "--verbose" ]; then 15 | warnings="" 16 | elif [ "$1" == "--help" ]; then 17 | usage 
18 | exit 0 19 | else 20 | usage 21 | exit 1 22 | fi 23 | elif [ $# -ne 0 ]; then 24 | usage 25 | exit 1 26 | fi 27 | 28 | # clean up earlier configuration 29 | if [ -f Makefile ]; then 30 | make distclean 31 | fi 32 | 33 | # initial sweep - install missing files, overwriting previous state 34 | autoreconf --install --force $warnings 35 | 36 | # check whether we can find the insertion point for our libtool.m4 bugfix 37 | if ! grep -q "\-L\* | \-R\* | \-l\*)" m4/libtool.m4; then 38 | echo "error: could not find target key for patching libtool.m4 - please report this to the IPM developers along with your m4/libtool.m4" 39 | exit 1 40 | fi 41 | 42 | # apply the fix 43 | mv m4/libtool.m4 m4/libtool.m4_orig 44 | awk '{ print $0 } /-L\* \| -R\* \| -l\*\)/ { printf("\n\t# Some compilers *also* place space between \"-l\" and the library name.\n\t# Remove the space.\n\tif test $p = \"-l\"; then prev=$p; continue; fi\n\n"); }' m4/libtool.m4_orig > m4/libtool.m4 45 | 46 | # autoreconf once more (no --force) to assimilate libtool.m4 changes 47 | autoreconf $warnings 48 | -------------------------------------------------------------------------------- /test/test.cufft/main.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define NX 256 9 | #define BATCH 10 10 | 11 | int main(int argc, char *argv[]) 12 | { 13 | cufftHandle plan; 14 | cufftComplex *devPtr; 15 | cufftComplex data[NX*BATCH]; 16 | int i; 17 | 18 | 19 | /* source data creation */ 20 | for(i= 0 ; i < NX*BATCH ; i++){ 21 | data[i].x = 1.0f; 22 | data[i].y = 1.0f; 23 | } 24 | 25 | /* GPU memory allocation */ 26 | cudaMalloc((void**)&devPtr, sizeof(cufftComplex)*NX*BATCH); 27 | 28 | /* transfer to GPU memory */ 29 | cudaMemcpy(devPtr, data, sizeof(cufftComplex)*NX*BATCH, cudaMemcpyHostToDevice); 30 | 31 | /* creates 1D FFT plan */ 32 | cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH); 33 | 34 | /* executes FFT 
processes */ 35 | cufftExecC2C(plan, devPtr, devPtr, CUFFT_FORWARD); 36 | 37 | /* executes FFT processes (inverse transformation) */ 38 | cufftExecC2C(plan, devPtr, devPtr, CUFFT_INVERSE); 39 | 40 | /* transfer results from GPU memory */ 41 | cudaMemcpy(data, devPtr, sizeof(cufftComplex)*NX*BATCH, cudaMemcpyDeviceToHost); 42 | 43 | /* deletes CUFFT plan */ 44 | cufftDestroy(plan); 45 | 46 | /* frees GPU memory */ 47 | cudaFree(devPtr); 48 | 49 | for(i = 0 ; i < NX*BATCH ; i++){ 50 | printf("data[%d] %f %f\n", i, data[i].x, data[i].y); 51 | } 52 | 53 | return 0; 54 | } 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /test/test.alltoallv/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #define SIZE 1 7 | #define DATATYPE MPI_BYTE 8 | #define REPEAT 1 9 | 10 | int main( int argc, char* argv[] ) 11 | { 12 | int i, j; 13 | int myrank, nprocs; 14 | char *sbuf, *rbuf; 15 | int *scnt, *rcnt; 16 | int *sdpl, *rdpl; 17 | int dsize; 18 | int ssize, rsize; 19 | 20 | MPI_Init( &argc, &argv ); 21 | 22 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 23 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 24 | MPI_Type_size(DATATYPE, &dsize); 25 | 26 | scnt = malloc( sizeof(int)*nprocs ); 27 | sdpl = malloc( sizeof(int)*nprocs ); 28 | rcnt = malloc( sizeof(int)*nprocs ); 29 | rdpl = malloc( sizeof(int)*nprocs ); 30 | 31 | for( i=0; i 3 | #include 4 | #include 5 | #include 6 | 7 | #define REPEAT 1 8 | //#define IPM 1 9 | 10 | void do_some_flops() 11 | { 12 | int i, j; 13 | double a; 14 | 15 | a=1.01; 16 | for(i=1; i<1000; i++) { 17 | for(j=1; j<1000; j++ ) { 18 | a+=(double)(i)+(double)(i)/(double)(j); 19 | } 20 | } 21 | 22 | fprintf(stdout, "%5.3f\n", a); 23 | } 24 | 25 | 26 | void func_with_omp() 27 | { 28 | MPI_Pcontrol(1, "func_with_omp"); 29 | 30 | do_some_flops(); 31 | 32 | /* manually call the trace functions if not 33 | using the PGI 
compiler */ 34 | #if defined(IPM) && !defined(__PGI) 35 | _mp_trace_parallel_enter(); 36 | #endif 37 | #pragma omp parallel 38 | { 39 | #if defined(IPM) && !defined(__PGI) 40 | _mp_trace_parallel_begin(); 41 | #endif 42 | 43 | if(omp_get_thread_num()==0) { 44 | sleep(1.0); 45 | } 46 | #if defined(IPM) && !defined(__PGI) 47 | _mp_trace_parallel_end(); 48 | #endif 49 | } 50 | #if defined(IPM) && !defined(__PGI) 51 | _mp_trace_parallel_exit(); 52 | #endif 53 | 54 | MPI_Pcontrol(-1, "func_with_omp"); 55 | } 56 | 57 | 58 | int main( int argc, char* argv[] ) 59 | { 60 | int i; 61 | int myrank, nprocs; 62 | 63 | MPI_Init( &argc, &argv ); 64 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 65 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 66 | 67 | for( i=0; i 3 | #include 4 | #include 5 | #include 6 | 7 | #define SIZE 10 8 | #define DATATYPE MPI_DOUBLE 9 | #define ROOT 0 10 | #define REPEAT 10 11 | 12 | void xxx() 13 | { 14 | MPI_Pcontrol(1, "xxx"); 15 | MPI_Pcontrol(-1, "xxx"); 16 | } 17 | 18 | void foo() 19 | { 20 | int nprocs, i; 21 | MPI_Pcontrol(1, "foo"); 22 | 23 | for( i=0; i<1000; i++ ) { 24 | MPI_Comm_size(MPI_COMM_WORLD, &nprocs); 25 | } 26 | 27 | MPI_Pcontrol(-1, "foo"); 28 | } 29 | 30 | void bar1() 31 | { 32 | int myrank, i; 33 | MPI_Pcontrol(1, "bar1"); 34 | foo(); 35 | 36 | for( i=0; i<10; i++ ) { 37 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 38 | } 39 | 40 | MPI_Pcontrol(-1, "bar1"); 41 | } 42 | 43 | void bar2() 44 | { 45 | int myrank, i; 46 | MPI_Pcontrol(1, "bar2"); 47 | foo(); 48 | 49 | for( i=0; i<20; i++ ) { 50 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 51 | } 52 | 53 | MPI_Pcontrol(-1, "bar2"); 54 | } 55 | 56 | 57 | 58 | int main( int argc, char* argv[] ) 59 | { 60 | int i; 61 | int myrank, nprocs; 62 | char *buf; 63 | int dsize; 64 | double a=1.01; 65 | 66 | MPI_Init( &argc, &argv ); 67 | 68 | MPI_Pcontrol(1, "main"); 69 | 70 | sleep(1.0); 71 | 72 | 73 | 74 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 75 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 76 | 
PMPI_Type_size(DATATYPE, &dsize); 77 | 78 | bar1(); 79 | bar2(); 80 | 81 | MPI_Pcontrol(-1, "main"); 82 | 83 | MPI_Finalize(); 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /bin/mpirun: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # an mpirun wrapper script, most likely will need to be tailored to the 4 | # software (modules, softenv, etc) environment on your machine 5 | 6 | # the key premise of this script is that it's put in place first in the path 7 | # it thus co-opts mpirun in the user batch script and finds the _next_ 8 | # mpirun in the path, which it uses as the real mpirun in turn 9 | # (bash is required: the EXEMPT/ACTIVE user lists below are bash arrays) 10 | 11 | # this script is based on the use case where you want to implicitly run with 12 | # IPM on for all or some of the users on a machine, i.e., always-on 13 | # profiling w/o need to change batch scripts 14 | 15 | # IPM_ON -> default run with (1) or without (0) IPM 16 | # EXEMPT -> an exception list of users who get IPM_ON=0 17 | # ACTIVE -> an exception list of users who get IPM_ON=1 18 | 19 | # questions -> deskinner@lbl.gov 20 | 21 | REAL_MPIRUN=`/usr/bin/which -a mpirun | awk '(NR==2)'` 22 | 23 | if [ "$REAL_MPIRUN" = "" ]; then 24 | REAL_MPIRUN=/usr/common/usg/openmpi/default/pgi/bin/mpirun 25 | fi 26 | 27 | # default 28 | IPM_ON=0 29 | 30 | # skip these users 31 | EXEMPT=(waldo betty germaine) 32 | 33 | # always profile these users 34 | ACTIVE=(dskinner horus betty) 35 | 36 | for name in "${EXEMPT[@]}" 37 | do 38 | if [ "$USER" = "$name" ]; then 39 | IPM_ON=0 40 | fi 41 | done 42 | 43 | #for name in ${ACTIVE[@]} 44 | #do 45 | # if [ "$USER" = "$name" ]; then 46 | # IPM_ON=1 47 | # fi 48 | #done 49 | # "$@" (not $*) forwards each user argument unchanged, including ones containing whitespace 50 | if [ "$IPM_ON" = 1 ] ; then 51 | "$REAL_MPIRUN" -x LD_PRELOAD=/usr/common/usg/ipm/magellan/lib/libipm.so "$@" 52 | else 53 | "$REAL_MPIRUN" "$@" 54 | fi 55 | 56 | -------------------------------------------------------------------------------- 
/include/ipm_time.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef IPM_TIME_H_INCLUDED 3 | #define IPM_TIME_H_INCLUDED 4 | 5 | #include 6 | #include 7 | 8 | extern double ipm_seconds_per_tick; 9 | 10 | 11 | /* convert a struct timeval lvalue to seconds as a double; the argument is parenthesized so any expression may be passed */ 12 | #define IPM_TIMEVAL( tv_ ) \ 13 | (((tv_).tv_sec)+((tv_).tv_usec)*1.0e-6) 14 | 15 | #ifdef HAVE_RDTSC 16 | /* rdtsc variant: reads the 64-bit time stamp counter and scales it by ipm_seconds_per_tick; locals carry an ipm_ prefix so they cannot shadow the caller's time_ variable */ 17 | #define IPM_TIMESTAMP( time_ ) \ 18 | { \ 19 | unsigned int ipm_lo_, ipm_hi_; \ 20 | unsigned long long ipm_ticks_; \ 21 | __asm__ __volatile__("rdtsc" : "=a" (ipm_lo_), "=d" (ipm_hi_)); \ 22 | ipm_ticks_ = ipm_hi_; \ 23 | ipm_ticks_ = ipm_ticks_<<32; \ 24 | ipm_ticks_ += ipm_lo_; \ 25 | time_ = (double)(ipm_ticks_) * ipm_seconds_per_tick; \ 26 | } 27 | 28 | #else 29 | 30 | 31 | /* 32 | use the macro IPM_TIMESTAMP with a double parameter argument. the 33 | returned value is time in seconds passed since some point of time in 34 | the past (the timeval is an automatic variable, so concurrent threads do not share it) 35 | */ 36 | #define IPM_TIMESTAMP( time_ ) \ 37 | { \ 38 | struct timeval ipm_tv_; \ 39 | gettimeofday( &ipm_tv_, NULL ); \ 40 | time_=IPM_TIMEVAL(ipm_tv_); \ 41 | } 42 | 43 | #endif /* HAVE_RDTSC */ 44 | 45 | 46 | double ipm_timestamp(); 47 | 48 | /* wallclock time, based on gettimeofday() */ 49 | double ipm_wtime(); 50 | 51 | double ipm_utime(); 52 | double ipm_stime(); 53 | 54 | double ipm_mtime(); 55 | double ipm_iotime(); 56 | double ipm_mpiiotime(); 57 | double ipm_omptime(); 58 | double ipm_ompidletime(); 59 | 60 | double ipm_cudatime(); 61 | double ipm_cublastime(); 62 | double ipm_cuffttime(); 63 | 64 | void ipm_time_init(int flags); 65 | 66 | 67 | #endif /* IPM_TIME_H_INCLUDED */ 68 | -------------------------------------------------------------------------------- /include/mod_posixio.h: -------------------------------------------------------------------------------- 1 | #ifndef MOD_POSIXIO_H_INCLUDED 2 | #define MOD_POSIXIO_H_INCLUDED 3 | 4 | #include 5 | #include "ipm_modules.h" 6 | 7 | int mod_posixio_init(ipm_mod_t* mod, int flags); 8 | 9 | typedef struct iodata 10 | { 11 | double iotime; 12 | double iotime_e; 
13 | } iodata_t; 14 | 15 | 16 | #define IPM_POSIXIO_BYTES_NONE_C( bytes_ ) \ 17 | bytes_=0; 18 | 19 | #define IPM_POSIXIO_BYTES_COUNT_C( bytes_ ) \ 20 | bytes_=count; 21 | 22 | #define IPM_POSIXIO_BYTES_RETURN_COUNT_C( bytes_ ) \ 23 | (rv!=-1)?(bytes_=rv):(bytes_=0) 24 | 25 | #define IPM_POSIXIO_BYTES_NMEMB_C( bytes_ ) \ 26 | bytes_=nmemb*size; 27 | 28 | #define IPM_POSIXIO_BYTES_RETURN_NMEMB_C( bytes_ ) \ 29 | bytes_=rv*size; 30 | 31 | #define IPM_POSIXIO_BYTES_RETURN_EOF_C( bytes_ ) \ 32 | (rv==EOF)?(bytes_=0):(bytes_=1); 33 | 34 | #define IPM_POSIXIO_BYTES_CHAR_C( bytes_ ) \ 35 | bytes_=sizeof(char); 36 | 37 | /* 38 | This macro is for fgets 39 | char* fgets(char *s, int size, FILE *stream) 40 | fgets() returns s on success, and NULL on error or when end of file 41 | occurs while no characters have been read 42 | */ 43 | #define IPM_POSIXIO_BYTES_RETURN_NULL_STR_C( bytes_ ) \ 44 | (rv==NULL)?(bytes_=0):(bytes_=strlen(rv)); 45 | 46 | 47 | #define IPM_POSIXIO_KEY(key_,call_,rank_,size_,reg_,csite_) \ 48 | KEY_CLEAR(key_); \ 49 | KEY_SET_ACTIVITY(key_,call_); \ 50 | KEY_SET_REGION(key_,reg_); \ 51 | KEY_SET_RANK(key_,rank_); \ 52 | KEY_SET_BYTES(key_,size_); \ 53 | KEY_SET_CALLSITE(key_,csite_); 54 | 55 | #endif /* MOD_POSIXIO_H_INCLUDED */ 56 | -------------------------------------------------------------------------------- /test/test.status_ignore_f/main.f: -------------------------------------------------------------------------------- 1 | program hello 2 | implicit none 3 | integer size, rank, ierr 4 | double precision val(100) 5 | integer req 6 | integer status 7 | 8 | include 'mpif.h' 9 | 10 | 11 | 12 | call mpi_init(ierr) 13 | call mpi_comm_size(MPI_COMM_WORLD, size, ierr) 14 | call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) 15 | 16 | ! 
MPI_STATUS_IGNORE in mpi_recv 17 | 18 | if (size < 2 ) then 19 | write(*,*) "Need at least 2 procs for this program" 20 | call mpi_abort(MPI_COMM_WORLD, 1, ierr) 21 | end if 22 | 23 | write(6, "(2(a,i3))") " MPI: size = ", size, " rank = ", rank 24 | 25 | 26 | if (rank .eq. 0) then 27 | call mpi_send( val, 100, MPI_DOUBLE_PRECISION, 1, 33, 28 | + MPI_COMM_WORLD, ierr ) 29 | end if 30 | 31 | 32 | if (rank .eq. 1) then 33 | call mpi_recv( val, 100, MPI_DOUBLE_PRECISION, 0, 33, 34 | + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr ) 35 | end if 36 | 37 | ! MPI_STATUS_IGNORE in mpi_wait 38 | 39 | if (rank .eq. 0) then 40 | call mpi_isend( val, 100, MPI_DOUBLE_PRECISION, 1, 33, 41 | + MPI_COMM_WORLD, req, ierr ) 42 | call mpi_wait( req, MPI_STATUS_IGNORE, ierr ) 43 | end if 44 | 45 | 46 | if (rank .eq. 1) then 47 | call mpi_recv( val, 100, MPI_DOUBLE_PRECISION, 0, 33, 48 | + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr ) 49 | end if 50 | 51 | 52 | 53 | 54 | call mpi_finalize(ierr) 55 | end 56 | -------------------------------------------------------------------------------- /src/machtopo.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ipm.h" 8 | #include "ipm_core.h" 9 | #include "ipm_sizes.h" 10 | 11 | #ifdef HAVE_MPI 12 | 13 | void ipm_get_machtopo() 14 | { 15 | unsigned i, j, np; 16 | char *allnames = NULL; 17 | char *unique = NULL; 18 | unsigned nunique; 19 | int x; 20 | 21 | np = task.ntasks; 22 | 23 | if( task.taskid==0 ) { 24 | allnames = IPM_CALLOC( np, MAXSIZE_HOSTNAME ); 25 | unique = IPM_CALLOC( np, MAXSIZE_HOSTNAME ); 26 | 27 | if( !allnames || !unique ) { 28 | IPMERR("Out of memory allocating buffers in ipm_get_machtopo\n"); 29 | return; 30 | } 31 | } 32 | 33 | IPM_GATHER( task.hostname, MAXSIZE_HOSTNAME, MPI_BYTE, 34 | allnames, MAXSIZE_HOSTNAME, MPI_BYTE, 35 | 0, MPI_COMM_WORLD ); 36 | 37 | nunique=0; 38 | if( task.taskid==0 ) 39 | { 40 | for( i=0; i 3 | 4 | #include "ipm.h" 5 | 
#include "config.h" 6 | #include "hashtable.h" 7 | #include "hashkey.h" 8 | #include "GEN.calltable_mpi.h" 9 | #include "GEN.fproto.mpi.h" 10 | 11 | #ifdef HAVE_KEYHIST 12 | #include "mod_keyhist.h" 13 | #endif 14 | /* IPM wrapper for MPI_Finalize: records the call in the IPM hash table, then (unless DELAYED_MPI_FINALIZE is defined) runs ipm_finalize(0) and forwards to PMPI_Finalize, returning its result. */ 15 | int MPI_Finalize() 16 | { 17 | int rv; 18 | unsigned idx, idx2; 19 | unsigned csite; 20 | IPM_KEY_TYPE key; 21 | 22 | /* --- monitoring action for MPI_Finalize --- */ 23 | /* IPM already finalized: only forward to PMPI and return, otherwise the code below would finalize a second time */ 24 | if(ipm_state==STATE_FINALIZED) { /* don't ipm_finalize twice */ 25 | return PMPI_Finalize(); 26 | } 27 | 28 | #ifdef HAVE_CALLPATH 29 | csite=get_callsite_id(); 30 | #else 31 | csite=0; 32 | #endif 33 | 34 | IPM_MPI_KEY(key, MPI_FINALIZE_ID_GLOBAL, 0, 0, 1, csite); 35 | IPM_HASH_HKEY(ipm_htable,key,idx); 36 | 37 | #ifdef HAVE_MPI_TRACE 38 | #ifdef HAVE_KEYHIST 39 | KEYHIST_TRACE(task.tracefile,key); 40 | #else 41 | if( task.tracefile && task.tracestate) { 42 | fprintf(task.tracefile, "%s %d %d %d %d\n", 43 | "MPI_Finalize", 0, 0, 0, csite); 44 | } 45 | #endif 46 | #endif 47 | 48 | 49 | #ifdef HAVE_KEYHIST 50 | IPM_XHASH_HKEY(ipm_xhtable,last_hkey,key,idx2); 51 | ipm_xhtable[idx2].t_tot+=(ipm_wtime()-last_tstamp); 52 | ipm_xhtable[idx2].count++; 53 | KEY_ASSIGN(last_hkey,key); 54 | #endif 55 | 56 | ipm_htable[idx].count++; 57 | ipm_htable[idx].t_min=0.0; 58 | ipm_htable[idx].t_max=0.0; 59 | ipm_htable[idx].t_tot=0.0; 60 | 61 | 62 | #ifdef HAVE_POSIXIO 63 | modules[IPM_MODULE_POSIXIO].state=STATE_NOTACTIVE; 64 | #endif 65 | 66 | #ifndef DELAYED_MPI_FINALIZE 67 | ipm_finalize(0); 68 | rv = PMPI_Finalize(); 69 | return rv; 70 | #endif /* DELAYED_MPI_FINALIZE */ 71 | 72 | return MPI_SUCCESS; 73 | } 74 | 75 | /* Fortran entry point: stores the result of the C wrapper in *ierr. */ 76 | void MPI_FINALIZE_F(int *ierr) 77 | { 78 | *ierr = MPI_Finalize(); 79 | } 80 | 81 | -------------------------------------------------------------------------------- /include/ipm_sizes.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SIZES_H_INCLUDED 3 | #define SIZES_H_INCLUDED 4 | 5 | /* other prime number hash table sizes: 6 
| 7 | #define MAXSIZE_HASH 4049 8 | #define MAXSIZE_HASH 8093 9 | #define MAXSIZE_HASH 16301 10 | #define MAXSIZE_HASH 32573 11 | #define MAXSIZE_HASH 65437 12 | 13 | */ 14 | 15 | #define MAXSIZE_HASH 65437 16 | 17 | #ifdef HAVE_KEYHIST 18 | #define MAXSIZE_XHASH 32573 19 | #endif 20 | 21 | 22 | #define MAXSIZE_HOSTNAME 16 23 | #define MAXSIZE_USERNAME 16 24 | #define MAXSIZE_ALLOCATIONNAME 16 25 | #define MAXSIZE_JOBID 32 26 | #define MAXSIZE_MACHNAME 32 27 | #define MAXSIZE_MACHINFO 32 28 | #define MAXSIZE_REGLABEL 32 29 | #define MAXSIZE_CMDLINE 4096 30 | #define MAXSIZE_FILENAME 1024 31 | #define MAXNUM_REGIONS 256 32 | #define MAXNUM_REGNESTING 32 33 | 34 | 35 | #define MAXNUM_MODULES 16 36 | 37 | /* module MPI */ 38 | #define MAXNUM_MPI_OPS 16 39 | #define MAXNUM_MPI_TYPES 64 40 | 41 | /* module callpath */ 42 | #define MAXSIZE_CALLSTACKDEPTH 30 43 | #define MAXSIZE_CALLTABLE 1024 44 | #define MAXSIZE_CALLLABEL 64 45 | #define MAXNUM_CALLSITES 8192 46 | 47 | /* module keyhist */ 48 | #define MAXSIZE_CYCLE 128 49 | #define MAXNUM_CYCLES 128 50 | 51 | // Consider updating MAXSIZE_ENVKEY if you change these values... 
52 | /* module papi */ 53 | 54 | #define MAXNUM_PAPI_EVENTS 512 55 | #define MAXNUM_PAPI_COUNTERS 512 56 | #define MAXNUM_PAPI_COMPONENTS 1 57 | #define MAXSIZE_PAPI_EVTNAME 128 58 | 59 | /* module omptracepoints */ 60 | #define MAXNUM_THREADS 276 61 | 62 | 63 | #endif /* SIZES_H_INCLUDED */ 64 | -------------------------------------------------------------------------------- /test/test.introspect/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../../include/ipm_introspect.h" 8 | /* Opens region "foo" via MPI_Pcontrol, prints the current region id, runs 100 barriers, sleeps, then closes the region. */ 9 | void foo() 10 | { 11 | int i; 12 | MPI_Pcontrol(1, "foo"); 13 | 14 | pia_regid_t rid, rid2; 15 | rid = pia_current_region(); 16 | fprintf(stderr, "this is region:%d\n", rid); 17 | 18 | for(i=0; i<100; i++ ) { 19 | MPI_Barrier(MPI_COMM_WORLD); 20 | } 21 | 22 | sleep(1.0); 23 | 24 | MPI_Pcontrol(-1, "foo"); 25 | } 26 | /* Opens region "bar", calls foo() inside it, then prints the current region id and the id returned by pia_child_region() for it. */ 27 | void bar() 28 | { 29 | MPI_Pcontrol(1, "bar"); 30 | foo(); 31 | 32 | pia_regid_t rid, rid2; 33 | rid = pia_current_region(); 34 | fprintf(stderr, "this is region:%d\n", rid); 35 | 36 | rid2 = pia_child_region(rid); 37 | fprintf(stderr, "It's child is region:%d\n", rid2); 38 | 39 | MPI_Pcontrol(-1, "bar"); 40 | } 41 | 42 | int main( int argc, char* argv[] ) 43 | { 44 | int myrank, nprocs; 45 | pia_regid_t rid; 46 | pia_regdata_t rdata; 47 | 48 | 49 | MPI_Init( &argc, &argv); 50 | 51 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 52 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 53 | 54 | foo(); 55 | foo(); 56 | 57 | rid = pia_find_region_by_name("foo"); 58 | fprintf(stderr, "region foo is %d\n", rid); 59 | 60 | pia_get_region_data(&rdata, rid); 61 | 62 | fprintf(stderr, "%d execc\n", rdata.count); 63 | fprintf(stderr, "%f wallt\n", rdata.wtime); 64 | fprintf(stderr, "%f mpit\n", rdata.mtime); 65 | fprintf(stderr, "%s\n", rdata.name); 66 | 67 | char *act = "MPI_Barrier"; 68 | 69 | pia_act_t aid = pia_find_activity_by_name(act); 70 | pia_actdata_t adata; 71 | 72 
| pia_init_activity_data(&adata); 73 | pia_get_activity_data(&adata, aid); 74 | 75 | fprintf(stderr, "%s happened %d times\n", act, adata.ncalls); 76 | 77 | MPI_Finalize(); 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /test/run-test-suite.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N 1 3 | #SBATCH -C haswell 4 | #SBATCH -p debug 5 | #SBATCH -t 00:10:00 6 | 7 | # This SLURM job script is intended to be run on Cori Phase-1 at 8 | # NERSC. 9 | 10 | # Change to the IPM test directory, compile the test programs and then 11 | # submit this script from the same directory, i.e. 12 | # > cd IPM/test 13 | # > make 14 | # > sbatch run-test-suite.sh 15 | 16 | # The script runs each of the parallel test programs with 2 MPI ranks 17 | # and generates an IPM XML file with the same name as the test 18 | # program. Success or failure is determined based on the return value 19 | # of each test program. The total number of failures is reported at 20 | # the end of the script. Note that the script does not check IPM 21 | # correctness and is only useful for identifying severe IPM 22 | # problems. Future versions should analyze the data in the IPM XML 23 | # files. 
24 | 25 | export OMP_NUM_THREADS=1 26 | export OMP_PLACES=threads 27 | export OMP_PROC_BIND=spread 28 | export IPM_REPORT=full 29 | export IPM_LOG=full 30 | 31 | ptests[0]="allgather" 32 | ptests[1]="allgatherv" 33 | ptests[2]="allreduce" 34 | ptests[3]="alltoall" 35 | ptests[4]="alltoallv" 36 | ptests[5]="bcast" 37 | ptests[6]="fhello" 38 | ptests[7]="fring" 39 | ptests[8]="gather" 40 | ptests[9]="gatherv" 41 | ptests[10]="hello" 42 | ptests[11]="keyhist" 43 | ptests[12]="pcontrol" 44 | ptests[13]="simple_mpi" 45 | ptests[14]="status_ignore" 46 | 47 | fail_count=0 48 | for i in "${ptests[@]}"; do 49 | app="./test.${i}/${i}.ipm" 50 | ipm="./${i}.ipm.xml" 51 | export IPM_OUTFILE="${ipm}" 52 | 53 | echo "About to run ${app}" 54 | srun -n 2 -c 32 --cpu_bind=cores ${app} 55 | # mpiexec -n 2 ${app} # Alternative for non-SLURM jobs 56 | if [ $? -eq 0 ]; then 57 | echo -e "SUCCESS: ${app}\n" 58 | else 59 | echo -e "FAILURE: ${app}\n" 60 | ((fail_count++)) 61 | fi 62 | done 63 | 64 | echo "Total failures: ${fail_count}" 65 | -------------------------------------------------------------------------------- /doc/ipm_todo.txt: -------------------------------------------------------------------------------- 1 | 2 | * Overhead measurement rdtsc() for timing -- Nick 3 | - use gettimeofday() for now 4 | 5 | * Banner information: -- Nick 6 | - load imbalance metrics (Jesus Labarta)? 7 | 8 | * Environment variables for IPM2 -- Karl 9 | TODO: 10 | - Switching modules ON/OFF 11 | - Selecting counters and counter groups: 12 | 13 | * Module further out ideas 14 | - www (webserver in rank 0) 15 | - vapi for IB 16 | - xen/kvm cloud module 17 | - tracing as a module? 18 | - HMM generation 19 | - UDP packet reporting every N minutes 20 | 21 | * Unit tests 22 | - Automated testing framework 23 | 24 | * Default counter groups for PAPI -- Nick 25 | - Auto-detection of CPU at runtime, 26 | choose groups at runtime? 27 | - Should we have IPM_CPU environment variable? 
28 | 29 | * MPI-IO module testing bringing up to date 30 | 31 | * Rewrite of ipm_parse -- Andrew 32 | - Use C instead of Perl 33 | - Current parser only for MPI, what about Posix-IO? 34 | - What kind of graphs, and other info for I/O banner? 35 | - Current parser reads IPM_KEYFILE, get rid of this 36 | - Which XML parser library? 37 | - 3 functions: reproduce banner terse/full 38 | produce HTML page 39 | 40 | 41 | * General open issues wrt. Magellan: 42 | - How is "always on" implemented? Modified mpirun? 43 | - when and how is ipm_parse run()? 44 | In ipm_finalize(), by a script? 45 | - Can we phase-in IPM usage smoothly? 46 | Up to 512 ranks at first for example? 47 | - What if PAPI already in use, 48 | or MPI profiling interface already used? 49 | How to detect, what to do? 50 | - Data generation and storage, where, how, 51 | and in what format? 52 | - Webpage generation and hosting 53 | - Output format: Do not include hashtable 54 | (hent) by default 55 | - What happens if codes don't reach MPI_Finalize() 56 | - there's already the atexit() mechanism, does that 57 | always work? 58 | - What happens when some ranks call MPI_Abort() 59 | -------------------------------------------------------------------------------- /bin/ipm_expand.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # expanding clustered XML profiles to "normal" full ones for use with 4 | # the standard parser. 5 | 6 | %tasks=(); 7 | %taskcopies=(); 8 | 9 | $rank=-1; 10 | $nprocs=-1; 11 | 12 | while(<>) { 13 | $line = $_; 14 | 15 | chomp $line; 16 | 17 | if( // ) { 27 | push @{ $tasks{$rank} }, $line; 28 | $rank=-2; 29 | } 30 | 31 | if( /= -1 ) { 36 | push @{ $tasks{$rank} }, $line; 37 | } 38 | } 39 | 40 | 41 | # print header... 
42 | foreach $line (@{ $tasks{-1} }) { 43 | print "$line\n"; 44 | } 45 | 46 | 47 | for( $rank=0; $rank<$nprocs; $rank++ ) { 48 | if( defined( $taskcopies{$rank} ) ) { 49 | 50 | $clust = $taskcopies{$rank}; 51 | 52 | foreach $line (@{ $tasks{$clust} }) { 53 | 54 | $pline = $line; 55 | 56 | if( $pline =~ /mpi_rank=\"$clust\"/ ) { 57 | $pline =~ s/mpi_rank=\"$clust\"/mpi_rank=\"$rank\"/g; 58 | } 59 | 60 | if( $pline =~ /orank=\"\+(\d+)\"/ ) { 61 | $orank=$1+$rank; 62 | $pline =~ s/orank=\"\+(\d+)\"/orank=\"$orank\"/; 63 | } 64 | if( $pline =~ /orank=\"\-(\d+)\"/ ) { 65 | $orank=$rank-$1; 66 | $pline =~ s/orank=\"\-(\d+)\"/orank=\"$orank\"/; 67 | } 68 | print "$pline\n"; 69 | } 70 | } else { 71 | foreach $line (@{ $tasks{$rank} }) { 72 | 73 | $pline = $line; 74 | 75 | if( $pline =~ /orank=\"[\+](\d+)\"/ ) { 76 | $orank=$1+$rank; 77 | $pline =~ s/orank=\"[\+](\d+)\"/orank=\"$orank\"/; 78 | } 79 | if( $pline =~ /orank=\"[\-](\d+)\"/ ) { 80 | $orank=$rank-$1; 81 | $pline =~ s/orank=\"[\-](\d+)\"/orank=\"$orank\"/; 82 | } 83 | print "$pline\n"; 84 | } 85 | } 86 | } 87 | 88 | #print footer 89 | print "\n"; 90 | -------------------------------------------------------------------------------- /test/test.cuda/main.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #define REPEAT 10000 7 | 8 | extern "C" __global__ void foo(float *a, int N) 9 | { 10 | int i; 11 | 12 | for( i=0; i>> (a_d, N); 56 | 57 | 58 | sleep(10); 59 | 60 | foo <<< n_blocks, block_size >>> (b_d, N); 61 | cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost); 62 | cudaMemcpy(b_h, b_d, sizeof(float)*N, cudaMemcpyDeviceToHost); 63 | 64 | // Print results 65 | // for (int i=0; i 3 | #include 4 | 5 | #include "ipm.h" 6 | #include "hashtable.h" 7 | #include "hashkey.h" 8 | #include "GEN.calltable_mpiio.h" 9 | #include "GEN.fproto.mpiio.h" 10 | #include "mod_mpiio.h" 11 | 12 | int mod_mpiio_xml(ipm_mod_t* mod, void *ptr, 
struct region *reg); 13 | int mod_mpiio_region(ipm_mod_t* mod, int op, struct region *reg); 14 | 15 | mpiiodata_t mpiiodata[MAXNUM_REGIONS]; 16 | 17 | int mod_mpiio_init(ipm_mod_t* mod, int flags) 18 | { 19 | int i; 20 | 21 | mod->state = STATE_IN_INIT; 22 | mod->init = mod_mpiio_init; 23 | mod->output = 0; 24 | mod->finalize = 0; 25 | mod->xml = mod_mpiio_xml; 26 | mod->regfunc = mod_mpiio_region; 27 | mod->name = "MPIIO"; 28 | mod->ct_offs = MOD_MPIIO_OFFSET; 29 | mod->ct_range = MOD_MPIIO_RANGE; 30 | 31 | for( i=0; istate = STATE_ACTIVE; 39 | return IPM_OK; 40 | } 41 | 42 | int mod_mpiio_xml(ipm_mod_t* mod, void *ptr, struct region *reg) 43 | { 44 | struct region *tmp; 45 | ipm_hent_t stats; 46 | double time; 47 | int res=0; 48 | 49 | if( !reg ) { 50 | time = ipm_mpiiotime(); 51 | } 52 | else { 53 | time = mpiiodata[reg->id].iotime; 54 | 55 | if( (reg->flags)&FLAG_PRINT_EXCLUSIVE ) { 56 | tmp = reg->child; 57 | while(tmp) { 58 | time -= mpiiodata[tmp->id].iotime; 59 | tmp = tmp->next; 60 | } 61 | } 62 | } 63 | 64 | res+=ipm_printf(ptr, 65 | "\n", 66 | mod->name, time); 67 | 68 | return res; 69 | } 70 | 71 | 72 | int mod_mpiio_region(ipm_mod_t *mod, int op, struct region *reg) 73 | { 74 | double time=0.0; 75 | if( !reg ) return 0; 76 | 77 | time = ipm_mpiiotime(); 78 | 79 | switch(op) 80 | { 81 | case -1: /* exit */ 82 | mpiiodata[reg->id].iotime += (time - (mpiiodata[reg->id].iotime_e)); 83 | break; 84 | 85 | case 1: /* enter */ 86 | mpiiodata[reg->id].iotime_e=time; 87 | break; 88 | } 89 | 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /src/mod_cufft.c: -------------------------------------------------------------------------------- 1 | 2 | #include "ipm.h" 3 | #include "ipm_modules.h" 4 | #include "perfdata.h" 5 | #include "mod_cufft.h" 6 | #include "GEN.calltable_cufft.h" 7 | #include "hashtable.h" 8 | 9 | double ipm_cuffttime(); 10 | 11 | int mod_cufft_xml(ipm_mod_t* mod, void *ptr, struct region 
*reg); 12 | int mod_cufft_region(ipm_mod_t*mod, int op, struct region *reg); 13 | 14 | int mod_cufft_init(ipm_mod_t* mod, int flags) 15 | { 16 | int i; 17 | 18 | mod->state = STATE_IN_INIT; 19 | mod->init = mod_cufft_init; 20 | mod->output = mod_cufft_output; 21 | mod->finalize = 0; 22 | mod->xml = mod_cufft_xml; 23 | mod->regfunc = mod_cufft_region; 24 | mod->name = "CUFFT"; 25 | mod->ct_offs = MOD_CUFFT_OFFSET; 26 | mod->ct_range = MOD_CUFFT_RANGE; 27 | 28 | copy_cufft_calltable(); 29 | 30 | for( i=0; istate = STATE_ACTIVE; 36 | 37 | return IPM_OK; 38 | } 39 | 40 | int mod_cufft_output(ipm_mod_t* mod, int flags) 41 | { 42 | 43 | } 44 | 45 | int mod_cufft_xml(ipm_mod_t* mod, void *ptr, struct region *reg) 46 | { 47 | struct region *tmp; 48 | ipm_hent_t stats; 49 | double time; 50 | int res=0; 51 | 52 | if( !reg ) { 53 | time = ipm_cuffttime(); 54 | } 55 | else { 56 | time = task.cufftdata[reg->id].time; 57 | 58 | if( (reg->flags)&FLAG_PRINT_EXCLUSIVE ) { 59 | tmp = reg->child; 60 | while(tmp) { 61 | time -= task.cufftdata[tmp->id].time; 62 | tmp = tmp->next; 63 | } 64 | } 65 | } 66 | 67 | res+=ipm_printf(ptr, 68 | "\n", 69 | mod->name, time); 70 | 71 | return res; 72 | } 73 | 74 | 75 | int mod_cufft_region(ipm_mod_t *mod, int op, struct region *reg) 76 | { 77 | double time; 78 | if( !reg ) return 0; 79 | 80 | time = ipm_cuffttime(); 81 | 82 | switch(op) 83 | { 84 | case -1: /* exit */ 85 | task.cufftdata[reg->id].time += 86 | (time - (task.cufftdata[reg->id].time_e)); 87 | break; 88 | 89 | case 1: /* enter */ 90 | task.cufftdata[reg->id].time_e=time; 91 | break; 92 | } 93 | return 0; 94 | } 95 | 96 | 97 | -------------------------------------------------------------------------------- /src/perfdata.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ipm.h" 10 | #include "ipm_core.h" 11 | #include "ipm_time.h" 12 | #include 
"perfdata.h" 13 | #include "jobdata.h" 14 | #include "md5.h" 15 | #include "regstack.h" 16 | 17 | 18 | taskdata_t task; 19 | 20 | void taskdata_init(taskdata_t *t) 21 | { 22 | char *tmp; 23 | 24 | gettimeofday( &(t->t_start), 0); 25 | 26 | t->flags = FLAG_REPORT_TERSE|FLAG_LOG_TERSE; 27 | 28 | t->wtime = ipm_wtime(); 29 | t->stime = ipm_stime(); 30 | t->utime = ipm_utime(); 31 | t->mtime = ipm_mtime(); 32 | t->iotime = ipm_iotime(); 33 | t->omptime = ipm_omptime(); 34 | 35 | t->procmem = 0.0; 36 | 37 | t->num_threads = 1; 38 | 39 | gethostname(t->hostname, MAXSIZE_HOSTNAME); 40 | t->hostname[MAXSIZE_HOSTNAME-1]=0; 41 | 42 | t->pid=getpid(); 43 | 44 | t->taskid=0; 45 | t->ntasks=1; 46 | t->nhosts=1; 47 | 48 | ipm_get_job_id(t->jobid, MAXSIZE_JOBID); 49 | ipm_get_job_user(t->user, MAXSIZE_USERNAME); 50 | ipm_get_job_allocation(t->allocation, MAXSIZE_ALLOCATIONNAME); 51 | 52 | ipm_get_mach_name(t->mach_name, MAXSIZE_MACHNAME); 53 | ipm_get_mach_info(t->mach_info, MAXSIZE_MACHINFO); 54 | 55 | ipm_get_exec_cmdline(t->exec_cmdline, t->exec_realpath); 56 | ipm_get_exec_md5sum(t->exec_md5sum, t->exec_realpath); 57 | 58 | /* 59 | * determine local appname and filename prefix 60 | * later root broadcasts its settings such that 61 | * that they are consisten 62 | */ 63 | 64 | /* need copy, because basename may modify arg */ 65 | tmp = strdup(t->exec_realpath); 66 | sprintf(t->appname, "%s", basename(tmp)); 67 | sprintf(t->fname, "%s.%lu.%lu", t->user, t->t_start.tv_sec, t->t_start.tv_usec); 68 | free(tmp); 69 | 70 | sprintf(t->logdir, "."); 71 | 72 | #if defined(HAVE_POSIXIO_TRACE) || defined(HAVE_MPI_TRACE) 73 | t->tracestate=1; 74 | t->tracefile=0; 75 | #endif 76 | 77 | #if defined(HAVE_SNAP) 78 | t-> snap_period = 0; 79 | t-> snap_last = 0; 80 | mkdir("/tmp/ipm",S_IWOTH|S_IROTH|S_IWGRP|S_IRGRP|S_IWUSR|S_IRUSR|S_IXUSR|S_IXGRP|S_IXOTH); 81 | #endif 82 | 83 | t->rstack = ipm_rstack; 84 | } 85 | 86 | 87 | 
-------------------------------------------------------------------------------- /src/mod_cublas.c: -------------------------------------------------------------------------------- 1 | 2 | #include "ipm.h" 3 | #include "ipm_modules.h" 4 | #include "perfdata.h" 5 | #include "mod_cublas.h" 6 | #include "GEN.calltable_cublas.h" 7 | #include "hashtable.h" 8 | 9 | double ipm_cublastime(); 10 | 11 | int mod_cublas_xml(ipm_mod_t* mod, void *ptr, struct region *reg); 12 | int mod_cublas_region(ipm_mod_t*mod, int op, struct region *reg); 13 | 14 | int mod_cublas_init(ipm_mod_t* mod, int flags) 15 | { 16 | int i; 17 | 18 | mod->state = STATE_IN_INIT; 19 | mod->init = mod_cublas_init; 20 | mod->output = mod_cublas_output; 21 | mod->finalize = 0; 22 | mod->xml = mod_cublas_xml; 23 | mod->regfunc = mod_cublas_region; 24 | mod->name = "CUBLAS"; 25 | mod->ct_offs = MOD_CUBLAS_OFFSET; 26 | mod->ct_range = MOD_CUBLAS_RANGE; 27 | 28 | copy_cublas_calltable(); 29 | 30 | for( i=0; istate = STATE_ACTIVE; 36 | 37 | return IPM_OK; 38 | } 39 | 40 | int mod_cublas_output(ipm_mod_t* mod, int flags) 41 | { 42 | 43 | } 44 | 45 | int mod_cublas_xml(ipm_mod_t* mod, void *ptr, struct region *reg) 46 | { 47 | struct region *tmp; 48 | ipm_hent_t stats; 49 | double time; 50 | int res=0; 51 | 52 | if( !reg ) { 53 | time = ipm_cublastime(); 54 | } 55 | else { 56 | time = task.cublasdata[reg->id].time; 57 | 58 | if( (reg->flags)&FLAG_PRINT_EXCLUSIVE ) { 59 | tmp = reg->child; 60 | while(tmp) { 61 | time -= task.cublasdata[tmp->id].time; 62 | tmp = tmp->next; 63 | } 64 | } 65 | } 66 | 67 | res+=ipm_printf(ptr, 68 | "\n", 69 | mod->name, time); 70 | 71 | return res; 72 | } 73 | 74 | 75 | int mod_cublas_region(ipm_mod_t *mod, int op, struct region *reg) 76 | { 77 | double time; 78 | if( !reg ) return 0; 79 | 80 | time = ipm_cublastime(); 81 | 82 | switch(op) 83 | { 84 | case -1: /* exit */ 85 | task.cublasdata[reg->id].time += 86 | (time - (task.cublasdata[reg->id].time_e)); 87 | break; 88 | 89 | case 1: 
typedef struct map_thrd cube_map_thrd; ///< Declares a synonym for the general mapping "map_thrd" as "cube_map_thrd"

/**
* Defines cartesian topology of the threads.
*/

typedef struct cube_cartesian {
  unsigned ndims; ///< Number of dimensions
  long int* dim; ///< Array with dimensions.
  int* period; ///< Period in every dimension (NOTE(review): presumably one periodicity entry per dimension -- confirm)
  cube_map_thrd* thrd2coord; ///< Thread -> (Coordinates) Mapping.
} cube_cartesian;
$< 60 | 61 | libcubew3.a: $(OBJS) 62 | $(AR) $(ARFLAGS) $@ $? 63 | @echo "*** $@ built" 64 | 65 | 66 | #------------------------------------------------------------------------------ 67 | # Dependencies 68 | # 69 | # Generated by: 70 | # gcc -MM *.c 71 | #------------------------------------------------------------------------------ 72 | 73 | cartesian.o: cartesian.c cartesian.h machine.h node.h process.h thread.h \ 74 | vector.h 75 | cnode.o: cnode.c cnode.h region.h vector.h 76 | cube.o: cube.c cube.h cartesian.h machine.h node.h process.h thread.h \ 77 | cnode.h metric.h region.h vector.h 78 | machine.o: machine.c machine.h node.h process.h thread.h vector.h 79 | metric.o: metric.c metric.h vector.h 80 | node.o: node.c node.h process.h thread.h machine.h vector.h 81 | process.o: process.c process.h thread.h node.h vector.h 82 | region.o: region.c region.h cnode.h vector.h 83 | thread.o: thread.c thread.h process.h vector.h 84 | -------------------------------------------------------------------------------- /etc/ipm_key_cufft: -------------------------------------------------------------------------------- 1 | # 2 | # IPM key file for CUFFT 3 | # 4 | ##module CUFFT 5 | # 6 | # BYTES_NONE 7 | # BYTES_NX 8 | # BYTES_NXNY 9 | # BYTES_NXNYNZ 10 | # 11 | 1|CUFFT_PLAN1D_ID|cufftResult CUFFTAPI cufftPlan1d(cufftHandle *plan, int nx, cufftType type, int batch)||CS_LOCAL,BYTES_NX,RANK_NONE,DATA_NONE,COMM_NONE 12 | 2|CUFFT_PLAN2D_ID|cufftResult CUFFTAPI cufftPlan2d(cufftHandle *plan, int nx, int ny, cufftType type)||CS_LOCAL,BYTES_NXNY,RANK_NONE,DATA_NONE,COMM_NONE 13 | 3|CUFFT_PLAN3D_ID|cufftResult CUFFTAPI cufftPlan3d(cufftHandle *plan, int nx, int ny, int nz, cufftType type)||CS_LOCAL,BYTES_NXNYNZ,RANK_NONE,DATA_NONE,COMM_NONE 14 | 4|CUFFT_PLANMANY_ID|cufftResult CUFFTAPI cufftPlanMany(cufftHandle *plan, int rank, int *n, int *inembed, int istride, int idist, int *onembed, int ostride, int odist, cufftType type, int 
batch)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 15 | 5|CUFFT_DESTROY|cufftResult CUFFTAPI cufftDestroy(cufftHandle plan)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 16 | 6|CUFFT_EXEC_C2C|cufftResult CUFFTAPI cufftExecC2C(cufftHandle plan, cufftComplex *idata, cufftComplex *odata, int direction)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 17 | 7|CUFFT_EXEC_R2C|cufftResult CUFFTAPI cufftExecR2C(cufftHandle plan, cufftReal *idata, cufftComplex *odata)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 18 | 8|CUFFT_EXEC_C2R|cufftResult CUFFTAPI cufftExecC2R(cufftHandle plan, cufftComplex *idata, cufftReal *odata)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 19 | 9|CUFFT_EXEC_Z2Z|cufftResult CUFFTAPI cufftExecZ2Z(cufftHandle plan, cufftDoubleComplex *idata, cufftDoubleComplex *odata, int direction)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 20 | 10|CUFFT_EXEC_D2Z|cufftResult CUFFTAPI cufftExecD2Z(cufftHandle plan, cufftDoubleReal *idata, cufftDoubleComplex *odata)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 21 | 11|CUFFT_EXEC_Z2D|cufftResult CUFFTAPI cufftExecZ2D(cufftHandle plan, cufftDoubleComplex *idata, cufftDoubleReal *odata)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 22 | 12|CUFFT_SETSTREAM|cufftResult CUFFTAPI cufftSetStream(cufftHandle plan, cudaStream_t stream)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE 23 | 13|CUFFT_SETCOMPATIBILITYMODE|cufftResult CUFFTAPI cufftSetCompatibilityMode(cufftHandle plan, cufftCompatibility mode)||CS_LOCAL,BYTES_NONE,RANK_NONE,DATA_NONE,COMM_NONE -------------------------------------------------------------------------------- /src/mpi_pcontrol.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "perfdata.h" 10 | #include "regstack.h" 11 | #include "ipm_sizes.h" 12 | #include "GEN.fproto.mpi.h" 13 | 14 | #define CHECK_REGION(reg_, ptr_) \ 15 | { \ 16 | int 
/*
 * Central handler behind MPI_Pcontrol (C and Fortran entry points).
 *
 *  ctl ==  1 : enter the region named by cmd (IPM1 compatible)
 *  ctl == -1 : exit the region named by cmd  (IPM1 compatible)
 *  otherwise : cmd is a command string; "enter"/"exit" take the region
 *              name from data, "traceoff"/"traceon" toggle tracing when
 *              built with HAVE_POSIXIO_TRACE.
 *
 * A region name is accepted only if CHECK_REGION leaves it non-NULL
 * (non-empty and at most MAXSIZE_REGLABEL characters); otherwise the
 * request is ignored.  NULL cmd/data pointers are ignored as well rather
 * than handed to strlen()/strncmp().
 *
 * Always returns 0.
 */
int ipm_control(const int ctl, char *cmd, void *data)
{
  char *reg = NULL;

  switch( ctl )
    {
    case 1: /* enter region, IPM1 compatible */
      if( cmd ) {
	CHECK_REGION(reg, cmd);
      }
      if(reg)
	ipm_region(1, reg);
      break;

    case -1: /* exit region, IPM1 compatible */
      /* validate first, then act: the guard used to sit on the
	 validation itself, so reg stayed NULL and ipm_region(-1, NULL)
	 was called unconditionally */
      if( cmd ) {
	CHECK_REGION(reg, cmd);
      }
      if(reg)
	ipm_region(-1, reg);
      break;

      /* general case */
    case 0:
    default:
      if( !cmd )
	break;

      if( !strncmp(cmd, "enter", 5) ) {
	if( data ) {
	  CHECK_REGION(reg, data);
	}
	if(reg)
	  ipm_region(1, reg);
      }

      if( !strncmp(cmd, "exit", 4) ) {
	if( data ) {
	  CHECK_REGION(reg, data);
	}
	if(reg)
	  ipm_region(-1, reg);
      }

#ifdef HAVE_POSIXIO_TRACE
      if( !strncmp(cmd, "traceoff", 8) )
	task.tracestate = 0;
      if( !strncmp(cmd, "traceon", 7) )
	task.tracestate = 1;
#endif
    }

  return 0;
}
71 | { 72 | int res; 73 | va_list ap; 74 | char *cmd; 75 | void *data; 76 | 77 | /* ignore MPI_Pcontrol calls if not initialized */ 78 | if( ipm_state==STATE_NOTINIT ) 79 | return 0; 80 | 81 | va_start(ap, ctl); 82 | cmd = va_arg(ap, char *); 83 | data = va_arg(ap, void *); 84 | va_end(ap); 85 | 86 | res = ipm_control(ctl, cmd, data); 87 | return res; 88 | } 89 | 90 | 91 | void MPI_PCONTROL_F(const int *ctl, char *cmd, char *data) 92 | { 93 | int myctl; 94 | 95 | /* ignore MPI_Pcontrol calls if not initialized */ 96 | if( ipm_state==STATE_NOTINIT ) 97 | return; 98 | 99 | myctl = (ctl?(*ctl):0); 100 | ipm_control(myctl, cmd, data); 101 | 102 | return; 103 | } 104 | -------------------------------------------------------------------------------- /include/calltable.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CALLTABLE_H_INCLUDED 3 | #define CALLTABLE_H_INCLUDED 4 | 5 | #include "ipm_sizes.h" 6 | 7 | typedef struct 8 | { 9 | char *name; 10 | unsigned long long attr; 11 | } ipm_call_t; 12 | 13 | extern ipm_call_t ipm_calltable[MAXSIZE_CALLTABLE]; 14 | 15 | 16 | #define RANK_ALL (0x1ULL<<0) 17 | #define RANK_DEST (0x1ULL<<1) 18 | #define RANK_NONE (0x1ULL<<2) 19 | #define RANK_ROOT (0x1ULL<<3) 20 | #define RANK_SRC (0x1ULL<<4) 21 | #define RANK_STATUS (0x1ULL<<5) 22 | 23 | #define DATA_NONE (0x1ULL<<6) 24 | #define DATA_COLLECTIVE (0x1ULL<<7) 25 | #define DATA_RX (0x1ULL<<8) 26 | #define DATA_TX (0x1ULL<<9) 27 | #define DATA_TXRX (0x1ULL<<10) 28 | 29 | #define BYTES_NONE (0x1ULL<<11) 30 | #define BYTES_NMEMB (0x1ULL<<12) 31 | #define BYTES_COUNT (0x1ULL<<13) 32 | #define BYTES_CHAR (0x1ULL<<14) 33 | #define BYTES_RETURN_NMEMB (0x1ULL<<15) 34 | #define BYTES_RETURN_COUNT (0x1ULL<<16) 35 | #define BYTES_RETURN_EOF (0x1ULL<<17) 36 | 37 | #define BYTES_SCOUNT (0x1ULL<<18) 38 | #define BYTES_STATUS (0x1ULL<<19) 39 | #define BYTES_RCOUNT (0x1ULL<<20) 40 | #define BYTES_STATUSI (0x1ULL<<21) 41 | #define BYTES_STATUSES 
//
// added to support the allgather which needs to use rtype vs stype when there is MPI_IN_PLACE
//
#define BYTES_SCOUNT_GA (0x1ULL<<35)
#define BYTES_SCOUNT_RE (0x1ULL<<36)
#define BYTES_SCOUNT_ALL (0x1ULL<<37)
#define BYTES_SCOUNT_ALLV (0x1ULL<<38)
// NOTE(review): BYTES_RCOUNT_SC uses the same bit (38) as BYTES_SCOUNT_ALLV,
// so the two attributes cannot be told apart in a call's attr mask.
// Confirm whether this aliasing is intended before relying on either flag alone.
#define BYTES_RCOUNT_SC (0x1ULL<<38)

#define BYTES_RETURN_NULL_STR (0x1ULL<<39)
/*
 * Helpers that clear a whole group of mutually exclusive flag bits.
 *
 * Each macro expands to ONE statement that evaluates flags_ once.  The
 * earlier three-statement form was only partly guarded when used as the
 * body of an unbraced if/else/loop (only the first "&=" was conditional).
 *
 * The trailing semicolon of FLAG_CLEAR_REPORT and FLAG_CLEAR_LOG (and its
 * absence in FLAG_CLEAR_LOGWRITER) is kept exactly as before so existing
 * call sites compile unchanged.
 */

/* clear all REPORT bits */
#define FLAG_CLEAR_REPORT(flags_)					\
  (flags_) &= ~(FLAG_REPORT_NONE | FLAG_REPORT_TERSE | FLAG_REPORT_FULL);


/* clear all LOG bits */
#define FLAG_CLEAR_LOG(flags_)						\
  (flags_) &= ~(FLAG_LOG_NONE | FLAG_LOG_TERSE | FLAG_LOG_FULL);

/* clear all LOGWRITER bits */
#define FLAG_CLEAR_LOGWRITER(flags_)					\
  (flags_) &= ~(FLAG_LOGWRITER_MPIIO | FLAG_LOGWRITER_POSIXIO)
STATE_FINALIZED 5 20 | #define STATE_ERROR 99 21 | 22 | // ipm_state starts out as STATE_NOTINIT 23 | // ACTIVE and NOTACTIVE mean state is ok and monitoring is on/off 24 | 25 | 26 | // no monitoring, data structures not initialized, before ipm_init() 27 | // no monitoring, data structures deallocated, after ipm_finalize() 28 | // flags: TBD, could be errors_are_fatal, etc. 29 | int ipm_init(int flags); 30 | int ipm_finalize(int flags); 31 | 32 | 33 | ipm_init(): 34 | * check if state is STATE_NOTINIT, if not, return error 35 | * set ipm_state to STATE_IN_INIT 36 | * initialize local data structures 37 | * initialize modules (mod_xxx_init()) 38 | * if all OK set ipm_state to STATE_ACTIVE 39 | - enter default IPM region for program global stats 40 | - init task.wtime, task.utime, task.stime, task.ctime 41 | * else set state to STATE_ERROR 42 | 43 | 44 | ipm_finalize(): 45 | * check if state is STATE_ACTIVE or STATE_NOTACTIVE 46 | - if yes, continue, if no return error 47 | * set state to STATE_IN_FINALIZE - this disables all monitoring 48 | * update task.wtime, task.utime, task.stime, task.ctime 49 | * exit default IPM region for program global stats 50 | * print banner (call ipm_banner()) 51 | * call mod_xxx_output() for all modules 52 | * call mod_xxx_finalize() for all modules, 53 | modules can also do output in finalize() 54 | * set state to STATE_FINALIZED 55 | 56 | 57 | What calls ipm_init(): 58 | ====================== 59 | * MPI_Init() and MPI_Init_thread() 60 | * Each wrapped call in posixio checks if state==STATE_NOTINIT 61 | and if yes, calls ipm_init() 62 | * User-added initialization? 
/*
 * Template for one Fortran MPI wrapper.  The wrapper script instantiates
 * it once per wrapped routine by substituting the placeholder strings and
 * the conditional switches below; comments added here deliberately avoid
 * spelling those strings out so they cannot be substituted by accident.
 *
 * Flow: mark "inside Fortran PMPI", time the call to the p-prefixed
 * Fortran entry point, clear the mark, and -- only while IPM is active --
 * convert the Fortran handles to C handles and pass everything to the
 * C-side IPM recording routine together with the two timestamps.
 */
__FRET__ __FFNAME__(__FPARAMS__)
{
  double tstart, tstop;

  /* C-side temporaries, present only for routines that need them */
#if HAVE_CREQ /* HAVE _CREQ */
  MPI_Request creq;
#endif
#if HAVE_CSTAT /* HAVE _CSTAT */
  MPI_Status cstat;
#endif
#if HAVE_CCOMM_OUT
  MPI_Comm ccomm_out;
#endif /* HAVE _CCOMM_OUT */
#if HAVE_CCOMM_INOUT
  MPI_Comm ccomm_inout;
#endif /* HAVE _CCOMM_INOUT */

  /* in/out communicator: converted once before the call; it is converted
     again after a successful call further down */
#if HAVE_CCOMM_INOUT
  ccomm_inout = MPI_Comm_f2c(*comm_inout);
#endif

  /* NOTE(review): this declaration follows a statement when the in/out
     communicator switch is enabled -- needs C99 or later; confirm the
     build's language level */
#if HAVE_CGROUP_OUT /* HAVE _CGROUP_OUT */
  MPI_Group cgroup_out;
#endif

  /* flag is raised only for the duration of the PMPI call */
  ipm_in_fortran_pmpi = IPM_IN_FORTRAN_PMPI;

  IPM_TIMESTAMP(tstart);
  p__FFNAME__(__FARGS__);
  IPM_TIMESTAMP(tstop);

  ipm_in_fortran_pmpi = IPM_NOT_IN_FORTRAN_PMPI;

  /* the MPI call itself has already been made; only the recording is
     skipped while IPM is not active */
  if( ipm_state!=STATE_ACTIVE ) {
    return;
  }

  /* output handles are converted only if the call reported success */
#if HAVE_CSTAT /* HAVE_CSTAT */
  if ( *info==MPI_SUCCESS )
    MPI_Status_f2c(status, &cstat);
#endif

#if HAVE_CREQ /* HAVE_CREQ */
  if( *info==MPI_SUCCESS )
    creq=MPI_Request_f2c(*req);
#endif

#if HAVE_CCOMM_OUT /* HAVE _CCOMM_OUT */
  if( *info==MPI_SUCCESS )
    ccomm_out=MPI_Comm_f2c(*comm_out);
#endif

#if HAVE_CCOMM_INOUT /* HAVE _CCOMM_INOUT */
  if( *info==MPI_SUCCESS )
    ccomm_inout=MPI_Comm_f2c(*comm_inout);
#endif

#if HAVE_CGROUP_OUT /* HAVE _CGROUP_OUT */
  if( *info==MPI_SUCCESS )
    cgroup_out=MPI_Group_f2c(*group_out);
#endif
  /* hand the converted arguments and both timestamps to IPM */
  IPM___CFNAME__(__F2CARGS__, tstart, tstop);

}
typedef struct cube_machine {
  cube_narray* child;  /* child nodes, held in a cube_narray (array of cube_node pointers) */
  char* name;          /* machine name (cube_machine_init takes a name argument) */
  char* desc;          /* machine description (cube_machine_init takes a desc argument) */
  int id;              /* identifier, accessed through cube_machine_set_id / cube_machine_get_id */
} cube_machine;
count; /* executed how many times? */ 37 | double wtime; /* wallclock time */ 38 | double mtime; /* time in mpi */ 39 | } pia_regdata_t; 40 | 41 | 42 | /* 43 | * navigate the region hierarchy: 44 | * 45 | * - pia_current_region() returns the id of the region at 46 | * the point of invocation 47 | * - pia_child_region() returns the id of the *first* 48 | * sub (child) region 49 | * - pia_parent_region() returns the id of the 50 | * parent of the current region 51 | * - pia_next_region() returns the next region on the 52 | * same level of the hierarchy 53 | * 54 | * negative return values indicate that the requested region does not 55 | * exist 56 | */ 57 | 58 | pia_regid_t pia_current_region(void); 59 | pia_regid_t pia_child_region(pia_regid_t reg); 60 | pia_regid_t pia_parent_region(pia_regid_t reg); 61 | pia_regid_t pia_next_region(pia_regid_t reg); 62 | 63 | pia_regid_t pia_find_region_by_name(char *name); 64 | 65 | pia_ret_t pia_get_region_data(pia_regdata_t *rdata, pia_regid_t reg); 66 | 67 | /* 68 | * pia_act_t is an integer identifier for IPM activities 69 | * an activity is like an MPI or Posix-IO call or time spent 70 | * inside an OpenMP region 71 | * 72 | * >= 0 represents a valid activity 73 | * < 0 represents error, not available, ... 
74 | */ 75 | typedef int pia_act_t; 76 | 77 | /* #define PIA_ACT_ALL_MPI 0xFFFF */ 78 | 79 | pia_act_t pia_find_activity_by_name(char *name); 80 | 81 | 82 | typedef struct 83 | { 84 | int ncalls; 85 | double tmin, tmax, tsum; 86 | } pia_actdata_t; 87 | 88 | pia_ret_t pia_init_activity_data(pia_actdata_t *adata); 89 | pia_ret_t pia_get_activity_data(pia_actdata_t *adata, pia_act_t act); 90 | 91 | 92 | #endif /* IPM_INTROSPECT_H_INCLUDED */ 93 | -------------------------------------------------------------------------------- /src/mod_callpath_evtgraph.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include 4 | 5 | #include "mod_callpath.h" 6 | 7 | int thisblob; 8 | 9 | 10 | void* cgfunc_markblob(callsite_t *site, int level, int flags, void *ptr) 11 | { 12 | int i; 13 | unsigned csite; 14 | graph_t *eg; 15 | node_t *egnode; 16 | 17 | eg = (graph_t*)ptr; 18 | 19 | if( flags==VISIT_BACKTRACK || !(IS_NODE(site)) ) 20 | return ptr; 21 | 22 | /* iterate over all nodes in the graph, 23 | check if their callsite id equals site->id 24 | if yes, set the node's tag to thisblob */ 25 | for( i=0; innodes; i++ ) { 26 | egnode = &(eg->nodes[i]); 27 | csite = KEY_GET_CALLSITE(egnode->key); 28 | 29 | if( csite==(site->id) ) { 30 | egnode->tag=thisblob; 31 | } 32 | } 33 | 34 | return ptr; 35 | } 36 | 37 | 38 | void* cgfunc_evtgraph(callsite_t *site, int level, int flags, void *ptr) 39 | { 40 | int i; char buf[200]; 41 | callsite_t *cgnode; 42 | unsigned csite; 43 | node_t *egnode; 44 | graph_t *eg; 45 | int fmt; 46 | FILE *f; 47 | 48 | if( flags==VISIT_BACKTRACK || !(IS_NODE(site)) || IS_LEAF(site) ) 49 | return ptr; 50 | 51 | eg = (graph_t*)ptr; 52 | 53 | /* reset tags for whole graph */ 54 | for( i=0; innodes; i++ ) { 55 | egnode = &(eg->nodes[i]); 56 | egnode->tag=-1; 57 | 58 | /* 59 | csite = KEY_GET_CALLSITE(egnode->key); 60 | if( csite==(site->id) ) 61 | egnode->tag=-1; 62 | */ 63 | } 64 | 65 | cgnode=site->child; 66 | 
while(cgnode) { 67 | thisblob=cgnode->id; 68 | 69 | if( IS_LEAF(cgnode) ) { 70 | for( i=0; innodes; i++ ) { 71 | egnode = &(eg->nodes[i]); 72 | csite = KEY_GET_CALLSITE(egnode->key); 73 | if( csite==(cgnode->id) ) 74 | egnode->tag=0; 75 | } 76 | } 77 | else { 78 | callgraph_traverse(cgnode, cgnode->parent, cgfunc_markblob, eg); 79 | } 80 | 81 | cgnode=cgnode->next; 82 | } 83 | 84 | fmt = 85 | NODEFMT_COUNT | 86 | NODEFMT_CALL | 87 | NODEFMT_CALLSITE | 88 | NODEFMT_REGION | 89 | NODEFMT_RANK | 90 | NODEFMT_BYTES; 91 | 92 | sprintf(buf, "evtgraph/evtgraph.%d.txt", site->id); 93 | f=fopen(buf, "w"); 94 | graph_printf(f, eg, fmt); 95 | fclose(f); 96 | 97 | return ptr; 98 | } 99 | 100 | 101 | void callgraph_print_evtgraphs(callgraph_t *cg) 102 | { 103 | graph_t eg; 104 | 105 | htable_to_graph( ipm_htable, MAXSIZE_HASH, &eg ); 106 | callgraph_traverse(cg, 0, cgfunc_evtgraph, &eg); 107 | graph_free(&eg); 108 | } 109 | -------------------------------------------------------------------------------- /utils/cubew/lib/node.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | ** SCALASCA http://www.scalasca.org/ ** 3 | ** KOJAK http://www.fz-juelich.de/jsc/kojak/ ** 4 | ***************************************************************************** 5 | ** Copyright (c) 1998-2010 ** 6 | ** Forschungszentrum Juelich, Juelich Supercomputing Centre ** 7 | ** ** 8 | ** Copyright (c) 2003-2008 ** 9 | ** University of Tennessee, Innovative Computing Laboratory ** 10 | ** ** 11 | ** See the file COPYRIGHT in the package base directory for details ** 12 | ****************************************************************************/ 13 | /** 14 | * \file node.h 15 | \brief Declares types and functions to deal with one single node of the running machine. 
16 | */ 17 | #ifndef CUBEW_NODE_H 18 | #define CUBEW_NODE_H 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #include 25 | #include "process.h" 26 | 27 | struct cube_machine; 28 | /** 29 | A dynamical array of nodes. 30 | */ 31 | typedef struct cube_narray { 32 | int size; 33 | int capacity; 34 | struct cube_node** data; 35 | } cube_narray; 36 | 37 | /** 38 | A description and ID of a node. And its children. 39 | */ 40 | typedef struct cube_node { 41 | cube_parray* child; 42 | struct cube_machine* parent; 43 | char* name; 44 | int id; 45 | } cube_node; 46 | 47 | cube_node* cube_node_create(cube_node* node); 48 | void cube_node_init(cube_node* node, char* name, struct cube_machine* parent); 49 | void cube_node_construct_child(cube_node* node); 50 | void cube_node_free(cube_node* node); 51 | 52 | cube_process* cube_node_get_child(cube_node* node, int i); 53 | struct cube_machine* cube_node_get_parent(cube_node* node); 54 | char* cube_node_get_name(cube_node* node); 55 | int cube_node_num_children(cube_node* node); 56 | int cube_node_get_level(cube_node* node); 57 | void cube_node_writeXML(cube_node* node, FILE* fp); 58 | void cube_node_set_id(cube_node* node, int new_id); 59 | int cube_node_get_id(cube_node* node); 60 | int cube_node_equal(cube_node* a, cube_node* b); 61 | void cube_node_add_child(struct cube_machine* parent, cube_node* node); 62 | 63 | #ifdef __cplusplus 64 | } 65 | #endif 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /etc/wrap_mpiio_c.c: -------------------------------------------------------------------------------- 1 | 2 | /** HEADER_BEGIN **/ 3 | 4 | #include 5 | /* #include */ 6 | 7 | #include "ipm.h" 8 | #include "ipm_core.h" 9 | #include "hashtable.h" 10 | #include "mod_mpi.h" 11 | #include "mod_mpiio.h" 12 | #include "GEN.calltable_mpi.h" 13 | #include "GEN.calltable_mpiio.h" 14 | 15 | #ifdef HAVE_CALLPATH 16 | #include "mod_callpath.h" 17 | #endif 18 | 19 | #ifdef 
HAVE_KEYHIST 20 | #include "mod_keyhist.h" 21 | #endif 22 | 23 | 24 | #include "regstack.h" 25 | 26 | #ifndef MPI3CONST 27 | #if MPI_VERSION >= 3 28 | #define MPI3CONST const 29 | #else 30 | #define MPI3CONST 31 | #endif 32 | #endif 33 | 34 | /** HEADER_END **/ 35 | 36 | 37 | /* ---- wrapping __CFNAME__ ---- */ 38 | /* 39 | * strings in the form __IDENT__ are replaced by the wrapper script 40 | * 41 | * CRET __CRET__ 42 | * CFNAME __CFNAME__ 43 | * CPARAMS __CPARAMS__ 44 | * CARGS __CARGS__ 45 | * CFMT __CFMT__ 46 | * GET_SSIZE __GET_SSIZE__ 47 | * GET_RSIZE __GET_RSIZE__ 48 | * GET_RANK __GET_RANK__ 49 | * GET_BYTES __GET_BYTES__ 50 | */ 51 | 52 | __CRET__ __CFNAME__(__CPARAMS__) 53 | { 54 | __CRET__ rv; 55 | int bytes, irank; 56 | double tstart, tstop, t; 57 | IPM_KEY_TYPE key; 58 | int csite, idx, idx2; 59 | int regid; 60 | 61 | if( ipm_state==STATE_NOTINIT ) { 62 | ipm_init(0); 63 | } 64 | 65 | IPM_TIMESTAMP(tstart); 66 | rv = __PCFNAME__(__CARGS__); 67 | IPM_TIMESTAMP(tstop); 68 | 69 | if( ipm_state!=STATE_ACTIVE ) { 70 | return rv; 71 | } 72 | 73 | t=tstop-tstart; 74 | 75 | bytes=0; irank=0; 76 | __GET_BYTES__(bytes); 77 | __GET_RANK__(irank); 78 | 79 | if( irank==MPI_PROC_NULL || irank==MPI_ANY_SOURCE ) 80 | irank = IPM_RANK_NULL; 81 | 82 | #ifdef HAVE_CALLPATH 83 | csite=get_callsite_id(); 84 | #else 85 | csite=0; 86 | #endif 87 | 88 | regid=ipm_rstackptr->id; 89 | 90 | #ifdef HAVE_MPI_TRACE 91 | if( task.tracefile && task.tracestate) { 92 | fprintf(task.tracefile, "%s %d %d %d %d\n", 93 | "__CFNAME__", irank, bytes, regid, csite); 94 | } 95 | #endif 96 | 97 | 98 | IPM_MPI_KEY(key, __CFID___GLOBAL, irank, bytes, 99 | regid, csite); 100 | 101 | IPM_HASH_HKEY(ipm_htable, key, idx); 102 | 103 | #ifdef HAVE_KEYHIST 104 | IPM_XHASH_HKEY(ipm_xhtable,last_hkey,key,idx2); 105 | ipm_xhtable[idx2].t_tot+=(tstart-last_tstamp); 106 | ipm_xhtable[idx2].count++; 107 | KEY_ASSIGN(last_hkey,key); 108 | last_tstamp=tstop; 109 | #endif 110 | 111 | IPM_HASHTABLE_ADD(idx,t); 
112 | 113 | return rv; 114 | } 115 | 116 | -------------------------------------------------------------------------------- /src/memusage.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include "ipm_core.h" 7 | 8 | int ipm_get_procmem(double *bytes) 9 | { 10 | FILE *fh; 11 | int proc_ret; 12 | char proc_var[80]; 13 | char *cp; 14 | long long int ibytes; 15 | 16 | #if defined(OS_LINUX) 17 | #ifndef LINUX_XT3 18 | #define PROCMEM_LINUX_PROC 19 | #endif 20 | #endif 21 | 22 | #if defined (OS_AIX) 23 | #define PROCMEM_GETRUSAGE 24 | #endif 25 | 26 | #ifdef PROCMEM_LINUX_PROC 27 | #ifndef max 28 | #define max( a, b ) ( ((a) > (b)) ? (a) : (b) ) 29 | #endif 30 | 31 | /* Old logic was max of VmPeak and VmHWM - for some reason */ 32 | /* VmPeak is now much too large by ~3GB - so cade is now switched to */ 33 | /* VmHWM. Nick Dec 15 2010 */ 34 | *bytes=0.0; 35 | fh = fopen("/proc/self/status","r"); 36 | while(!feof(fh)) { 37 | fgets(proc_var,80,fh); 38 | /* cp = strstr(proc_var,"VmPeak:"); 39 | if (cp) {sscanf(cp, "VmPeak:"" %llu",&ibytes ); 40 | *bytes=max(*bytes,ibytes); 41 | } 42 | */ 43 | cp = strstr(proc_var,"VmHWM:"); 44 | if (cp) {sscanf(cp, "VmHWM:"" %llu",&ibytes ); 45 | *bytes=max(*bytes,ibytes); 46 | } 47 | } 48 | fclose(fh); 49 | *bytes *= 1024.0; 50 | 51 | #elif defined (PROCMEM_GETRUSAGE) 52 | 53 | getrusage(RUSAGE_SELF,&task.ru_SELF_curr); 54 | getrusage(RUSAGE_CHILDREN,&task.ru_CHILD_curr); 55 | *bytes = (task.ru_SELF_curr.ru_maxrss + 56 | task.ru_CHILD_curr.ru_maxrss )*1024.0; 57 | 58 | #else 59 | *bytes = 0.0; 60 | #endif 61 | return IPM_OK; 62 | } 63 | 64 | #if 0 65 | 66 | /* this is how we would return the memory currently in use */ 67 | static int ipm_get_procmem_now(double *bytes) { 68 | FILE *fh; 69 | int pagesize=getpagesize(); 70 | fh = fopen("/proc/self/statm", "r"); 71 | if(fscanf(fh,"%lf", bytes)!=1) { 72 | printf("IPM: %d error in ipm_get_procmem\n", 
task.mpi_rank); 73 | } 74 | fclose(fh); 75 | *bytes *= pagesize; 76 | return 0; 77 | } 78 | 79 | /* FIXME - unimplemented */ 80 | static int ipm_get_virtmem(double *bytes) { 81 | return 0; 82 | } 83 | 84 | #endif 85 | 86 | 87 | 88 | #ifdef UTEST_MEMUSAGE 89 | 90 | int main(int argc, char* argv[]) 91 | { 92 | double before, after; 93 | double *mem; 94 | 95 | ipm_get_procmem(&before); 96 | 97 | mem = calloc( 1000*1000, sizeof(double) ); 98 | 99 | ipm_get_procmem(&after); 100 | 101 | fprintf(stderr, "Testing ipm_get_procmem... %f %f %f\n", 102 | before, after, after-before); 103 | 104 | } 105 | 106 | #endif /* UTEST_MEMUSAGE */ 107 | -------------------------------------------------------------------------------- /utils/getopts.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include "ipm_parse.h" 6 | 7 | 8 | int parse_topospec(char* str, job_t *job); 9 | 10 | int getopts(int argc, char *argv[], job_t *job) 11 | { 12 | int i; 13 | 14 | job->infile=job->outfile=0; 15 | job->inname=job->outname=""; 16 | 17 | for( i=1; i=argc || !(argv[i]) || 24 | (parse_topospec(argv[i],job)!=IPMP_OK) ) 25 | { 26 | fprintf(stderr, "Error parsing topology specification: '%s'\n", 27 | (i>=argc||!(argv[i])?"":argv[i])); 28 | return IPMP_ERR; 29 | } 30 | break; 31 | 32 | case 'f': // full banner 33 | if( strcmp(argv[i], "-full") ) 34 | goto unrecognized; 35 | 36 | job->outform = FULL; 37 | break; 38 | 39 | case 'h': // html report 40 | if( strcmp(argv[i], "-html") ) 41 | goto unrecognized; 42 | 43 | job->outform = HTML; 44 | break; 45 | 46 | case 's': // cube format 47 | if( strcmp(argv[i], "-summary") ) 48 | goto unrecognized; 49 | 50 | job->outform = SUMMARY; 51 | break; 52 | 53 | case 'c': // cube format 54 | if( strcmp(argv[i], "-cube") ) 55 | goto unrecognized; 56 | 57 | job->outform = CUBE; 58 | break; 59 | 60 | case 'o': 61 | job->outname=job->inname; 62 | job->outname.append(".cube"); 63 | break; 64 | 65 | 
case 'q': 66 | job->quiet=true; 67 | break; 68 | 69 | unrecognized: 70 | default: 71 | fprintf(stderr, "Unrecognized option: '%s'\n", argv[i]); 72 | return IPMP_ERR; 73 | } 74 | } 75 | else { 76 | if( (job->inname)=="" ) { 77 | job->inname=argv[i]; 78 | continue; 79 | } 80 | 81 | if( (job->outname)=="" ) { 82 | job->outname=argv[i]; 83 | continue; 84 | } 85 | return IPMP_ERR; 86 | } 87 | } 88 | return IPMP_OK; 89 | } 90 | 91 | 92 | int parse_topospec(char* str, job_t *job) 93 | { 94 | topospec_t t; 95 | char *s; 96 | 97 | s=str; 98 | while( s && (*s) ) { 99 | t.x=t.y=0; t.z=1; 100 | 101 | t.x=strtol(s, &s, 10); 102 | if( !t.x || (*s!='x') ) 103 | goto error; 104 | 105 | t.y=strtol(++s, &s, 10); 106 | if( !t.y ) goto error; 107 | 108 | if( *s=='x' ) { 109 | t.z=strtol(++s, &s, 10); 110 | if( !t.z ) goto error; 111 | } 112 | 113 | if( t.x>0 && t.y>0 ) { 114 | job->topologies.push_back(t); 115 | } 116 | 117 | if( *s==',' ) ++s; 118 | } 119 | return IPMP_OK; 120 | 121 | error: 122 | return IPMP_ERR; 123 | } 124 | -------------------------------------------------------------------------------- /utils/cubew/lib/thread.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | ** SCALASCA http://www.scalasca.org/ ** 3 | ** KOJAK http://www.fz-juelich.de/jsc/kojak/ ** 4 | ***************************************************************************** 5 | ** Copyright (c) 1998-2010 ** 6 | ** Forschungszentrum Juelich, Juelich Supercomputing Centre ** 7 | ** ** 8 | ** Copyright (c) 2003-2008 ** 9 | ** University of Tennessee, Innovative Computing Laboratory ** 10 | ** ** 11 | ** See the file COPYRIGHT in the package base directory for details ** 12 | ****************************************************************************/ 13 | /** 14 | * \file thread.h 15 | \brief Declares types and functions to deal with threads of running application. 
/**
 * \file thread.h
 * \brief Declares types and functions to deal with the threads of a
 *        running application.
 */
#ifndef CUBEW_THREAD_H
#define CUBEW_THREAD_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>   /* FILE, used by cube_thread_writeXML() */

struct cube_process;

/**
 * A dynamic array of threads.
 */
typedef struct cube_tarray {
  int size;                    /**< Number of entries in data. */
  int capacity;                /**< Number of entries data has room for. */
  struct cube_thread** data;   /**< The entries. */
} cube_tarray;

/**
 * A structure containing information about a thread.
 */
typedef struct cube_thread {
  cube_tarray* child;            /**< Children of this thread. */
  struct cube_process* parent;   /**< Process this thread belongs to. */
  char* name;                    /**< Name of the thread. */
  int rank;                      /**< Rank of the thread. */
  int id;                        /**< ID of the thread. */
} cube_thread;

/* construction / destruction */
cube_thread* cube_thread_create(cube_thread* thrd);
void  cube_thread_init(cube_thread* thrd, char* name, int rank, struct cube_process* parent);
void  cube_thread_construct_child(cube_thread* thrd);
void  cube_thread_free(cube_thread* thrd);

/* accessors */
cube_thread* cube_thread_get_child(cube_thread* thrd, int i);
struct cube_process* cube_thread_get_parent(cube_thread* thrd);
char* cube_thread_get_name(cube_thread* thrd);
int   cube_thread_get_rank(cube_thread* thrd);
int   cube_thread_num_children(cube_thread* thrd);
int   cube_thread_get_level(cube_thread* thrd);
void  cube_thread_writeXML(cube_thread* thrd, FILE* fp);
void  cube_thread_set_id(cube_thread* thrd, int new_id);
int   cube_thread_get_id(cube_thread* thrd);
void  cube_thread_add_child(struct cube_process* parent, cube_thread* thrd);
int   cube_thread_equal(cube_thread* a, cube_thread* b);

#ifdef __cplusplus
}
#endif

#endif /* CUBEW_THREAD_H */
iodata[MAXNUM_REGIONS]; 12 | 13 | int mod_posixio_xml(ipm_mod_t* mod, void *ptr, struct region *reg); 14 | int mod_posixio_region(ipm_mod_t *mod, int op, struct region *reg); 15 | 16 | int mod_posixio_init(ipm_mod_t* mod, int flags) 17 | { 18 | char fname[256]; 19 | int i, id; 20 | 21 | mod->state = STATE_IN_INIT; 22 | mod->init = mod_posixio_init; 23 | mod->output = 0; 24 | mod->finalize = 0; 25 | mod->xml = mod_posixio_xml; 26 | mod->regfunc = mod_posixio_region; 27 | mod->name = "POSIXIO"; 28 | mod->ct_offs = MOD_POSIXIO_OFFSET; 29 | mod->ct_range = MOD_POSIXIO_RANGE; 30 | 31 | copy_posixio_calltable(); 32 | id=task.pid; 33 | 34 | for(i=0; istate = STATE_ACTIVE; 58 | 59 | return IPM_OK; 60 | } 61 | 62 | int mod_posixio_xml(ipm_mod_t* mod, void *ptr, struct region *reg) 63 | { 64 | struct region *tmp; 65 | ipm_hent_t stats; 66 | double time; 67 | int res=0; 68 | 69 | if( !reg ) { 70 | time = ipm_iotime(); 71 | } else { 72 | time = task.iodata[reg->id].iotime; 73 | 74 | if( (reg->flags)&FLAG_PRINT_EXCLUSIVE ) { 75 | tmp = reg->child; 76 | while(tmp) { 77 | time -= task.iodata[tmp->id].iotime; 78 | tmp = tmp->next; 79 | } 80 | } 81 | } 82 | 83 | res+=ipm_printf(ptr, 84 | "\n", 85 | mod->name, time); 86 | 87 | return res; 88 | 89 | } 90 | 91 | 92 | int mod_posixio_region(ipm_mod_t *mod, int op, struct region *reg) 93 | { 94 | double time; 95 | if( !reg ) return 0; 96 | 97 | time = ipm_iotime(); 98 | 99 | switch(op) 100 | { 101 | case -1: /* exit */ 102 | task.iodata[reg->id].iotime += (time - (task.iodata[reg->id].iotime_e)); 103 | break; 104 | 105 | case 1: /* enter */ 106 | task.iodata[reg->id].iotime_e=time; 107 | break; 108 | } 109 | 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /utils/cubew/lib/process.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | ** SCALASCA 
http://www.scalasca.org/ ** 3 | ** KOJAK http://www.fz-juelich.de/jsc/kojak/ ** 4 | ***************************************************************************** 5 | ** Copyright (c) 1998-2010 ** 6 | ** Forschungszentrum Juelich, Juelich Supercomputing Centre ** 7 | ** ** 8 | ** Copyright (c) 2003-2008 ** 9 | ** University of Tennessee, Innovative Computing Laboratory ** 10 | ** ** 11 | ** See the file COPYRIGHT in the package base directory for details ** 12 | ****************************************************************************/ 13 | 14 | /** 15 | * \file process.h 16 | \brief Declares types and functions to deal with process of running application. 17 | */ 18 | #ifndef CUBEW_PROCESS_H 19 | #define CUBEW_PROCESS_H 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | #include 26 | #include "thread.h" 27 | 28 | struct cube_node; 29 | /** 30 | A dynamic array with processes. 31 | */ 32 | typedef struct cube_parray { 33 | int size; 34 | int capacity; 35 | struct cube_process** data; 36 | } cube_parray; 37 | 38 | /** 39 | * Structure collection name, ID and rank of a process. 
40 | */ 41 | typedef struct cube_process { 42 | cube_tarray* child; 43 | struct cube_node* parent; 44 | char* name; 45 | int rank; 46 | int id; 47 | } cube_process; 48 | 49 | cube_process* cube_process_create(cube_process* proc); 50 | void cube_process_init(cube_process* proc, char* name, int rank, struct cube_node* parent); 51 | void cube_process_construct_child(cube_process* proc); 52 | void cube_process_free(cube_process* proc); 53 | 54 | cube_thread* cube_process_get_child(cube_process* proc, int i); 55 | struct cube_node* cube_process_get_parent(cube_process* proc); 56 | int cube_process_get_rank(cube_process* proc); 57 | char* cube_process_get_name(cube_process* proc); 58 | int cube_process_num_children(cube_process* proc); 59 | int cube_process_get_level(cube_process* proc); 60 | void cube_process_writeXML(cube_process* proc, FILE* fp); 61 | void cube_process_set_id(cube_process* proc, int new_id); 62 | int cube_process_get_id(cube_process* proc); 63 | int cube_process_equal(cube_process* a, cube_process* b); 64 | void cube_process_add_child(struct cube_node* parent, cube_process* proc); 65 | 66 | #ifdef __cplusplus 67 | } 68 | #endif 69 | 70 | #endif 71 | 72 | -------------------------------------------------------------------------------- /utils/cubew/lib/region.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | ** SCALASCA http://www.scalasca.org/ ** 3 | ** KOJAK http://www.fz-juelich.de/jsc/kojak/ ** 4 | ***************************************************************************** 5 | ** Copyright (c) 1998-2010 ** 6 | ** Forschungszentrum Juelich, Juelich Supercomputing Centre ** 7 | ** ** 8 | ** Copyright (c) 2003-2008 ** 9 | ** University of Tennessee, Innovative Computing Laboratory ** 10 | ** ** 11 | ** See the file COPYRIGHT in the package base directory for details ** 12 | 
****************************************************************************/ 13 | /** 14 | * \file region.h 15 | \brief Declares types and functions to deal with regions in source code of running application. 16 | */ 17 | #ifndef CUBEW_REGION_H 18 | #define CUBEW_REGION_H 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #include 25 | #include "cnode.h" 26 | 27 | typedef struct rarray cube_rarray; ///< A synonym of the arrays containing only regions. 28 | 29 | /** 30 | * A structure collecting information about a region: Start line, end line, description, url, name and so on. 31 | */ 32 | typedef struct cube_region { 33 | char* name; 34 | int begln; 35 | int endln; 36 | char* url; 37 | char* descr; 38 | char* mod; 39 | int id; 40 | cube_rarray* cnode; 41 | } cube_region; 42 | 43 | 44 | cube_region* cube_region_create(cube_region* reg); 45 | void cube_region_init(cube_region* reg, char* name, int begln, int endln, 46 | char* url, char* descr, char* mod); 47 | void cube_region_construct_cnode(cube_region* reg); 48 | void cube_region_free(cube_region* reg); 49 | 50 | char* cube_region_get_name(cube_region* reg); 51 | char* cube_region_get_url(cube_region* reg); 52 | char* cube_region_get_descr(cube_region* reg); 53 | char* cube_region_get_mod(cube_region* reg); 54 | int cube_region_get_begn_ln(cube_region* reg); 55 | int cube_region_get_end_ln(cube_region* reg); 56 | int cube_region_num_children(cube_region* reg); 57 | void cube_region_add_cnode(cube_region* reg, cube_cnode* cnode); 58 | void cube_region_writeXML(cube_region* reg, FILE* fp); 59 | int cube_region_equal(cube_region* a, cube_region* b); 60 | void cube_region_set_id(cube_region* reg, int new_id); 61 | int cube_region_get_id(cube_region* reg); 62 | 63 | #ifdef __cplusplus 64 | } 65 | #endif 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /test/test.fcomm/main.f90: 
!! Example of MPI_Comm_split usage (written for a 3x2 process grid):
!!   1. split MPI_COMM_WORLD into row and column communicators,
!!   2. split into two groups using a key derived from the rank,
!!   3. split into two groups using keys taken from the table "map".
!! The table has nmap entries; the lookup wraps around so that running
!! on more than nmap ranks does not index past the end of the table.
      implicit none

      integer nrow, mcol, irow, jcol, i, j, ndim, nmap
      parameter (nrow=3, mcol=2, ndim=2, nmap=6)
      integer p, ierr, row_comm, col_comm, comm2D
      integer Iam, me, row_id, col_id
      integer row_group, row_key, map(0:nmap-1)
      data map/2,1,2,1,0,1/
      include "mpif.h"  !! This brings in pre-defined MPI constants, ...

      call MPI_Init(ierr)                           !! starts MPI
      call MPI_Comm_rank(MPI_COMM_WORLD, Iam, ierr) !! get current process id
      call MPI_Comm_size(MPI_COMM_WORLD, p, ierr)   !! get number of processes
      if(Iam .eq. 0) then
        write(*,*)
        write(*,*)'Example of MPI_Comm_split Usage'
        write(*,*)'Split 3x2 grid into 2 different communicators'
        write(*,*)'which correspond to 3 rows and 2 columns.'
        write(*,*)
        write(*,*)' Iam irow jcol row-id col-id'
      endif

!! ---- part 1: one communicator per row, one per column ----
      irow = Iam/mcol        !! row number
      jcol = mod(Iam, mcol)  !! column number
      comm2D = MPI_COMM_WORLD
      call MPI_Comm_split(comm2D, irow, jcol, row_comm, ierr)
      call MPI_Comm_split(comm2D, jcol, irow, col_comm, ierr)

      call MPI_Comm_rank(row_comm, row_id, ierr)
      call MPI_Comm_rank(col_comm, col_id, ierr)
      call MPI_Barrier(MPI_COMM_WORLD, ierr)

      write(*,'(9i8)')Iam,irow,jcol,row_id,col_id
      call MPI_Barrier(MPI_COMM_WORLD, ierr)

      if(Iam .eq. 0) then
        write(*,*)
        write(*,*)'Next, create more general communicator'
        write(*,*)'which consists of two groups :'
        write(*,*)'Rows 1 and 2 belongs to group 1 and row 3 is group 2'
        write(*,*)
      endif

!! ---- part 2: groups of four consecutive ranks ----
      row_group = Iam/4             ! this expression by no means general
      row_key = Iam - row_group*4   ! group1:0,1,2,3; group2:0,1

      call MPI_Comm_split(comm2D, row_group, row_key, &
                          row_comm, ierr)
      call MPI_Comm_rank(row_comm, row_id, ierr)
      write(*,'(9i8)')Iam,row_id
      call MPI_Barrier(MPI_COMM_WORLD, ierr)

      if(Iam .eq. 0) then
        write(*,*)
        write(*,*)'If two processes have same key, the ranks'
        write(*,*)'of these two processes in the new'
        write(*,*)'communicator will be ordered according'
        write(*,*)'to their order in the old communicator'
        write(*,*)' key = map(Iam); map = (2,1,2,1,0,1)'
        write(*,*)
      endif

!! ---- part 3: same groups, keys taken from the table ----
      row_group = Iam/4             ! this expression by no means general
      row_key = map(mod(Iam, nmap)) ! wrap: map only has nmap entries
      call MPI_Comm_split(comm2D, row_group, row_key, &
                          row_comm, ierr)
      call MPI_Comm_rank(row_comm, row_id, ierr)
      call MPI_Barrier(MPI_COMM_WORLD, ierr)
      write(*,'(9i8)')Iam,row_id

      call MPI_Finalize(ierr)       !! let MPI finish up ...

      end program
msgid, rbytes, sbytes, i 21 | integer ierr,status(MPI_STATUS_SIZE),tag,requests(2) 22 | 23 | data outmsg /0/ 24 | data i /1/ 25 | 26 | 27 | tag = 33 28 | 29 | c 30 | c learn number of tasks in partition and task ID 31 | call mpi_init(ierr) 32 | call mpi_comm_size(MPI_COMM_WORLD,ntasks,ierr) 33 | call mpi_comm_rank(MPI_COMM_WORLD,taskid,ierr) 34 | 35 | c compute source and destination for messages 36 | if (taskid.eq.0) then 37 | left = ntasks - 1 38 | else 39 | left = taskid - 1 40 | endif 41 | if (taskid.eq.ntasks-1) then 42 | right = 0 43 | else 44 | right = taskid + 1 45 | endif 46 | c 47 | outmsg = 2 48 | do while (outmsg .ne. -1) 49 | mtype = i 50 | c node 0 queries user for message, sends it to the right, 51 | c then waits for its return 52 | if (taskid .eq. 0) then 53 | write (*,*) 'Enter integer value to be passed along ring' 54 | write (*,*) 'A value of -1 ends the program' 55 | c read (*,*) outmsg 56 | call mpi_send(outmsg,1,MPI_INTEGER,right,tag, 57 | & MPI_COMM_WORLD,ierr) 58 | call mpi_irecv(inmsg,1,MPI_INTEGER,left,tag, 59 | & MPI_COMM_WORLD,requests(1),ierr) 60 | call mpi_wait(requests(1),status,ierr) 61 | write (*,*) 'node 0 received message ', 62 | . 'content is ', inmsg 63 | c the rest of the nodes in the group read the message and pass it on 64 | else 65 | call mpi_recv (inmsg,1,MPI_INTEGER,left,tag, 66 | & MPI_COMM_WORLD, status,ierr) 67 | outmsg = inmsg 68 | call mpi_isend (outmsg,1,MPI_INTEGER,right,tag, 69 | & MPI_COMM_WORLD, requests(2),ierr) 70 | call mpi_wait (requests(2),status,ierr) 71 | write (*,*) taskid, 'processed message ', mtype, 72 | . 
/**
 * \file cnode.h
 * \brief Declares types and functions to deal with a callee node (cnode)
 *        in the cube.
 */
#ifndef CUBEW_CNODE_H
#define CUBEW_CNODE_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>   /* FILE, used by cube_cnode_writeXML() */

struct cube_region; ///< Forward declaration: a region of the source code.

/**
 * Synonym for array of cnodes.
 */
typedef struct carray cube_carray;

typedef struct cube_cnode {
  struct cube_region* callee; ///< Callee of this cnode.
  struct cube_cnode* parent;  ///< This cnode belongs to cnode "parent".
  cube_carray *child;         ///< It has some children.
  char* mod;                  ///< Module string passed to cube_cnode_init().
  int line;                   ///< Start of the source code lines.
  int id;                     ///< ID of the cnode.
} cube_cnode;


/* construction / destruction */
cube_cnode* cube_cnode_create(cube_cnode* cnode);
void cube_cnode_init(cube_cnode* cnode, struct cube_region* callee, char* mod, int line, cube_cnode* parent);
void cube_cnode_construct_child(cube_cnode* cnode);
void cube_cnode_free(cube_cnode* cnode);

/* accessors */
cube_cnode* cube_cnode_get_child(cube_cnode* cnode, int i);
cube_cnode* cube_cnode_get_parent(cube_cnode* cnode);
int   cube_cnode_get_line(cube_cnode* cnode);
int   cube_cnode_num_children(cube_cnode* cnode);
char* cube_cnode_get_mod(cube_cnode* cnode);
struct cube_region* cube_cnode_get_callee(cube_cnode* cnode);
struct cube_region* cube_cnode_get_caller(cube_cnode* cnode);
void  cube_cnode_writeXML(cube_cnode* cnode, FILE* fp);
void  cube_cnode_add_child(cube_cnode* parent, cube_cnode* cnode);
int   cube_cnode_equal(cube_cnode* a, cube_cnode* b);
void  cube_cnode_set_id(cube_cnode* cnode, int new_id);
int   cube_cnode_get_id(cube_cnode* cnode);
void  cube_cnode_assign_ids(cube_cnode* cnode, int* id);
int   cube_cnode_get_level(cube_cnode* cnode);

#ifdef __cplusplus
}
#endif

#endif /* CUBEW_CNODE_H */
13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
64 |
65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 |
91 |
92 | EOF 93 | 94 | for i in `ls $DIR | sort -n` ; do 95 | cat $DIR/$i >> $OUT 96 | done 97 | 98 | echo "NO_MPI_Finalize_Called" >> $OUT 99 | echo "
" >> $OUT 100 | 101 | -------------------------------------------------------------------------------- /include/ipm_modules.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef IPM_MODULES_H_INCLUDED 3 | #define IPM_MODULES_H_INCLUDED 4 | 5 | 6 | typedef struct ipm_module ipm_mod_t; 7 | struct ipm_module; 8 | 9 | #include "ipm_sizes.h" 10 | #include "regstack.h" 11 | 12 | 13 | #define IPM_MODULE_MPI 0 14 | #define IPM_MODULE_MPIIO 1 15 | #define IPM_MODULE_POSIXIO 2 16 | #define IPM_MODULE_OMPTRACEPOINTS 3 17 | #define IPM_MODULE_CUDA 4 18 | #define IPM_MODULE_CUFFT 5 19 | #define IPM_MODULE_CUBLAS 6 20 | #define IPM_MODULE_PAPI 7 21 | #define IPM_MODULE_SELFMONITOR 8 22 | #define IPM_MODULE_CALLPATH 9 23 | #define IPM_MODULE_KEYHIST 10 24 | #define IPM_MODULE_PROCCTRL 11 25 | #define IPM_MODULE_CLUSTERING 12 26 | #define IPM_MODULE_PMON 13 27 | 28 | #define MOD_MPI_OFFSET 0 29 | #define MOD_MPIIO_OFFSET 80 30 | #define MOD_POSIXIO_OFFSET 140 31 | #define MOD_OMPTRACEPOINTS_OFFSET 200 32 | #define MOD_CUDA_OFFSET 220 33 | #define MOD_CUFFT_OFFSET 400 34 | #define MOD_CUBLAS_OFFSET 420 35 | 36 | #define MOD_MPI_RANGE (MOD_MPIIO_OFFSET-MOD_MPI_OFFSET) 37 | #define MOD_MPIIO_RANGE (MOD_POSIXIO_OFFSET-MOD_MPIIO_OFFSET) 38 | #define MOD_POSIXIO_RANGE (MOD_OMPTRACEPOINTS_OFFSET-MOD_POSIXIO_OFFSET) 39 | #define MOD_OMPTRACEPOINTS_RANGE (MOD_CUDA_OFFSET-MOD_OMPTRACEPOINTS_OFFSET) 40 | #define MOD_CUDA_RANGE (MOD_CUFFT_OFFSET-MOD_CUDA_OFFSET) 41 | #define MOD_CUFFT_RANGE (MOD_CUBLAS_OFFSET-MOD_CUFFT_OFFSET) 42 | #define MOD_CUBLAS_RANGE 180 43 | 44 | 45 | 46 | #if (MOD_CUBLAS_OFFSET+MOD_CUBLAS_RANGE > MAXSIZE_CALLTABLE ) 47 | #error MAXSIZE_CALLTABLE not big enought to hold all events 48 | #endif 49 | 50 | 51 | struct ipm_module; 52 | struct region; 53 | 54 | typedef int(*initfunc_t)(struct ipm_module* mod, int flags); 55 | typedef int(*outputfunc_t)(struct ipm_module* mod, int flags); 56 | typedef int(*finalizefunc_t)(struct 
ipm_module* mod, int flags); 57 | 58 | /* Add something module-specific to the XML output. 59 | This function has to use the ipm_printf routine for output, 60 | passing ptr as the first argument and keeping track of the 61 | number of bytes written */ 62 | typedef int(*xmlfunc_t)(struct ipm_module* mod, void *ptr, struct region *reg); 63 | 64 | /* Called upon region enter/exit */ 65 | typedef int(*regfunc_t)(struct ipm_module* mod, int op, struct region *reg); 66 | 67 | struct ipm_module 68 | { 69 | char *name; 70 | initfunc_t init; 71 | outputfunc_t output; 72 | finalizefunc_t finalize; 73 | xmlfunc_t xml; 74 | regfunc_t regfunc; 75 | int state; 76 | int ct_offs; /* range and offset in the */ 77 | int ct_range; /* call table */ 78 | }; 79 | 80 | void ipm_module_init(struct ipm_module *mod); 81 | 82 | extern ipm_mod_t modules[MAXNUM_MODULES]; 83 | 84 | 85 | #endif /* IPM_MODULES_H_INCLUDED */ 86 | -------------------------------------------------------------------------------- /include/mod_callpath.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MOD_CALLPATH_H_INCLUDED 3 | #define MOD_CALLPATH_H_INCLUDED 4 | 5 | #include "ipm_sizes.h" 6 | #include "ipm_modules.h" 7 | 8 | /* ---- initialize the module ---- */ 9 | int mod_callpath_init(ipm_mod_t* mod, int flags); 10 | 11 | 12 | int get_callsite_id(); 13 | 14 | 15 | 16 | /* ---- callsite / callgraph ----- */ 17 | 18 | typedef struct callsite 19 | { 20 | int id; 21 | char *name; 22 | void *addr; 23 | unsigned long long int narrivals; 24 | struct callsite *parent, *next, *child; 25 | } callsite_t; 26 | 27 | /* a callgraph is represented by its root node */ 28 | typedef callsite_t callgraph_t; 29 | 30 | 31 | #define INIT_CALLSITE(site_ ) \ 32 | site_->id = 0; \ 33 | site_->name = 0; \ 34 | site_->addr = 0; \ 35 | site_->narrivals = 0; \ 36 | site_->parent = 0; \ 37 | site_->next = 0; \ 38 | site_->child = 0; 39 | 40 | 41 | #define COPY_CALLSITE(from_, to_) \ 42 | 
to_->id = from_->id; \ 43 | to_->name = from_->name; \ 44 | to_->addr = from_->addr; \ 45 | to_->narrivals = from_->narrivals; \ 46 | to_->parent = from_->parent; \ 47 | to_->next = from_->next; \ 48 | to_->child = from_->child; 49 | 50 | 51 | /* prototype of functions to be called on each node of 52 | the callgraph by the traverse function */ 53 | typedef void*(*cgfunc_t)(callsite_t *site, int level, int flags, void *ptr); 54 | 55 | /* traverse the callgraph in DFS order and apply function 56 | func to each node */ 57 | void callgraph_traverse(callsite_t *graph, callsite_t *stop, 58 | cgfunc_t func, void *ptr); 59 | 60 | #define IS_NODE(csite_) \ 61 | ((csite_->addr!=0)) 62 | 63 | #define IS_LEAF(csite_) \ 64 | (((csite_->child) && (csite_->child->addr))?0:1) 65 | 66 | extern callgraph_t *ipm_callgraph; 67 | 68 | typedef struct cs_hent 69 | { 70 | void *addr; 71 | char *fname; 72 | int offs; 73 | int csid; 74 | int pcsid; /* parent csid */ 75 | } cs_hent_t; 76 | 77 | extern cs_hent_t cs_hash[MAXNUM_CALLSITES]; 78 | 79 | int cs_hash_lookup_addr(void *addr); 80 | int cs_hash_lookup_csid(int csid); 81 | 82 | void callgraph_find_by_csid(callgraph_t *g, callsite_t *site); 83 | 84 | int callgraph_count_leaves(callgraph_t *g); 85 | 86 | int callgraph_count_nodes(callgraph_t *g); 87 | 88 | 89 | /* flags for invocation of cgfunc from traverse */ 90 | #define VISIT_FIRST 1 91 | #define VISIT_BACKTRACK 2 92 | 93 | 94 | /* ---- callsite table ----- */ 95 | 96 | typedef struct 97 | { 98 | int ncs; /* number of callsites */ 99 | int *csids; /* the IDs */ 100 | void **cstable; /* the call-stacks */ 101 | } cstable_t; 102 | 103 | void init_cstable(cstable_t *t, int size); 104 | void clear_cstable(cstable_t *t); 105 | 106 | void callgraph_to_cstable(callgraph_t *g, cstable_t *t); 107 | 108 | void merge_cstables(cstable_t *t1, cstable_t *t2, int *u); 109 | 110 | void ipm_unify_callsite_ids(); 111 | 112 | 113 | 114 | 115 | 116 | #endif /* MOD_CALLPATH_H_INCLUDED */ 117 | 118 | 
-------------------------------------------------------------------------------- /src/hashkey.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | 5 | 6 | #ifdef UTEST_HASHKEY 7 | 8 | #define TEST_ACTIVITY 0 9 | #define TEST_REGION 1 10 | #define TEST_TID 2 11 | #define TEST_CALLSITE 3 12 | #define TEST_DATATYPE 4 13 | #define TEST_OPERATION 5 14 | #define TEST_SELECT 6 15 | #define TEST_RANK 7 16 | #define TEST_BYTES 8 17 | 18 | 19 | int test_setget(int what, 20 | unsigned long long val, 21 | unsigned long long expect) 22 | { 23 | IPM_KEY_TYPE key; 24 | unsigned long long u; 25 | 26 | KEY_CLEAR(key); 27 | switch( what ) 28 | { 29 | case TEST_ACTIVITY: 30 | KEY_SET_ACTIVITY(key, val); 31 | u=KEY_GET_ACTIVITY(key); 32 | break; 33 | 34 | case TEST_REGION: 35 | KEY_SET_REGION(key, val); 36 | u=KEY_GET_REGION(key); 37 | break; 38 | 39 | case TEST_TID: 40 | KEY_SET_TID(key, val); 41 | u=KEY_GET_TID(key); 42 | break; 43 | 44 | case TEST_CALLSITE: 45 | KEY_SET_CALLSITE(key, val); 46 | u=KEY_GET_CALLSITE(key); 47 | break; 48 | 49 | case TEST_DATATYPE: 50 | KEY_SET_DATATYPE(key, val); 51 | u=KEY_GET_DATATYPE(key); 52 | break; 53 | 54 | case TEST_OPERATION: 55 | KEY_SET_OPERATION(key, val); 56 | u=KEY_GET_OPERATION(key); 57 | break; 58 | 59 | case TEST_SELECT: 60 | KEY_SET_SELECT(key, val); 61 | u=KEY_GET_SELECT(key); 62 | break; 63 | 64 | case TEST_RANK: 65 | KEY_SET_RANK(key, val); 66 | u=KEY_GET_RANK(key); 67 | break; 68 | 69 | case TEST_BYTES: 70 | KEY_SET_BYTES(key, val); 71 | u=KEY_GET_BYTES(key); 72 | break; 73 | } 74 | 75 | if( u!=expect ) 76 | return 0; 77 | 78 | return 1; 79 | } 80 | 81 | 82 | int test_range(char* name, int what, unsigned long long min, 83 | unsigned long long max, unsigned long long wrap) 84 | { 85 | int res; 86 | 87 | fprintf(stderr, "Testing set/get range for %12s ... 
", name); 88 | res = test_setget(what, min, min); 89 | res = res && test_setget(what, max-1, max-1); 90 | res = res && test_setget(what, max, max); 91 | res = res && test_setget(what, max+1, wrap); 92 | fprintf(stderr, "%s\n", res?"PASS":"FAIL"); 93 | 94 | return res; 95 | } 96 | 97 | int main(int argc, char* argv[] ) 98 | { 99 | int res; 100 | 101 | fprintf(stderr, "IPM_KEY_TYPE is %d bytes long\n", sizeof(IPM_KEY_TYPE)); 102 | 103 | test_range("ACTIVITY", TEST_ACTIVITY, 0, KEY_MAX_ACTIVITY, 0); 104 | test_range("REGION", TEST_REGION, 0, KEY_MAX_REGION, 0); 105 | test_range("TID", TEST_TID, 0, KEY_MAX_TID, 0); 106 | test_range("CALLSITE", TEST_CALLSITE, 0, KEY_MAX_CALLSITE, 0); 107 | test_range("DATATYPE", TEST_DATATYPE, 0, KEY_MAX_DATATYPE, 0); 108 | test_range("OPERATION",TEST_OPERATION, 0, KEY_MAX_OPERATION, 0); 109 | test_range("SELECT", TEST_SELECT, 0, KEY_MAX_SELECT, 0); 110 | test_range("RANK", TEST_RANK, 0, KEY_MAX_RANK, 0); 111 | test_range("BYTES", TEST_BYTES, 0, KEY_MAX_BYTES, 0); 112 | } 113 | 114 | #endif /* UTEST_HASHKEY */ 115 | 116 | 117 | -------------------------------------------------------------------------------- /m4/ipm_underscore.m4: -------------------------------------------------------------------------------- 1 | 2 | ############################################################################### 3 | # 4 | # tests for wrapper convention for Fortran 5 | # requires ACX_MPI 6 | # outputs autoconf variable IPM_FUNDERSCORE 7 | # 8 | # Copyright (c) 2009 Sascha Hunold 9 | # 10 | ############################################################################### 11 | 12 | AC_DEFUN([AX_IPM_UNDERSCORE], [ 13 | AC_MSG_CHECKING([underscores for F77 objects]) 14 | 15 | CWD=$PWD 16 | TEST_DIR="$PWD/.test" 17 | CONFIG_LOG=config.test.log 18 | 19 | #echo "MPIF77 = ${MPIF77}" 20 | 21 | ## determine underscoring in a MPI F77 program { 22 | if test "x$MPIF77" != "x" ; then 23 | TEST_NAME="simplef_mpi_underscores" 24 | 25 | #echo -n "checking underscores for 
F77 objects..." 26 | 27 | rm -rf $TEST_DIR ; mkdir $TEST_DIR ; cd $TEST_DIR 28 | cat >> ./$TEST_NAME.f <> ./run <> $CONFIG_LOG 71 | cat ./run >> $CONFIG_LOG 72 | FUNDERSCORES_PRE=$(./run | awk '{print $[1]}') 73 | FUNDERSCORES_POST=$(./run | awk '{print $[2]}') 74 | 75 | #echo "PER = ${FUNDERSCORES_PRE}" 76 | #echo "POST= ${FUNDERSCORES_POST}" 77 | 78 | FUNDERSCORE=""; 79 | 80 | if test "0" == "1" ; then 81 | if test "$FUNDERSCORES_PRE" == "1" ; then 82 | # echo -n "pre1," 83 | FUNDERSCORE="$FUNDERSCORE -funderscore_pre "; 84 | elif test "$FUNDERSCORES_PRE" == "2" ; then 85 | # echo -n "pre2," 86 | FUNDERSCORE="$FUNDERSCORE -funderscore_pre -funderscore_pre"; 87 | fi 88 | fi 89 | 90 | if test "$FUNDERSCORES_POST" == "0" ; then 91 | # echo "none" 92 | true 93 | elif test "$FUNDERSCORES_POST" == "1" ; then 94 | # echo "1" 95 | FUNDERSCORE="$FUNDERSCORE -funderscore_post "; 96 | elif test "$FUNDERSCORES_POST" == "2" ; then 97 | # echo "2" 98 | FUNDERSCORE="$FUNDERSCORE -funderscore_post -funderscore_post"; 99 | else 100 | echo "unknown" 101 | echo "see $CONFIG_LOG for compiler errors." 
102 | exit 1 103 | fi 104 | ## } 105 | fi 106 | # endif MPIF77 exists 107 | 108 | AC_SUBST(IPM_FUNDERSCORE, $FUNDERSCORE) 109 | AC_MSG_RESULT([$FUNDERSCORE]) 110 | 111 | cd $CWD 112 | ]) 113 | -------------------------------------------------------------------------------- /src/ipm_introspect.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include "ipm.h" 5 | #include "ipm_introspect.h" 6 | #include "calltable.h" 7 | #include "hashtable.h" 8 | 9 | pia_regid_t pia_current_region() 10 | { 11 | struct region* reg; 12 | 13 | reg = ipm_rstackptr; 14 | if(reg) { 15 | return reg->id; 16 | } else { 17 | return 0; 18 | } 19 | } 20 | 21 | 22 | pia_regid_t pia_child_region(pia_regid_t rid) 23 | { 24 | struct region* reg; 25 | 26 | reg = rstack_find_region_by_id(ipm_rstack, rid); 27 | 28 | if(reg && reg->child) { 29 | return reg->child->id; 30 | } else { 31 | return -1; 32 | } 33 | } 34 | 35 | 36 | pia_regid_t pia_parent_region(pia_regid_t rid) 37 | { 38 | struct region* reg; 39 | 40 | reg = rstack_find_region_by_id(ipm_rstack, rid); 41 | 42 | if(reg && reg->parent) { 43 | return reg->parent->id; 44 | } else { 45 | return -1; 46 | } 47 | } 48 | 49 | 50 | pia_regid_t pia_next_region(pia_regid_t rid) 51 | { 52 | struct region* reg; 53 | 54 | reg = rstack_find_region_by_id(ipm_rstack, rid); 55 | 56 | if(reg && reg->next) { 57 | return reg->next->id; 58 | } else { 59 | return -1; 60 | } 61 | } 62 | 63 | pia_regid_t pia_find_region_by_name(char *name) 64 | { 65 | struct region* reg; 66 | 67 | reg = rstack_find_region_by_name(ipm_rstack, name); 68 | 69 | if(reg) { 70 | return reg->id; 71 | } else { 72 | return -1; 73 | } 74 | } 75 | 76 | pia_ret_t pia_get_region_data(pia_regdata_t *rdata, pia_regid_t rid) 77 | { 78 | struct region* reg=0; 79 | 80 | reg = rstack_find_region_by_id(ipm_rstack, rid); 81 | if( !reg ) { 82 | return PIA_NOTFOUND; 83 | } 84 | 85 | rdata->id=rid; 86 | strncpy(rdata->name, reg->name, 
PIA_MAXLEN_LABEL); 87 | rdata->count=reg->nexecs; 88 | rdata->wtime=reg->wtime; 89 | rdata->mtime=reg->mtime; 90 | 91 | return PIA_OK; 92 | } 93 | 94 | 95 | 96 | pia_act_t pia_find_activity_by_name(char *name) 97 | { 98 | int i; 99 | 100 | for( i=0; i= 0 */ 103 | } 104 | } 105 | 106 | return PIA_NOTFOUND; 107 | } 108 | 109 | pia_ret_t pia_init_activity_data(pia_actdata_t *adata) 110 | { 111 | adata->ncalls=0; 112 | adata->tmin=1.0e15; 113 | adata->tmax=0.0; 114 | adata->tsum=0.0; 115 | 116 | return PIA_OK; 117 | } 118 | 119 | 120 | pia_ret_t pia_get_activity_data(pia_actdata_t *adata, 121 | pia_act_t act) 122 | { 123 | int i; 124 | int bytes; 125 | int rank; 126 | 127 | for( i=0; itmin ) 137 | adata->tmin = ipm_htable[i].t_min; 138 | if( ipm_htable[i].t_max > adata->tmax ) 139 | adata->tmax = ipm_htable[i].t_max; 140 | 141 | adata->tsum+=ipm_htable[i].t_tot; 142 | adata->ncalls+=ipm_htable[i].count; 143 | } 144 | } 145 | 146 | return PIA_OK; 147 | } 148 | -------------------------------------------------------------------------------- /utils/cubew/lib/vector.h: -------------------------------------------------------------------------------- 1 | /**************************************************************************** 2 | ** SCALASCA http://www.scalasca.org/ ** 3 | ** KOJAK http://www.fz-juelich.de/jsc/kojak/ ** 4 | ***************************************************************************** 5 | ** Copyright (c) 1998-2010 ** 6 | ** Forschungszentrum Juelich, Juelich Supercomputing Centre ** 7 | ** ** 8 | ** Copyright (c) 2003-2008 ** 9 | ** University of Tennessee, Innovative Computing Laboratory ** 10 | ** ** 11 | ** See the file COPYRIGHT in the package base directory for details ** 12 | ****************************************************************************/ 13 | 14 | /** 15 | * \file vector.h 16 | * \brief Contains macros XALLOC, ALLOC, REALLOC and ADD_NEXT for memory allocation. 
17 | */ 18 | 19 | 20 | #ifndef CUBEW_VECTOR_H 21 | #define CUBEW_VECTOR_H 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | /** Macros for handling dynamic arrays */ 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | extern int cubew_trace; 35 | 36 | /** allocation with optional log message and only warning if unsuccessful */ 37 | #define ALLOC(MEMORY,NMEMB,MTYPE) MEMORY = (MTYPE *) calloc(NMEMB,sizeof(MTYPE)); \ 38 | if (cubew_trace) fprintf(stderr,"%s: calloc(%lu,%lu) = %p\n", __func__, \ 39 | (unsigned long) NMEMB, (unsigned long) sizeof(MTYPE), MEMORY); \ 40 | if (!MEMORY) { fprintf(stderr, "%s: calloc(%lu,%lu): %s\n", \ 41 | __func__, (unsigned long) NMEMB, (unsigned long) sizeof(MTYPE), \ 42 | strerror(errno)); } 43 | 44 | /** allocation with exit if unsuccessful */ 45 | #define XALLOC(MEMORY,NMEMB,MTYPE) MEMORY = (MTYPE *) calloc(NMEMB,sizeof(MTYPE)); \ 46 | if (cubew_trace) fprintf(stderr,"%s: calloc(%lu,%lu) = %p\n", __func__, \ 47 | (unsigned long) NMEMB, (unsigned long) sizeof(MTYPE), MEMORY); \ 48 | if (!MEMORY) { fprintf(stderr, "%s: calloc(%lu,%lu): %s\n", \ 49 | __func__, (unsigned long) NMEMB, (unsigned long) sizeof(MTYPE), \ 50 | strerror(errno)); exit(1); } 51 | 52 | /** (re)allocation with exit if unsuccessful */ 53 | #define REALLOC(MEMORY,CAST,MSIZE) \ 54 | if (cubew_trace) fprintf(stderr,"%s: realloc(%p,%lu)", __func__, \ 55 | MEMORY, (unsigned long) MSIZE); \ 56 | MEMORY = CAST realloc(MEMORY,MSIZE); \ 57 | if (cubew_trace) fprintf(stderr," = %p\n", MEMORY); \ 58 | if (!MEMORY) { fprintf(stderr, "%s: realloc(%lu): %s\n", \ 59 | __func__, (unsigned long) MSIZE, strerror(errno)); exit(2); } 60 | 61 | /** append element to vector */ 62 | #define ADD_NEXT(VECTOR, ELEMENT, ETYPE) \ 63 | if (VECTOR->size == VECTOR->capacity) { \ 64 | if (VECTOR->capacity == 0) VECTOR->capacity = 1; \ 65 | else VECTOR->capacity *= 2; \ 66 | REALLOC(VECTOR->data, (ETYPE *), sizeof(ETYPE)*(VECTOR->capacity)); \ 67 | } \ 68 | 
VECTOR->data[VECTOR->size] = ELEMENT; \ 69 | VECTOR->size++; 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif 76 | 77 | -------------------------------------------------------------------------------- /test/test.status_ignore/main.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main( int argc, char* argv[] ) 8 | { 9 | int myrank, nprocs; 10 | int val, val2; 11 | int idx, idx2[2]; 12 | int flag; 13 | 14 | 15 | MPI_Request req; 16 | MPI_Request req2[2]; 17 | MPI_Status stat; 18 | 19 | MPI_Init( &argc, &argv ); 20 | 21 | MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); 22 | MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); 23 | 24 | if( nprocs<2 ) { 25 | fprintf(stderr, "Need at least 2 procs to run this program\n"); 26 | MPI_Abort(MPI_COMM_WORLD, 1); 27 | return 1; 28 | } 29 | 30 | /* MPI_STATUS_IGNORE in MPI_Recv */ 31 | switch(myrank) { 32 | case 0: 33 | MPI_Send( &val, 1, MPI_INTEGER, 1, 33, MPI_COMM_WORLD); 34 | break; 35 | 36 | case 1: 37 | MPI_Recv( &val, 1, MPI_INTEGER, 0, 33, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); 38 | break; 39 | } 40 | 41 | /* MPI_STATUS_IGNORE in MPI_Wait, MPI_Test */ 42 | switch(myrank) { 43 | case 0: 44 | MPI_Isend( &val, 1, MPI_INTEGER, 1, 34, MPI_COMM_WORLD, &req); 45 | MPI_Test( &req, &flag, MPI_STATUS_IGNORE ); 46 | MPI_Wait( &req, MPI_STATUS_IGNORE ); 47 | 48 | break; 49 | 50 | case 1: 51 | MPI_Recv( &val, 1, MPI_INTEGER, 0, 34, MPI_COMM_WORLD, &stat ); 52 | break; 53 | } 54 | 55 | /* MPI_STATUS_IGNORE in MPI_Waitany, MPI_Testany */ 56 | switch(myrank) { 57 | case 0: 58 | MPI_Isend( &val, 1, MPI_INTEGER, 1, 35, MPI_COMM_WORLD, &(req2[0])); 59 | MPI_Isend( &val2, 1, MPI_INTEGER, 1, 36, MPI_COMM_WORLD, &(req2[1])); 60 | MPI_Testany( 2, req2, &idx, &flag, MPI_STATUS_IGNORE ); 61 | MPI_Waitany( 2, req2, &idx, MPI_STATUS_IGNORE ); 62 | break; 63 | 64 | case 1: 65 | MPI_Recv( &val, 1, MPI_INTEGER, 0, 35, MPI_COMM_WORLD, &stat ); 66 | 
MPI_Recv( &val2, 1, MPI_INTEGER, 0, 36, MPI_COMM_WORLD, &stat ); 67 | break; 68 | } 69 | 70 | /* MPI_STATUSES_IGNORE in MPI_Waitall, MPI_Testall */ 71 | switch(myrank) { 72 | case 0: 73 | MPI_Isend( &val, 1, MPI_INTEGER, 1, 35, MPI_COMM_WORLD, &(req2[0])); 74 | MPI_Isend( &val2, 1, MPI_INTEGER, 1, 36, MPI_COMM_WORLD, &(req2[1])); 75 | MPI_Testall( 2, req2, &flag, MPI_STATUSES_IGNORE ); 76 | MPI_Waitall( 2, req2, MPI_STATUSES_IGNORE ); 77 | break; 78 | 79 | case 1: 80 | MPI_Recv( &val, 1, MPI_INTEGER, 0, 35, MPI_COMM_WORLD, &stat ); 81 | MPI_Recv( &val2, 1, MPI_INTEGER, 0, 36, MPI_COMM_WORLD, &stat ); 82 | break; 83 | } 84 | 85 | /* MPI_STATUSES_IGNORE in MPI_Waitsome, MPI_Testsome */ 86 | switch(myrank) { 87 | case 0: 88 | MPI_Isend( &val, 1, MPI_INTEGER, 1, 35, MPI_COMM_WORLD, &(req2[0])); 89 | MPI_Isend( &val2, 1, MPI_INTEGER, 1, 36, MPI_COMM_WORLD, &(req2[1])); 90 | MPI_Testsome( 2, req2, &idx, idx2, MPI_STATUSES_IGNORE ); 91 | MPI_Waitsome( 2, req2, &idx, idx2, MPI_STATUSES_IGNORE ); 92 | break; 93 | 94 | case 1: 95 | MPI_Recv( &val, 1, MPI_INTEGER, 0, 35, MPI_COMM_WORLD, &stat ); 96 | MPI_Recv( &val2, 1, MPI_INTEGER, 0, 36, MPI_COMM_WORLD, &stat ); 97 | break; 98 | } 99 | 100 | 101 | 102 | 103 | MPI_Barrier(MPI_COMM_WORLD); 104 | fprintf(stderr, "%5d: DONE\n", myrank); 105 | 106 | MPI_Finalize(); 107 | return 0; 108 | } 109 | -------------------------------------------------------------------------------- /include/regstack.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef REGSTACK_H_INCLUDED 3 | #define REGSTACK_H_INCLUDED 4 | 5 | #include "ipm.h" 6 | #include "ipm_core.h" 7 | #include "ipm_sizes.h" 8 | 9 | 10 | int rstack_init(int flags); 11 | 12 | #define FLAG_PRINT_EXCLUSIVE 1 13 | 14 | typedef struct region 15 | { 16 | struct region *parent, *next, *child; 17 | struct region *self; /* needed for packing */ 18 | 19 | int id; 20 | int flags; 21 | unsigned nexecs; 22 | 23 | /* accumulated time for this region */ 24 | /* 
mtime retained for backwards compatibility */ 25 | double wtime, utime, stime; 26 | double mtime; 27 | 28 | /* enter timestamps */ 29 | double wtime_e, utime_e, stime_e; 30 | double mtime_e; 31 | 32 | char name[MAXSIZE_REGLABEL+1]; 33 | 34 | /* a module can store data with a region */ 35 | // tyler: this is never used and doesn't really work with mpi 36 | //void *moddata[MAXNUM_MODULES]; 37 | 38 | #ifdef HAVE_PAPI 39 | /* accumulated values for this region */ 40 | long long ctr[MAXNUM_PAPI_EVENTS]; 41 | 42 | /* snapshot of values when entering region */ 43 | long long ctr_e[MAXNUM_PAPI_EVENTS]; 44 | 45 | /* counter values accumulated while in IPM code */ 46 | long long ctr_ipm[MAXNUM_PAPI_EVENTS]; 47 | #endif 48 | #ifdef HAVE_PMON 49 | double energy; 50 | double cpu_energy; 51 | double mem_energy; 52 | double other_energy; 53 | #endif 54 | } region_t; 55 | 56 | 57 | typedef struct 58 | { 59 | int id; 60 | region_t *reg; 61 | char *name; 62 | } regid_t; 63 | 64 | 65 | void rstack_init_region(struct region *r, char *s); 66 | void rstack_clear_region(struct region *r); 67 | 68 | 69 | extern struct region *ipm_rstack; 70 | extern struct region *ipm_rstackptr; 71 | 72 | /* this is a region representing the execution of the 73 | application from the start to the point of writing 74 | the job log. 
This is either at the program end or 75 | when triggered by snapshotting */ 76 | extern struct region ipm_app; 77 | 78 | 79 | /* 80 | op = -1 to exit a region 81 | op = 1 to enter a region 82 | 83 | maintains the region stack and updates 84 | performance data using the ipm_region_begin() 85 | ipm_region_end() calls, tag is the 86 | name of the region 87 | */ 88 | void ipm_region(int op, char *tag); 89 | 90 | void ipm_region_begin(struct region *r); 91 | void ipm_region_end(struct region *r); 92 | 93 | 94 | /* prototype of functions to be called on each node of 95 | the region stack by the traverse function */ 96 | typedef void*(*rsfunc_t)(region_t *reg, unsigned level, int flags, void *ptr); 97 | 98 | /* DFS traversal of stack */ 99 | void traverse_rstack( region_t *stack, region_t *stop, 100 | rsfunc_t func, void *ptr); 101 | 102 | 103 | int rstack_cleanup(region_t *rstack); 104 | 105 | 106 | int rstack_count_all_regions(region_t *rstack); 107 | int rstack_count_l1_regions(region_t *rstack); 108 | 109 | void rstack_print(region_t *rstack, FILE *f); 110 | 111 | void rstack_pack(region_t* rstack, int nreg, region_t *list); 112 | region_t* rstack_unpack(int nreg, region_t *list); 113 | 114 | region_t* rstack_find_region_by_id(region_t *rstack, int id); 115 | region_t* rstack_find_region_by_name(region_t *rstack, char *name); 116 | 117 | #endif /* REGSTACK_H_INCLUDED */ 118 | -------------------------------------------------------------------------------- /etc/wrap_cufft_c.c: -------------------------------------------------------------------------------- 1 | 2 | /** HEADER_BEGIN **/ 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "ipm_core.h" 11 | #include "ipm_time.h" 12 | #include "hashtable.h" 13 | #include "perfdata.h" 14 | #include "mod_cufft.h" 15 | 16 | #include "cuda.h" 17 | #include "cufft.h" 18 | 19 | #ifdef HAVE_DYNLOAD 20 | #include 21 | #include 22 | #endif /* HAVE_DYNLOAD */ 23 | 24 | #ifdef 
HAVE_KEYHIST 25 | #include "mod_keyhist.h" 26 | #endif 27 | 28 | #include 29 | 30 | /** HEADER_END **/ 31 | 32 | 33 | __CRET__ __real___CFNAME__(__CPARAMS__); 34 | 35 | 36 | /* ---- wrapping __CFNAME__ ---- */ 37 | /* 38 | * strings in the form __IDENT__ are replaced by the wrapper script 39 | * 40 | * CRET : __CRET__ 41 | * CFNAME : __CFNAME__ 42 | * CPARAMS : __CPARAMS__ 43 | * CARGS : __CARGS__ 44 | * CARGFMT : __CARGFMT__ 45 | * CRETFMT : __CRETFMT__ 46 | * GET_SSIZE : __GET_SSIZE__ 47 | * GET_RSIZE : __GET_RSIZE__ 48 | * GET_RANK : __GET_RANK__ 49 | * GET_BYTES : __GET_BYTES__ 50 | * RETURN_VALUE : __RETURN_VALUE__ 51 | */ 52 | 53 | #ifdef HAVE_DYNLOAD 54 | __CRET__ __CFNAME__(__CPARAMS__) 55 | #else 56 | __CRET__ __wrap___CFNAME__(__CPARAMS__) 57 | #endif 58 | { 59 | static int loaded=0; 60 | static __CRET__ (*__CFNAME___real)(__CPARAMS__); 61 | #if __RETURN_VALUE__ 62 | __CRET__ rv; 63 | #endif 64 | 65 | double tstart, tstop, t; 66 | int oldstate; 67 | int idx, regid; 68 | int ibytes; 69 | IPM_KEY_TYPE key; 70 | 71 | 72 | #ifdef HAVE_DYNLOAD 73 | if(!loaded) { 74 | __CFNAME___real=0; 75 | __CFNAME___real=(__CRET__ (*)(__CPARAMS__)) dlsym(RTLD_NEXT, 76 | "__CFNAME__"); 77 | 78 | if(!dlerror()) loaded=1; 79 | else { 80 | fprintf(stderr, "Error loading __CFNAME__ \n"); 81 | /* handle error */ 82 | } 83 | } 84 | #endif /* HAVE_DYNLOAD */ 85 | 86 | if( ipm_state==STATE_NOTINIT ) { 87 | #ifndef HAVE_MPI 88 | ipm_init(0); 89 | #endif 90 | } 91 | 92 | IPM_TIMESTAMP(tstart); 93 | /* 94 | ipm_region(1, "__CFNAME__"); 95 | */ 96 | 97 | oldstate=ipm_state; 98 | ipm_state=STATE_NOTACTIVE; 99 | /* invoke wrapped function */ 100 | #if __RETURN_VALUE__ 101 | #ifdef HAVE_DYNLOAD 102 | rv=__CFNAME___real(__CARGS__); 103 | #else 104 | rv=__real___CFNAME__(__CARGS__); 105 | #endif 106 | #else 107 | #ifdef HAVE_DYNLOAD 108 | __CFNAME___real(__CARGS__); 109 | #else 110 | __real___CFNAME__(__CARGS__); 111 | #endif 112 | #endif 113 | ipm_state=oldstate; 114 | 115 | if( 
ipm_state!=STATE_ACTIVE ) { 116 | #if __RETURN_VALUE__ 117 | return rv; 118 | #else 119 | return; 120 | #endif 121 | } 122 | 123 | IPM_TIMESTAMP(tstop); 124 | t=tstop-tstart; 125 | 126 | regid=ipm_rstackptr->id; 127 | __GET_BYTES__(ibytes); 128 | 129 | /* 130 | ipm_region(-1, "__CFNAME__"); 131 | */ 132 | 133 | 134 | /* build the key */ 135 | IPM_CUFFT_KEY(key, __CFID___GLOBAL, 0, ibytes, regid, 0); 136 | 137 | /* update htable */ 138 | IPM_HASH_HKEY(ipm_htable, key, idx); 139 | IPM_HASHTABLE_ADD(idx,t); 140 | 141 | #if __RETURN_VALUE__ 142 | return rv; 143 | #else 144 | return; 145 | #endif 146 | } 147 | 148 | -------------------------------------------------------------------------------- /etc/wrap_cublas_c.c: -------------------------------------------------------------------------------- 1 | 2 | /** HEADER_BEGIN **/ 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "ipm_core.h" 11 | #include "ipm_time.h" 12 | #include "hashtable.h" 13 | #include "perfdata.h" 14 | #include "mod_cublas.h" 15 | 16 | #include "cuda.h" 17 | #include "cublas.h" 18 | 19 | #ifdef HAVE_DYNLOAD 20 | #include 21 | #include 22 | #endif /* HAVE_DYNLOAD */ 23 | 24 | #ifdef HAVE_KEYHIST 25 | #include "mod_keyhist.h" 26 | #endif 27 | 28 | #include 29 | 30 | #define MPI3CONST const 31 | 32 | /** HEADER_END **/ 33 | 34 | 35 | __CRET__ __real___CFNAME__(__CPARAMS__); 36 | 37 | 38 | /* ---- wrapping __CFNAME__ ---- */ 39 | /* 40 | * strings in the form __IDENT__ are replaced by the wrapper script 41 | * 42 | * CRET : __CRET__ 43 | * CFNAME : __CFNAME__ 44 | * CPARAMS : __CPARAMS__ 45 | * CARGS : __CARGS__ 46 | * CARGFMT : __CARGFMT__ 47 | * CRETFMT : __CRETFMT__ 48 | * GET_SSIZE : __GET_SSIZE__ 49 | * GET_RSIZE : __GET_RSIZE__ 50 | * GET_RANK : __GET_RANK__ 51 | * GET_BYTES : __GET_BYTES__ 52 | * RETURN_VALUE : __RETURN_VALUE__ 53 | */ 54 | 55 | #ifdef HAVE_DYNLOAD 56 | __CRET__ __CFNAME__(__CPARAMS__) 57 | #else 58 | __CRET__ 
__wrap___CFNAME__(__CPARAMS__) 59 | #endif 60 | { 61 | static int loaded=0; 62 | static __CRET__ (*__CFNAME___real)(__CPARAMS__); 63 | #if __RETURN_VALUE__ 64 | __CRET__ rv; 65 | #endif 66 | 67 | double tstart, tstop, t; 68 | int oldstate; 69 | int idx, regid; 70 | int ibytes; 71 | IPM_KEY_TYPE key; 72 | 73 | 74 | #ifdef HAVE_DYNLOAD 75 | if(!loaded) { 76 | __CFNAME___real=0; 77 | __CFNAME___real=(__CRET__ (*)(__CPARAMS__)) dlsym(RTLD_NEXT, 78 | "__CFNAME__"); 79 | 80 | if(!dlerror()) loaded=1; 81 | else { 82 | fprintf(stderr, "Error loading __CFNAME__ \n"); 83 | /* handle error */ 84 | } 85 | } 86 | #endif /* HAVE_DYNLOAD */ 87 | 88 | if( ipm_state==STATE_NOTINIT ) { 89 | ipm_init(0); 90 | } 91 | 92 | IPM_TIMESTAMP(tstart); 93 | 94 | #if 0 /*(__CFID__ == CUBLAS_ZGEMM_ID ) */ 95 | #else 96 | oldstate=ipm_state; 97 | ipm_state=STATE_NOTACTIVE; 98 | #endif 99 | 100 | /* invoke wrapped function */ 101 | #if __RETURN_VALUE__ 102 | #ifdef HAVE_DYNLOAD 103 | rv=__CFNAME___real(__CARGS__); 104 | #else 105 | rv=__real___CFNAME__(__CARGS__); 106 | #endif 107 | #else 108 | #ifdef HAVE_DYNLOAD 109 | __CFNAME___real(__CARGS__); 110 | #else 111 | __real___CFNAME__(__CARGS__); 112 | #endif 113 | #endif 114 | 115 | #if 0 /* (__CFID__ == CUBLAS_ZGEMM_ID ) */ 116 | #else 117 | ipm_state=oldstate; 118 | #endif 119 | 120 | if( ipm_state!=STATE_ACTIVE ) { 121 | #if __RETURN_VALUE__ 122 | return rv; 123 | #else 124 | return; 125 | #endif 126 | } 127 | 128 | IPM_TIMESTAMP(tstop); 129 | t=tstop-tstart; 130 | 131 | regid=ipm_rstackptr->id; 132 | __GET_BYTES__(ibytes); 133 | 134 | 135 | /* build the key */ 136 | IPM_CUFFT_KEY(key, __CFID___GLOBAL, 0, ibytes, regid, 0); 137 | 138 | /* update htable */ 139 | IPM_HASH_HKEY(ipm_htable, key, idx); 140 | IPM_HASHTABLE_ADD(idx,t); 141 | 142 | #if __RETURN_VALUE__ 143 | return rv; 144 | #else 145 | return; 146 | #endif 147 | } 148 | 149 | -------------------------------------------------------------------------------- /src/jobdata.c: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "ipm_sizes.h" 13 | #include "md5.h" 14 | 15 | void ipm_get_job_id(char *id, int len) 16 | { 17 | char *s=0; 18 | 19 | s = getenv("PBS_JOBID"); 20 | if(!s) s = getenv("LOADL_STEP_ID"); 21 | if(!s) s = getenv("SLURM_JOBID"); 22 | if(!s) s = getenv("JOB_ID"); 23 | if(!s) s = getenv("LSB_JOBID"); 24 | if(!s) { 25 | strncpy(id, "unknown", len); 26 | } else { 27 | strncpy(id, s, len); 28 | } 29 | 30 | } 31 | 32 | void ipm_get_job_user(char *user, int len) 33 | { 34 | 35 | char *s=0; 36 | s = getenv("USER"); 37 | if(s) { 38 | strncpy(user, s, len); 39 | } else { 40 | strncpy(user, "unknown", len); 41 | } 42 | } 43 | 44 | void ipm_get_job_allocation(char *allocation, int len) 45 | { 46 | char *s=0; 47 | 48 | 49 | if(!s) s = getenv("REPO"); 50 | if(!s) s = getenv("GROUP"); 51 | if(s) { 52 | sprintf(allocation, "%s", s); 53 | } else { 54 | strncpy(allocation, "unknown", len); 55 | } 56 | 57 | } 58 | 59 | 60 | void ipm_get_mach_info(char *machi, int len) 61 | { 62 | char buf[200]; 63 | struct utsname mach_info; 64 | 65 | #ifndef IPM_DISABLE_UNAME 66 | uname(&mach_info); 67 | sprintf(buf, "%s_%s", 68 | mach_info.machine, mach_info.sysname); 69 | strncpy(machi, buf, len); 70 | #else 71 | strncpy(machi, "unknown", len); 72 | #endif 73 | } 74 | 75 | 76 | void ipm_get_mach_name(char *machn, int len) 77 | { 78 | char buf[200]; 79 | struct utsname mach_info; 80 | 81 | #ifndef IPM_DISABLE_UNAME 82 | uname(&mach_info); 83 | sprintf(buf, "%s", mach_info.machine); 84 | strncpy(machn, buf, len); 85 | #else 86 | strncpy(machn, "unknown", len); 87 | #endif 88 | } 89 | 90 | 91 | void ipm_get_exec_cmdline(char *cmdl, char *rpath) 92 | { 93 | int i, ii, fd, rv, blen; 94 | FILE *fh; 95 | char *cp,*pp,*up,cbuf[MAXSIZE_CMDLINE]; 96 | 97 | cmdl[0] = '\0'; 98 | fh = 
fopen("/proc/self/cmdline","r"); 99 | if(fh) { 100 | /* 101 | rv=fscanf(fh,"%s",cmdl); 102 | printf("rv %d cmdl %s\n", rv, cmdl); 103 | rv=fscanf(fh,"%s",cmdl); 104 | printf("rv %d cmdl %s\n", rv, cmdl); 105 | */ 106 | ii = 0; 107 | fgets(cmdl,MAXSIZE_CMDLINE,fh); 108 | for(i=1;i