├── .gitattributes
├── .gitignore
├── .travis.yml
├── AUTHORS
├── BUILD.md
├── CMakeLists.txt
├── COPYING
├── NOTICE
├── README.md
├── applications
    ├── CMakeLists.txt
    ├── NPB
    │   ├── GRAPPA
    │   │   ├── IS
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── igor_grappa_intsort.rb
    │   │   │   ├── intsort.cpp
    │   │   │   ├── npb_intsort.hpp
    │   │   │   ├── npbparams.h
    │   │   │   ├── randlc.cpp
    │   │   │   └── randlc.hpp
    │   │   ├── common
    │   │   │   ├── c_print_results.c
    │   │   │   ├── c_timers.c
    │   │   │   ├── print_results.f
    │   │   │   ├── randdp.c
    │   │   │   ├── randdp.f
    │   │   │   ├── randdpvec.f
    │   │   │   ├── randi8.f
    │   │   │   ├── randi8_safe.f
    │   │   │   └── timers.f
    │   │   ├── config
    │   │   │   ├── NAS.samples
    │   │   │   │   ├── README
    │   │   │   │   ├── make.def.dec_alpha
    │   │   │   │   ├── make.def.ibm_aix64
    │   │   │   │   ├── make.def.irix6.2
    │   │   │   │   ├── make.def.origin
    │   │   │   │   ├── make.def.pgi_mpich
    │   │   │   │   ├── make.def.sgi_altix
    │   │   │   │   ├── make.def.sgi_powerchallenge
    │   │   │   │   ├── make.def.sp2_babbage
    │   │   │   │   ├── make.def.sun_ultra_sparc
    │   │   │   │   ├── make.def.t3d_cosmos
    │   │   │   │   ├── make.def_sun_mpich
    │   │   │   │   ├── suite.def.bt
    │   │   │   │   ├── suite.def.cg
    │   │   │   │   ├── suite.def.ep
    │   │   │   │   ├── suite.def.ft
    │   │   │   │   ├── suite.def.is
    │   │   │   │   ├── suite.def.lu
    │   │   │   │   ├── suite.def.mg
    │   │   │   │   ├── suite.def.small
    │   │   │   │   └── suite.def.sp
    │   │   │   ├── make.def
    │   │   │   ├── make.def.template
    │   │   │   ├── make.dummy
    │   │   │   ├── suite.def
    │   │   │   └── suite.def.template
    │   │   └── sys
    │   │   │   ├── .gitignore
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── make.common
    │   │   │   ├── print_header
    │   │   │   ├── print_instructions
    │   │   │   ├── setparams.c
    │   │   │   └── suite.awk
    │   ├── MPI
    │   │   ├── .gitignore
    │   │   ├── BT
    │   │   │   ├── Makefile
    │   │   │   ├── add.f
    │   │   │   ├── adi.f
    │   │   │   ├── bt.f
    │   │   │   ├── btio.f
    │   │   │   ├── btio_common.f
    │   │   │   ├── copy_faces.f
    │   │   │   ├── define.f
    │   │   │   ├── epio.f
    │   │   │   ├── error.f
    │   │   │   ├── exact_rhs.f
    │   │   │   ├── exact_solution.f
    │   │   │   ├── fortran_io.f
    │   │   │   ├── full_mpiio.f
    │   │   │   ├── header.h
    │   │   │   ├── initialize.f
    │   │   │   ├── inputbt.data.sample
    │   │   │   ├── make_set.f
    │   │   │   ├── mpinpb.h
    │   │   │   ├── rhs.f
    │   │   │   ├── set_constants.f
    │   │   │   ├── setup_mpi.f
    │   │   │   ├── simple_mpiio.f
    │   │   │   ├── solve_subs.f
    │   │   │   ├── verify.f
    │   │   │   ├── work_lhs.h
    │   │   │   ├── work_lhs_vec.h
    │   │   │   ├── x_solve.f
    │   │   │   ├── x_solve_vec.f
    │   │   │   ├── y_solve.f
    │   │   │   ├── y_solve_vec.f
    │   │   │   ├── z_solve.f
    │   │   │   └── z_solve_vec.f
    │   │   ├── CG
    │   │   │   ├── Makefile
    │   │   │   ├── cg.f
    │   │   │   ├── mpinpb.h
    │   │   │   ├── runexps.rb
    │   │   │   └── timing.h
    │   │   ├── DT
    │   │   │   ├── DGraph.c
    │   │   │   ├── DGraph.h
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   └── dt.c
    │   │   ├── EP
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── ep.f
    │   │   │   └── mpinpb.h
    │   │   ├── FT
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── ft.f
    │   │   │   ├── global.h
    │   │   │   ├── inputft.data.sample
    │   │   │   └── mpinpb.h
    │   │   ├── IS
    │   │   │   ├── Makefile
    │   │   │   ├── is.c
    │   │   │   └── npbparams.h
    │   │   ├── LU
    │   │   │   ├── Makefile
    │   │   │   ├── applu.incl
    │   │   │   ├── bcast_inputs.f
    │   │   │   ├── blts.f
    │   │   │   ├── blts_vec.f
    │   │   │   ├── buts.f
    │   │   │   ├── buts_vec.f
    │   │   │   ├── erhs.f
    │   │   │   ├── error.f
    │   │   │   ├── exact.f
    │   │   │   ├── exchange_1.f
    │   │   │   ├── exchange_3.f
    │   │   │   ├── exchange_4.f
    │   │   │   ├── exchange_5.f
    │   │   │   ├── exchange_6.f
    │   │   │   ├── init_comm.f
    │   │   │   ├── inputlu.data.sample
    │   │   │   ├── jacld.f
    │   │   │   ├── jacu.f
    │   │   │   ├── l2norm.f
    │   │   │   ├── lu.f
    │   │   │   ├── mpinpb.h
    │   │   │   ├── neighbors.f
    │   │   │   ├── nodedim.f
    │   │   │   ├── pintgr.f
    │   │   │   ├── proc_grid.f
    │   │   │   ├── read_input.f
    │   │   │   ├── rhs.f
    │   │   │   ├── setbv.f
    │   │   │   ├── setcoeff.f
    │   │   │   ├── sethyper.f
    │   │   │   ├── setiv.f
    │   │   │   ├── ssor.f
    │   │   │   ├── subdomain.f
    │   │   │   ├── timing.h
    │   │   │   └── verify.f
    │   │   ├── MG
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── globals.h
    │   │   │   ├── mg.f
    │   │   │   ├── mg.input.sample
    │   │   │   └── mpinpb.h
    │   │   ├── MPI_dummy
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── mpi.h
    │   │   │   ├── mpi_dummy.c
    │   │   │   ├── mpi_dummy.f
    │   │   │   ├── mpif.h
    │   │   │   ├── test.f
    │   │   │   ├── wtime.c
    │   │   │   ├── wtime.f
    │   │   │   ├── wtime.h
    │   │   │   └── wtime_sgi64.c
    │   │   ├── Makefile
    │   │   ├── README
    │   │   ├── README.install
    │   │   ├── SP
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── add.f
    │   │   │   ├── adi.f
    │   │   │   ├── copy_faces.f
    │   │   │   ├── define.f
    │   │   │   ├── error.f
    │   │   │   ├── exact_rhs.f
    │   │   │   ├── exact_solution.f
    │   │   │   ├── header.h
    │   │   │   ├── initialize.f
    │   │   │   ├── inputsp.data.sample
    │   │   │   ├── lhsx.f
    │   │   │   ├── lhsy.f
    │   │   │   ├── lhsz.f
    │   │   │   ├── make_set.f
    │   │   │   ├── mpinpb.h
    │   │   │   ├── ninvr.f
    │   │   │   ├── pinvr.f
    │   │   │   ├── rhs.f
    │   │   │   ├── set_constants.f
    │   │   │   ├── setup_mpi.f
    │   │   │   ├── sp.f
    │   │   │   ├── txinvr.f
    │   │   │   ├── tzetar.f
    │   │   │   ├── verify.f
    │   │   │   ├── x_solve.f
    │   │   │   ├── y_solve.f
    │   │   │   └── z_solve.f
    │   │   ├── common
    │   │   │   ├── c_print_results.c
    │   │   │   ├── c_timers.c
    │   │   │   ├── print_results.f
    │   │   │   ├── randdp.c
    │   │   │   ├── randdp.f
    │   │   │   ├── randdpvec.f
    │   │   │   ├── randi8.f
    │   │   │   ├── randi8_safe.f
    │   │   │   └── timers.f
    │   │   ├── config
    │   │   │   ├── NAS.samples
    │   │   │   │   ├── README
    │   │   │   │   ├── make.def.dec_alpha
    │   │   │   │   ├── make.def.ibm_aix64
    │   │   │   │   ├── make.def.irix6.2
    │   │   │   │   ├── make.def.origin
    │   │   │   │   ├── make.def.pgi_mpich
    │   │   │   │   ├── make.def.sgi_altix
    │   │   │   │   ├── make.def.sgi_powerchallenge
    │   │   │   │   ├── make.def.sp2_babbage
    │   │   │   │   ├── make.def.sun_ultra_sparc
    │   │   │   │   ├── make.def.t3d_cosmos
    │   │   │   │   ├── make.def_sun_mpich
    │   │   │   │   ├── suite.def.bt
    │   │   │   │   ├── suite.def.cg
    │   │   │   │   ├── suite.def.ep
    │   │   │   │   ├── suite.def.ft
    │   │   │   │   ├── suite.def.is
    │   │   │   │   ├── suite.def.lu
    │   │   │   │   ├── suite.def.mg
    │   │   │   │   ├── suite.def.small
    │   │   │   │   └── suite.def.sp
    │   │   │   ├── make.def
    │   │   │   ├── make.def.template
    │   │   │   ├── make.dummy
    │   │   │   ├── suite.def
    │   │   │   └── suite.def.template
    │   │   ├── igor_mpi_intsort.rb
    │   │   ├── sort.rb
    │   │   └── sys
    │   │   │   ├── .gitignore
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── make.common
    │   │   │   ├── print_header
    │   │   │   ├── print_instructions
    │   │   │   ├── setparams.c
    │   │   │   └── suite.awk
    │   ├── NPB3.3-HPF.README
    │   ├── NPB3.3-JAV.README
    │   ├── OMP
    │   │   ├── BT
    │   │   │   ├── Makefile
    │   │   │   ├── add.f
    │   │   │   ├── adi.f
    │   │   │   ├── bt.f
    │   │   │   ├── error.f
    │   │   │   ├── exact_rhs.f
    │   │   │   ├── exact_solution.f
    │   │   │   ├── header.h
    │   │   │   ├── initialize.f
    │   │   │   ├── inputbt.data.sample
    │   │   │   ├── rhs.f
    │   │   │   ├── set_constants.f
    │   │   │   ├── solve_subs.f
    │   │   │   ├── verify.f
    │   │   │   ├── work_lhs.h
    │   │   │   ├── work_lhs_vec.h
    │   │   │   ├── x_solve.f
    │   │   │   ├── x_solve_vec.f
    │   │   │   ├── y_solve.f
    │   │   │   ├── y_solve_vec.f
    │   │   │   ├── z_solve.f
    │   │   │   └── z_solve_vec.f
    │   │   ├── CG
    │   │   │   ├── Makefile
    │   │   │   ├── README.carefully
    │   │   │   ├── cg.f
    │   │   │   ├── globals.h
    │   │   │   └── runexps.rb
    │   │   ├── DC
    │   │   │   ├── ADC.par
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── adc.c
    │   │   │   ├── adc.h
    │   │   │   ├── adcc.h
    │   │   │   ├── dc.c
    │   │   │   ├── extbuild.c
    │   │   │   ├── jobcntl.c
    │   │   │   ├── macrodef.h
    │   │   │   ├── protots.h
    │   │   │   ├── rbt.c
    │   │   │   └── rbt.h
    │   │   ├── EP
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   └── ep.f
    │   │   ├── FT
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── ft.f
    │   │   │   ├── global.h
    │   │   │   └── inputft.data.sample
    │   │   ├── IS
    │   │   │   ├── Makefile
    │   │   │   ├── README.carefully
    │   │   │   └── is.c
    │   │   ├── LU
    │   │   │   ├── Makefile
    │   │   │   ├── applu.incl
    │   │   │   ├── blts.f
    │   │   │   ├── blts_vec.f
    │   │   │   ├── buts.f
    │   │   │   ├── buts_vec.f
    │   │   │   ├── domain.f
    │   │   │   ├── erhs.f
    │   │   │   ├── error.f
    │   │   │   ├── exact.f
    │   │   │   ├── inputlu.data.sample
    │   │   │   ├── jacld.f
    │   │   │   ├── jacu.f
    │   │   │   ├── l2norm.f
    │   │   │   ├── lu.f
    │   │   │   ├── pintgr.f
    │   │   │   ├── read_input.f
    │   │   │   ├── rhs.f
    │   │   │   ├── rhs_vec.f
    │   │   │   ├── setbv.f
    │   │   │   ├── setcoeff.f
    │   │   │   ├── setiv.f
    │   │   │   ├── ssor.f
    │   │   │   ├── ssor_vec.f
    │   │   │   ├── syncs.f
    │   │   │   └── verify.f
    │   │   ├── MG
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── globals.h
    │   │   │   ├── mg.f
    │   │   │   └── mg.input.sample
    │   │   ├── Makefile
    │   │   ├── README
    │   │   ├── README.install
    │   │   ├── SP
    │   │   │   ├── Makefile
    │   │   │   ├── add.f
    │   │   │   ├── adi.f
    │   │   │   ├── error.f
    │   │   │   ├── exact_rhs.f
    │   │   │   ├── exact_solution.f
    │   │   │   ├── header.h
    │   │   │   ├── initialize.f
    │   │   │   ├── inputsp.data.sample
    │   │   │   ├── ninvr.f
    │   │   │   ├── pinvr.f
    │   │   │   ├── rhs.f
    │   │   │   ├── set_constants.f
    │   │   │   ├── sp.f
    │   │   │   ├── txinvr.f
    │   │   │   ├── tzetar.f
    │   │   │   ├── verify.f
    │   │   │   ├── x_solve.f
    │   │   │   ├── y_solve.f
    │   │   │   └── z_solve.f
    │   │   ├── UA
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── adapt.f
    │   │   │   ├── convect.f
    │   │   │   ├── diffuse.f
    │   │   │   ├── header.h
    │   │   │   ├── mason.f
    │   │   │   ├── move.f
    │   │   │   ├── precond.f
    │   │   │   ├── setup.f
    │   │   │   ├── transfer.f
    │   │   │   ├── transfer_au.f
    │   │   │   ├── ua.f
    │   │   │   ├── utils.f
    │   │   │   └── verify.f
    │   │   ├── common
    │   │   │   ├── c_print_results.c
    │   │   │   ├── c_timers.c
    │   │   │   ├── print_results.f
    │   │   │   ├── randdp.f
    │   │   │   ├── randdpvec.f
    │   │   │   ├── randi8.f
    │   │   │   ├── randi8_safe.f
    │   │   │   ├── timers.f
    │   │   │   ├── wtime.c
    │   │   │   ├── wtime.h
    │   │   │   └── wtime_sgi64.c
    │   │   ├── config
    │   │   │   ├── NAS.samples
    │   │   │   │   ├── README
    │   │   │   │   ├── make.def.gcc_x86
    │   │   │   │   ├── make.def_ia64
    │   │   │   │   ├── make.def_ibm
    │   │   │   │   ├── make.def_ibm64
    │   │   │   │   ├── make.def_intel
    │   │   │   │   ├── make.def_omni
    │   │   │   │   ├── make.def_pgi
    │   │   │   │   ├── make.def_sgi
    │   │   │   │   ├── make.def_sgi64
    │   │   │   │   ├── make.def_sun
    │   │   │   │   ├── make.def_sun64
    │   │   │   │   ├── suite.def.bt
    │   │   │   │   ├── suite.def.cg
    │   │   │   │   ├── suite.def.ep
    │   │   │   │   ├── suite.def.ft
    │   │   │   │   ├── suite.def.is
    │   │   │   │   ├── suite.def.lu
    │   │   │   │   ├── suite.def.mg
    │   │   │   │   └── suite.def.sp
    │   │   │   ├── make.def.template
    │   │   │   └── suite.def.template
    │   │   └── sys
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── make.common
    │   │   │   ├── print_header
    │   │   │   ├── print_instructions
    │   │   │   ├── setparams.c
    │   │   │   └── suite.awk
    │   ├── README
    │   └── SERIAL
    │   │   ├── BT
    │   │       ├── Makefile
    │   │       ├── add.f
    │   │       ├── adi.f
    │   │       ├── bt.f
    │   │       ├── error.f
    │   │       ├── exact_rhs.f
    │   │       ├── exact_solution.f
    │   │       ├── header.h
    │   │       ├── initialize.f
    │   │       ├── inputbt.data.sample
    │   │       ├── rhs.f
    │   │       ├── set_constants.f
    │   │       ├── solve_subs.f
    │   │       ├── verify.f
    │   │       ├── work_lhs.h
    │   │       ├── work_lhs_vec.h
    │   │       ├── x_solve.f
    │   │       ├── x_solve_vec.f
    │   │       ├── y_solve.f
    │   │       ├── y_solve_vec.f
    │   │       ├── z_solve.f
    │   │       └── z_solve_vec.f
    │   │   ├── CG
    │   │       ├── Makefile
    │   │       ├── README.carefully
    │   │       ├── cg.f
    │   │       └── globals.h
    │   │   ├── DC
    │   │       ├── ADC.par
    │   │       ├── Makefile
    │   │       ├── README
    │   │       ├── adc.c
    │   │       ├── adc.h
    │   │       ├── adcc.h
    │   │       ├── dc.c
    │   │       ├── extbuild.c
    │   │       ├── jobcntl.c
    │   │       ├── macrodef.h
    │   │       ├── protots.h
    │   │       ├── rbt.c
    │   │       └── rbt.h
    │   │   ├── EP
    │   │       ├── Makefile
    │   │       ├── README
    │   │       └── ep.f
    │   │   ├── FT
    │   │       ├── Makefile
    │   │       ├── appft.f
    │   │       ├── auxfnct.f
    │   │       ├── fft3d.f
    │   │       ├── global.h
    │   │       ├── mainft.f
    │   │       └── verify.f
    │   │   ├── IS
    │   │       ├── Makefile
    │   │       ├── README.carefully
    │   │       └── is.c
    │   │   ├── LU
    │   │       ├── Makefile
    │   │       ├── applu.incl
    │   │       ├── blts.f
    │   │       ├── blts_vec.f
    │   │       ├── buts.f
    │   │       ├── buts_vec.f
    │   │       ├── domain.f
    │   │       ├── erhs.f
    │   │       ├── error.f
    │   │       ├── exact.f
    │   │       ├── inputlu.data.sample
    │   │       ├── jacld.f
    │   │       ├── jacu.f
    │   │       ├── l2norm.f
    │   │       ├── lu.f
    │   │       ├── pintgr.f
    │   │       ├── read_input.f
    │   │       ├── rhs.f
    │   │       ├── rhs_vec.f
    │   │       ├── setbv.f
    │   │       ├── setcoeff.f
    │   │       ├── setiv.f
    │   │       ├── ssor.f
    │   │       ├── ssor_vec.f
    │   │       └── verify.f
    │   │   ├── MG
    │   │       ├── Makefile
    │   │       ├── README
    │   │       ├── globals.h
    │   │       ├── mg.f
    │   │       └── mg.input.sample
    │   │   ├── Makefile
    │   │   ├── README
    │   │   ├── README.install
    │   │   ├── SP
    │   │       ├── Makefile
    │   │       ├── add.f
    │   │       ├── adi.f
    │   │       ├── error.f
    │   │       ├── exact_rhs.f
    │   │       ├── exact_solution.f
    │   │       ├── header.h
    │   │       ├── initialize.f
    │   │       ├── inputsp.data.sample
    │   │       ├── ninvr.f
    │   │       ├── pinvr.f
    │   │       ├── rhs.f
    │   │       ├── set_constants.f
    │   │       ├── sp.f
    │   │       ├── txinvr.f
    │   │       ├── tzetar.f
    │   │       ├── verify.f
    │   │       ├── x_solve.f
    │   │       ├── y_solve.f
    │   │       └── z_solve.f
    │   │   ├── UA
    │   │       ├── Makefile
    │   │       ├── README
    │   │       ├── adapt.f
    │   │       ├── convect.f
    │   │       ├── diffuse.f
    │   │       ├── header.h
    │   │       ├── mason.f
    │   │       ├── move.f
    │   │       ├── precond.f
    │   │       ├── setup.f
    │   │       ├── transfer.f
    │   │       ├── ua.f
    │   │       ├── utils.f
    │   │       └── verify.f
    │   │   ├── common
    │   │       ├── c_print_results.c
    │   │       ├── c_timers.c
    │   │       ├── print_results.f
    │   │       ├── randdp.f
    │   │       ├── randdpvec.f
    │   │       ├── randi8.f
    │   │       ├── randi8_safe.f
    │   │       ├── timers.f
    │   │       ├── wtime.c
    │   │       ├── wtime.h
    │   │       └── wtime_sgi64.c
    │   │   ├── config
    │   │       ├── NAS.samples
    │   │       │   ├── README
    │   │       │   ├── make.def_crayx1
    │   │       │   ├── make.def_gcc_x86
    │   │       │   ├── make.def_ia64
    │   │       │   ├── make.def_ibm
    │   │       │   ├── make.def_ibm64
    │   │       │   ├── make.def_intel
    │   │       │   ├── make.def_pgi
    │   │       │   ├── make.def_sgi
    │   │       │   ├── make.def_sgi64
    │   │       │   ├── make.def_sun
    │   │       │   ├── make.def_sun64
    │   │       │   ├── suite.def.bt
    │   │       │   ├── suite.def.cg
    │   │       │   ├── suite.def.ep
    │   │       │   ├── suite.def.ft
    │   │       │   ├── suite.def.is
    │   │       │   ├── suite.def.lu
    │   │       │   ├── suite.def.mg
    │   │       │   └── suite.def.sp
    │   │       ├── make.def.template
    │   │       └── suite.def.template
    │   │   └── sys
    │   │       ├── Makefile
    │   │       ├── README
    │   │       ├── make.common
    │   │       ├── print_header
    │   │       ├── print_instructions
    │   │       ├── setparams.c
    │   │       └── suite.awk
    ├── demos
    │   ├── CMakeLists.txt
    │   ├── gups
    │   │   ├── gups-lcg.cpp
    │   │   ├── gups.cpp
    │   │   ├── gups1.cpp
    │   │   ├── gups2.cpp
    │   │   ├── gups3.cpp
    │   │   └── gups4.cpp
    │   ├── hello_world
    │   │   └── hello_world.cpp
    │   ├── nqueens
    │   │   └── nqueens.cpp
    │   ├── standalone
    │   │   ├── Makefile
    │   │   └── standalone.cpp
    │   └── tree_search
    │   │   └── tree_search.cpp
    ├── graph500
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── COPYING
    │   ├── Graph500.html
    │   ├── Graph500.org
    │   ├── Makefile
    │   ├── README
    │   ├── README-Grappa.md
    │   ├── compat.h
    │   ├── compatio.h
    │   ├── generator
    │   │   ├── CMakeLists.txt
    │   │   ├── LICENSE_1_0.txt
    │   │   ├── Makefile.grappa
    │   │   ├── Makefile.mpi
    │   │   ├── Makefile.omp
    │   │   ├── Makefile.seq
    │   │   ├── Makefile.xmt
    │   │   ├── README
    │   │   ├── generator_test_mpi.c
    │   │   ├── generator_test_omp.c
    │   │   ├── generator_test_seq.c
    │   │   ├── generator_test_xmt.c
    │   │   ├── graph_generator.c
    │   │   ├── graph_generator.h
    │   │   ├── graph_generator.xmt.c
    │   │   ├── make_graph.c
    │   │   ├── make_graph.h
    │   │   ├── make_graph.xmt.c
    │   │   ├── mod_arith.h
    │   │   ├── mod_arith_32bit.h
    │   │   ├── mod_arith_64bit.h
    │   │   ├── mod_arith_xmt.h
    │   │   ├── mrg_transitions.c
    │   │   ├── splittable_mrg.c
    │   │   ├── splittable_mrg.h
    │   │   ├── user_settings.h
    │   │   ├── utils.c
    │   │   └── utils.h
    │   ├── graph500.c
    │   ├── graph500.h
    │   ├── grappa
    │   │   ├── .gitignore
    │   │   ├── README
    │   │   ├── asciize.rb
    │   │   ├── beamer.rb
    │   │   ├── beamer2.rb
    │   │   ├── beamer_alg.md
    │   │   ├── bfs.rb
    │   │   ├── common.h
    │   │   ├── graph.cpp
    │   │   ├── graph.hpp
    │   │   ├── oned_csr.cpp
    │   │   ├── oned_csr.h
    │   │   ├── options.cpp
    │   │   ├── options.h
    │   │   ├── test.rb
    │   │   ├── timer.h
    │   │   ├── trace.rb
    │   │   └── vampir.rb
    │   ├── kronecker.c
    │   ├── kronecker.h
    │   ├── make-edgelist.c
    │   ├── make-incs
    │   │   ├── make.inc-gcc
    │   │   ├── make.inc-osx
    │   │   └── make.inc-xmt
    │   ├── mpi
    │   │   ├── Makefile
    │   │   ├── README
    │   │   ├── bfs_custom.c
    │   │   ├── bfs_one_sided.c
    │   │   ├── bfs_replicated.c
    │   │   ├── bfs_replicated_csc.c
    │   │   ├── bfs_simple.c
    │   │   ├── common.h
    │   │   ├── igor_mpi_bfs.rb
    │   │   ├── main.c
    │   │   ├── mpi_workarounds.h
    │   │   ├── oned_csc.c
    │   │   ├── oned_csc.h
    │   │   ├── oned_csr.c
    │   │   ├── oned_csr.h
    │   │   ├── onesided.c
    │   │   ├── onesided.h
    │   │   ├── onesided_emul.c
    │   │   ├── redistribute.h
    │   │   ├── utils.c
    │   │   └── validate.c
    │   ├── octave
    │   │   ├── Graph500.m
    │   │   ├── kernel_1.m
    │   │   ├── kernel_2.m
    │   │   ├── kronecker_generator.m
    │   │   ├── output.m
    │   │   └── validate.m
    │   ├── omp-csr
    │   │   └── omp-csr.c
    │   ├── options.c
    │   ├── options.h
    │   ├── prng.c
    │   ├── prng.h
    │   ├── rmat.c
    │   ├── rmat.h
    │   ├── run_exps.rb
    │   ├── seq-csr
    │   │   └── seq-csr.c
    │   ├── seq-list
    │   │   └── seq-list.c
    │   ├── timer.c
    │   ├── timer.h
    │   ├── verify.c
    │   ├── verify.h
    │   ├── xalloc.c
    │   ├── xalloc.h
    │   ├── xmt-csr-local
    │   │   └── xmt-csr-local.c
    │   └── xmt-csr
    │   │   ├── xmt-csr.c
    │   │   └── xmt-csr.xmt.c
    ├── graphlab
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── bfs.cpp
    │   ├── cc.cpp
    │   ├── graphlab.cpp
    │   ├── graphlab.hpp
    │   ├── graphlab_borrowed.hpp
    │   ├── graphlab_naive.hpp
    │   ├── graphlab_splitv.hpp
    │   ├── igor_graphlab_bfs.rb
    │   ├── igor_graphlab_pagerank.rb
    │   ├── igor_graphlab_sssp.rb
    │   ├── pagerank.cpp
    │   ├── pagerank_new.cpp
    │   ├── sssp.cpp
    │   └── test.cpp
    ├── isopath
    │   ├── CMakeLists.txt
    │   ├── Makefile
    │   ├── compat.h
    │   ├── compatio.h
    │   ├── generator
    │   │   ├── LICENSE_1_0.txt
    │   │   ├── Makefile.grappa
    │   │   ├── Makefile.mpi
    │   │   ├── Makefile.omp
    │   │   ├── Makefile.seq
    │   │   ├── Makefile.xmt
    │   │   ├── README
    │   │   ├── generator_test_mpi.c
    │   │   ├── generator_test_omp.c
    │   │   ├── generator_test_seq.c
    │   │   ├── generator_test_xmt.c
    │   │   ├── graph_generator.c
    │   │   ├── graph_generator.h
    │   │   ├── graph_generator.xmt.c
    │   │   ├── make_graph.c
    │   │   ├── make_graph.h
    │   │   ├── make_graph.xmt.c
    │   │   ├── mod_arith.h
    │   │   ├── mod_arith_32bit.h
    │   │   ├── mod_arith_64bit.h
    │   │   ├── mod_arith_xmt.h
    │   │   ├── mrg_transitions.c
    │   │   ├── splittable_mrg.c
    │   │   ├── splittable_mrg.h
    │   │   ├── user_settings.h
    │   │   ├── utils.c
    │   │   └── utils.h
    │   ├── grappa
    │   │   ├── .gitignore
    │   │   ├── CMakeLists.txt
    │   │   ├── Makefile
    │   │   ├── README
    │   │   ├── common.h
    │   │   ├── graph.cpp
    │   │   ├── graph.hpp
    │   │   ├── isopath.cpp
    │   │   ├── oned_csr.cpp
    │   │   ├── oned_csr.h
    │   │   ├── options.cpp
    │   │   ├── options.h
    │   │   ├── simple_graphs.cpp
    │   │   ├── simple_graphs.hpp
    │   │   └── timer.h
    │   ├── options.c
    │   ├── options.h
    │   ├── prng.c
    │   ├── prng.h
    │   ├── timer.c
    │   ├── timer.h
    │   ├── verify.c
    │   ├── verify.h
    │   ├── xalloc.c
    │   └── xalloc.h
    ├── join
    │   ├── .gitignore
    │   ├── Aggregates.hpp
    │   ├── CMakeLists.txt
    │   ├── DHT.hpp
    │   ├── DHT_old.hpp
    │   ├── DHT_symmetric.hpp
    │   ├── DoubleDHT.hpp
    │   ├── DoubleDHT_test.cpp
    │   ├── HashJoin.cpp
    │   ├── HashJoin.hpp
    │   ├── HashJoin_tests.cpp
    │   ├── HashSet.hpp
    │   ├── Hypercube.cpp
    │   ├── Hypercube.hpp
    │   ├── Hypercube_tests.cpp
    │   ├── KMeansMR.cpp
    │   ├── Local_graph_tests.cpp
    │   ├── MapReduce.cpp
    │   ├── MapReduce.hpp
    │   ├── MapReduce_tests.cpp
    │   ├── MatchesDHT.cpp
    │   ├── MatchesDHT.hpp
    │   ├── Query.cpp
    │   ├── Query.hpp
    │   ├── Relation_io_tests.cpp
    │   ├── Tuple.cpp
    │   ├── Tuple.hpp
    │   ├── convert2bin.cpp
    │   ├── double.txt
    │   ├── extract_timestamps.sh
    │   ├── hex_tri.soln.txt
    │   ├── hex_tri.txt
    │   ├── igor_grappa_baseline.rb
    │   ├── igor_grappa_sp2bench.rb
    │   ├── igor_grappa_squares_partition.rb
    │   ├── igor_grappa_triangles.rb
    │   ├── igor_grappa_twohop.rb
    │   ├── igor_interact_kmeans.rb
    │   ├── igor_interact_sp2bench.rb
    │   ├── igor_kmeans.rb
    │   ├── join.cpp
    │   ├── local_graph.cpp
    │   ├── local_graph.hpp
    │   ├── overlapping.txt
    │   ├── relation.hpp
    │   ├── relation_io.cpp
    │   ├── relation_io.hpp
    │   ├── scripts
    │   │   ├── activenodes.sh
    │   │   ├── forall.sh
    │   │   ├── getcolumn.sh
    │   │   ├── nodes_nested2names.sh
    │   │   └── pidlist.sh
    │   ├── single.txt
    │   ├── small_tri.soln.txt
    │   ├── small_tri.txt
    │   ├── sp2b.100mb.sh
    │   ├── sp2b.100t.sh
    │   ├── sp2b.1gb.sh
    │   ├── squares.cpp
    │   ├── squares.hpp
    │   ├── squares_bushy.cpp
    │   ├── squares_bushy.hpp
    │   ├── squares_partition.cpp
    │   ├── squares_partition.hpp
    │   ├── squares_partition_bushy.cpp
    │   ├── squares_partition_bushy.hpp
    │   ├── stats.cpp
    │   ├── stats.h
    │   ├── triangles.OldDHT.cpp
    │   ├── triangles.cpp
    │   ├── triangles.sql
    │   ├── triangles_partition.cpp
    │   ├── twohop.cpp
    │   ├── utility.cpp
    │   └── utility.hpp
    ├── nativegraph
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── bfs
    │   │   ├── CMakeLists.txt
    │   │   ├── bfs_beamer.cpp
    │   │   ├── bfs_queues.cpp
    │   │   ├── bfs_spmd.cpp
    │   │   ├── common.hpp
    │   │   ├── igor_bfs.rb
    │   │   └── main.cpp
    │   ├── cc
    │   │   ├── CMakeLists.txt
    │   │   ├── cc_kahan.hpp
    │   │   ├── igor_cc.rb
    │   │   └── main.cpp
    │   ├── sssp
    │   │   ├── CMakeLists.txt
    │   │   ├── sssp.cpp
    │   │   └── sssp.hpp
    │   └── verifier.hpp
    ├── pagerank
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── igor_grappa_pagerank.rb
    │   ├── mult_main.cpp
    │   ├── pagerank.R
    │   ├── pagerank.cpp
    │   ├── runexps.rb
    │   ├── runpr.rb
    │   ├── single.rb
    │   ├── spmv_mult.cpp
    │   └── spmv_mult.hpp
    ├── sort
    │   ├── CMakeLists.txt
    │   └── grappa
    │   │   ├── CMakeLists.txt
    │   │   ├── main.cpp
    │   │   ├── npb_intsort.h
    │   │   ├── sort.hpp
    │   │   ├── sort.rb
    │   │   ├── sort_test.cpp
    │   │   └── test.rb
    ├── util
    │   ├── CMakeLists.txt
    │   ├── convert
    │   │   └── convert.cpp
    │   └── otf2sqlite.cpp
    └── uts
    │   ├── .gitignore
    │   ├── AUTHORS
    │   ├── Changelog
    │   ├── LICENSE
    │   ├── Makefile.uts
    │   ├── README
    │   ├── README-Grappa.md
    │   ├── check_ctrk.pl
    │   ├── config
    │       ├── cray-mta
    │       ├── cray-x1
    │       ├── cray-xmt
    │       ├── cray-xt
    │       ├── linux-c99
    │       ├── linux-cluster
    │       ├── linux-workstation
    │       └── sgi-altix
    │   ├── configure.sh
    │   ├── ctrk.h
    │   ├── dequeue.c
    │   ├── dequeue.h
    │   ├── dlist.c
    │   ├── dlist.h
    │   ├── igor_grappa_uts.rb
    │   ├── mpi_worksharing.c
    │   ├── mpi_workstealing.c
    │   ├── rng
    │       ├── alfg.c
    │       ├── alfg.h
    │       ├── brg_endian.h
    │       ├── brg_sha1.c
    │       ├── brg_sha1.h
    │       ├── brg_types.h
    │       └── rng.h
    │   ├── runuts.rb
    │   ├── sample_trees.csh
    │   ├── sample_trees.sh
    │   ├── shared_dequeue.c
    │   ├── shared_dequeue.h
    │   ├── shared_dlist.c
    │   ├── shared_dlist.h
    │   ├── stats.c
    │   ├── time_poll.c
    │   ├── time_rng.c
    │   ├── upc_worksharing.c
    │   ├── uts.c
    │   ├── uts.h
    │   ├── uts_dfs.c
    │   ├── uts_dfs_review.c
    │   ├── uts_dm.c
    │   ├── uts_dm.h
    │   ├── uts_grappa.cpp
    │   ├── uts_shm.c
    │   └── uts_upc_enhanced.c
├── bin
    ├── CMakeLists.txt
    ├── distcc_make
    ├── distcc_ninja
    ├── grappa_run
    ├── grappa_srun
    ├── launch_distcc.sh
    ├── settings.sh
    ├── srun_epilog.sh
    └── srun_prolog.rb
├── configure
├── doc
    ├── CMakeLists.txt
    ├── debugging.md
    ├── running.md
    ├── testing.md
    └── tutorial
    │   ├── CMakeLists.txt
    │   ├── addressing_linear.cpp
    │   ├── addressing_symmetric.cpp
    │   ├── delegates.cpp
    │   ├── hello_world_1.cpp
    │   ├── hello_world_2.cpp
    │   ├── search1.cpp
    │   ├── search2.cpp
    │   ├── tree.hpp
    │   └── tutorial.md
├── scratch
    └── CMakeLists.txt
├── system
    ├── Addressing.hpp
    ├── Addressing_tests.cpp
    ├── Aggregator.cpp
    ├── Aggregator.hpp
    ├── Aggregator_tests.cpp
    ├── Allocator.cpp
    ├── Allocator.hpp
    ├── Allocator_tests.cpp
    ├── Array.hpp
    ├── Array_tests.cpp
    ├── AsyncDelegate.cpp
    ├── AsyncDelegate.hpp
    ├── Barrier.cpp
    ├── Barrier.hpp
    ├── BufferVector.hpp
    ├── BufferVector_tests.cpp
    ├── CMakeLists.txt
    ├── Cache.cpp
    ├── Cache.hpp
    ├── Cache_tests.cpp
    ├── CallbackMetric.cpp
    ├── CallbackMetric.hpp
    ├── CallbackMetricImpl.hpp
    ├── ChunkAllocator.cpp
    ├── ChunkAllocator.hpp
    ├── Collective.cpp
    ├── Collective.hpp
    ├── Collective_tests.cpp
    ├── Communicator.cpp
    ├── Communicator.hpp
    ├── CommunicatorImpl.hpp
    ├── Communicator_tests.cpp
    ├── CompletionEvent.hpp
    ├── CompletionEvent_tests.cpp
    ├── ConditionVariable.hpp
    ├── ConditionVariableLocal.hpp
    ├── ContextSwitchLatency_tests.cpp
    ├── ContextSwitchRate_bench.cpp
    ├── CountingSemaphoreLocal.hpp
    ├── Delegate.cpp
    ├── Delegate.hpp
    ├── DelegateBase.hpp
    ├── Delegate_tests.cpp
    ├── Doxyfile.in
    ├── ExternalCountPayloadMessage.hpp
    ├── FileIO.cpp
    ├── FileIO.hpp
    ├── FileIO_tests.cpp
    ├── FlatCombiner.cpp
    ├── FlatCombiner.hpp
    ├── FlatCombiner_tests.cpp
    ├── FullEmpty.hpp
    ├── FullEmptyLocal.hpp
    ├── FullEmpty_tests.cpp
    ├── GlobalAllocator.cpp
    ├── GlobalAllocator.hpp
    ├── GlobalAllocator_tests.cpp
    ├── GlobalBag.hpp
    ├── GlobalCompletionEvent.cpp
    ├── GlobalCompletionEvent.hpp
    ├── GlobalCounter.hpp
    ├── GlobalHashMap.cpp
    ├── GlobalHashMap.hpp
    ├── GlobalHashSet.cpp
    ├── GlobalHashSet.hpp
    ├── GlobalHash_tests.cpp
    ├── GlobalMemory.cpp
    ├── GlobalMemory.hpp
    ├── GlobalMemoryChunk.cpp
    ├── GlobalMemoryChunk.hpp
    ├── GlobalMemoryChunk_tests.cpp
    ├── GlobalMemory_tests.cpp
    ├── GlobalVector.cpp
    ├── GlobalVector.hpp
    ├── GlobalVector_tests.cpp
    ├── Grappa.cpp
    ├── Grappa.hpp
    ├── Grappa.md
    ├── Gups_tests.cpp
    ├── HistogramMetric.cpp
    ├── HistogramMetric.hpp
    ├── IncoherentAcquirer.cpp
    ├── IncoherentAcquirer.hpp
    ├── IncoherentReleaser.cpp
    ├── IncoherentReleaser.hpp
    ├── LocaleSharedMemory.cpp
    ├── LocaleSharedMemory.hpp
    ├── LocaleSharedMemory_tests.cpp
    ├── Makefile.tau
    ├── Malloc_tests.cpp
    ├── MaxMetric.cpp
    ├── MaxMetric.hpp
    ├── MaxMetricImpl.hpp
    ├── Message.hpp
    ├── MessageBase.cpp
    ├── MessageBase.hpp
    ├── MessageBaseImpl.hpp
    ├── MessagePool.cpp
    ├── MessagePool.hpp
    ├── Message_tests.cpp
    ├── MetricBase.hpp
    ├── Metrics.cpp
    ├── Metrics.hpp
    ├── MetricsTools.hpp
    ├── Metrics_tests.cpp
    ├── Mutex.hpp
    ├── Mutex_tests.cpp
    ├── NTBuffer.cpp
    ├── NTBuffer.hpp
    ├── NTBuffer_tests.cpp
    ├── NTMessage.cpp
    ├── NTMessage.hpp
    ├── NTMessage_aggregator_tests.cpp
    ├── NTMessage_tests.cpp
    ├── New_delegate_tests.cpp
    ├── New_loop_tests.cpp
    ├── ParallelLoop.cpp
    ├── ParallelLoop.hpp
    ├── PerformanceTools.cpp
    ├── PerformanceTools.hpp
    ├── PoolAllocator.hpp
    ├── PoolAllocator_tests.cpp
    ├── Public_tasks_tests.cpp
    ├── PushBuffer.hpp
    ├── RDMAAggregator.cpp
    ├── RDMAAggregator.hpp
    ├── RDMAAggregator_tests.cpp
    ├── RDMABuffer.hpp
    ├── RateMeasure_tests.cpp
    ├── Reducer.hpp
    ├── Reducer_tests.cpp
    ├── ReuseList.hpp
    ├── ReuseMessage.hpp
    ├── ReuseMessageList.hpp
    ├── ReusePool.hpp
    ├── Scheduler_benchmarking_tests.cpp
    ├── Semaphore.hpp
    ├── Semaphore_tests.cpp
    ├── SharedMessagePool.cpp
    ├── SharedMessagePool.hpp
    ├── SimpleMetric.cpp
    ├── SimpleMetric.hpp
    ├── SimpleMetricImpl.hpp
    ├── SmallLocalSet.hpp
    ├── StateTimer.cpp
    ├── StateTimer.hpp
    ├── Stealing_tests.cpp
    ├── StringMetric.cpp
    ├── StringMetric.hpp
    ├── StringMetricImpl.hpp
    ├── SummarizingMetric.cpp
    ├── SummarizingMetric.hpp
    ├── SummarizingMetricImpl.hpp
    ├── SuspendedDelegate.hpp
    ├── Synchronization.hpp
    ├── Tasking.hpp
    ├── Tasking_tests.cpp
    ├── ThreadQueue.cpp
    ├── ThreadQueue.hpp
    ├── ThreadQueue_tests.cpp
    ├── Timestamp.cpp
    ├── Timestamp.hpp
    ├── Worker.cpp
    ├── Worker.hpp
    ├── boost_helpers.hpp
    ├── cluster_tau.sh
    ├── common.hpp
    ├── doxygen_footer.html
    ├── function_traits.hpp
    ├── graph
    │   ├── Graph.cpp
    │   ├── Graph.hpp
    │   ├── Graph_tests.cpp
    │   ├── KroneckerGenerator.cpp
    │   ├── TupleGraph.cpp
    │   └── TupleGraph.hpp
    ├── grappa-valgrind.supp
    ├── grappa_gdb.macros
    ├── runcontextswitch.rb
    ├── runlatencyswitch.rb
    ├── stack.S
    ├── stack.h
    ├── tasks
    │   ├── BasicScheduler.cpp
    │   ├── BasicScheduler.hpp
    │   ├── DictOut.hpp
    │   ├── GlobalQueue.cpp
    │   ├── GlobalQueue.hpp
    │   ├── Scheduler.hpp
    │   ├── StealQueue.cpp
    │   ├── StealQueue.hpp
    │   ├── Task.cpp
    │   ├── Task.hpp
    │   ├── TaskingScheduler.cpp
    │   └── TaskingScheduler.hpp
    ├── tests
    │   ├── igor_context_switch.rb
    │   ├── igor_datastructs.rb
    │   ├── igor_hashmap.rb
    │   ├── igor_hashset.rb
    │   ├── igor_queue.rb
    │   ├── igor_stack.rb
    │   └── igor_tests.rb
    └── utils
    │   ├── README
    │   ├── obj_grep.rb
    │   └── uniq.rb
├── third-party
    ├── CMakeLists.txt
    ├── bashflags
    │   ├── README.md
    │   ├── flags.bash
    │   └── test
    │   │   ├── bool.bash
    │   │   ├── echo.bash
    │   │   └── test.bash
    ├── downloads
    │   └── README.md
    ├── google-glog
    │   ├── AUTHORS
    │   ├── COPYING
    │   ├── ChangeLog
    │   ├── INSTALL
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── NEWS
    │   ├── README
    │   ├── README.windows
    │   ├── aclocal.m4
    │   ├── compile
    │   ├── config.guess
    │   ├── config.sub
    │   ├── configure
    │   ├── configure.ac
    │   ├── depcomp
    │   ├── doc
    │   │   ├── designstyle.css
    │   │   └── glog.html
    │   ├── google-glog.sln
    │   ├── install-sh
    │   ├── libglog.pc.in
    │   ├── ltmain.sh
    │   ├── m4
    │   │   ├── ac_have_attribute.m4
    │   │   ├── ac_have_builtin_expect.m4
    │   │   ├── ac_have_sync_val_compare_and_swap.m4
    │   │   ├── ac_rwlock.m4
    │   │   ├── acx_pthread.m4
    │   │   ├── google_namespace.m4
    │   │   ├── libtool.m4
    │   │   ├── ltoptions.m4
    │   │   ├── ltsugar.m4
    │   │   ├── ltversion.m4
    │   │   ├── lt~obsolete.m4
    │   │   ├── namespaces.m4
    │   │   ├── pc_from_ucontext.m4
    │   │   ├── stl_namespace.m4
    │   │   └── using_operator.m4
    │   ├── missing
    │   ├── mkinstalldirs
    │   ├── packages
    │   │   ├── deb.sh
    │   │   ├── deb
    │   │   │   ├── README
    │   │   │   ├── changelog
    │   │   │   ├── compat
    │   │   │   ├── control
    │   │   │   ├── copyright
    │   │   │   ├── docs
    │   │   │   ├── libgoogle-glog-dev.dirs
    │   │   │   ├── libgoogle-glog-dev.install
    │   │   │   ├── libgoogle-glog0.dirs
    │   │   │   ├── libgoogle-glog0.install
    │   │   │   └── rules
    │   │   ├── rpm.sh
    │   │   └── rpm
    │   │   │   └── rpm.spec
    │   ├── src
    │   │   ├── base
    │   │   │   ├── commandlineflags.h
    │   │   │   ├── googleinit.h
    │   │   │   └── mutex.h
    │   │   ├── config.h.in
    │   │   ├── config_for_unittests.h
    │   │   ├── demangle.cc
    │   │   ├── demangle.h
    │   │   ├── demangle_unittest.cc
    │   │   ├── demangle_unittest.sh
    │   │   ├── demangle_unittest.txt
    │   │   ├── glog
    │   │   │   ├── log_severity.h
    │   │   │   ├── logging.h.in
    │   │   │   ├── raw_logging.h.in
    │   │   │   ├── stl_logging.h.in
    │   │   │   └── vlog_is_on.h.in
    │   │   ├── googletest.h
    │   │   ├── logging.cc
    │   │   ├── logging_striplog_test.sh
    │   │   ├── logging_striptest10.cc
    │   │   ├── logging_striptest2.cc
    │   │   ├── logging_striptest_main.cc
    │   │   ├── logging_unittest.cc
    │   │   ├── logging_unittest.err
    │   │   ├── mock-log.h
    │   │   ├── mock-log_test.cc
    │   │   ├── raw_logging.cc
    │   │   ├── signalhandler.cc
    │   │   ├── signalhandler_unittest.cc
    │   │   ├── signalhandler_unittest.sh
    │   │   ├── stacktrace.h
    │   │   ├── stacktrace_generic-inl.h
    │   │   ├── stacktrace_libunwind-inl.h
    │   │   ├── stacktrace_powerpc-inl.h
    │   │   ├── stacktrace_unittest.cc
    │   │   ├── stacktrace_x86-inl.h
    │   │   ├── stacktrace_x86_64-inl.h
    │   │   ├── stl_logging_unittest.cc
    │   │   ├── symbolize.cc
    │   │   ├── symbolize.h
    │   │   ├── symbolize_unittest.cc
    │   │   ├── utilities.cc
    │   │   ├── utilities.h
    │   │   ├── utilities_unittest.cc
    │   │   ├── vlog_is_on.cc
    │   │   └── windows
    │   │   │   ├── config.h
    │   │   │   ├── glog
    │   │   │       ├── log_severity.h
    │   │   │       ├── logging.h
    │   │   │       ├── raw_logging.h
    │   │   │       ├── stl_logging.h
    │   │   │       └── vlog_is_on.h
    │   │   │   ├── port.cc
    │   │   │   ├── port.h
    │   │   │   └── preprocess.sh
    │   └── vsprojects
    │   │   ├── libglog
    │   │       └── libglog.vcproj
    │   │   ├── libglog_static
    │   │       └── libglog_static.vcproj
    │   │   ├── logging_unittest
    │   │       └── logging_unittest.vcproj
    │   │   └── logging_unittest_static
    │   │       └── logging_unittest_static.vcproj
    ├── graph500-generator
    │   ├── CMakeLists.txt
    │   ├── LICENSE_1_0.txt
    │   ├── Makefile.mpi
    │   ├── Makefile.omp
    │   ├── Makefile.seq
    │   ├── Makefile.xmt
    │   ├── README
    │   ├── generator_test_mpi.c
    │   ├── generator_test_omp.c
    │   ├── generator_test_seq.c
    │   ├── generator_test_xmt.c
    │   ├── graph_generator.c
    │   ├── graph_generator.h
    │   ├── make_graph.c
    │   ├── make_graph.h
    │   ├── mod_arith.h
    │   ├── mod_arith_32bit.h
    │   ├── mod_arith_64bit.h
    │   ├── mod_arith_xmt.h
    │   ├── mrg_transitions.c
    │   ├── splittable_mrg.c
    │   ├── splittable_mrg.h
    │   ├── user_settings.h
    │   ├── utils.c
    │   └── utils.h
    └── vampirtrace.rb
└── util
    ├── CMakeLists.txt
    ├── common.sh
    ├── env.sh
    ├── experiment_utils.rb
    ├── grappa.cmake
    ├── grappa.mk
    ├── histogram.rb
    └── igor_common.rb


/.gitattributes:
--------------------------------------------------------------------------------
1 | third-party/* linguist-vendored
2 | applications/NPB/* linguist-vendored
3 | applications/graph500/* linguist-vendored
4 | 
5 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | These people have contributed to Grappa:
 2 | * Jacob Nelson <nelson@cs.washington.edu>
 3 | * Brandon Holt <bholt@cs.washington.edu>
 4 | * Brandon Myers <bdmyers@cs.washington.edu>
 5 | * Vincent Lee <vlee2@cs.washington.edu>
 6 | * Andrew Hunter <ahh@cs.washington.edu>
 7 | * Simon Kahan <skahan@cs.washington.edu>
 8 | * Luis Ceze <luisceze@cs.washington.edu>
 9 | * Mark Oskin <oskin@cs.washington.edu>
10 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
 1 | This distribution contains Grappa, a system for scaling irregular
 2 | applications on commodity clusters.
 3 | 
 4 | This software was created with Government support under DE
 5 | AC05-76RL01830 awarded by the United States Department of Energy. The
 6 | Government has certain rights in the software.
 7 | 
 8 | Most Grappa files are licensed under the New BSD License as described
 9 | in the file COPYING. See individual files for details.
10 | 
11 | As an exception, the files under the third-party/ and applications/
12 | directories are licensed under their own separate licenses. Please see
13 | those files for their licensing terms.
14 | 
15 | 


--------------------------------------------------------------------------------
/applications/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_subdirectory(nativegraph)
 2 | add_subdirectory(graph500)
 3 | add_subdirectory(sort)
 4 | add_subdirectory(pagerank)
 5 | add_subdirectory(demos)
 6 | add_subdirectory(NPB/GRAPPA/IS)
 7 | add_subdirectory(join)
 8 | add_subdirectory(isopath)
 9 | add_subdirectory(graphlab)
10 | add_subdirectory(util)
11 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/IS/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_grappa_application(intsort.exe
2 |   intsort.cpp
3 |   randlc.cpp
4 |   randlc.hpp
5 |   npb_intsort.hpp
6 | )
7 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/IS/npb_intsort.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | enum NPBClass                     {  S,  W,  A,  B,  C,  D,  E };
 3 | static const int NKEY_LOG2[]    = { 16, 20, 23, 25, 27, 29, 31 };
 4 | static const int MAX_KEY_LOG2[] = { 11, 16, 19, 21, 23, 27, 27 };
 5 | static const int NBUCKET_LOG2[] = { 10, 10, 10, 10, 10, 10, 10 };
 6 | 
 7 | inline NPBClass get_npb_class(char c) {
 8 |   switch (c) {
 9 |     case 'S': return NPBClass::S;
10 |     case 'W': return NPBClass::W;
11 |     case 'A': return NPBClass::A;
12 |     case 'B': return NPBClass::B;
13 |     case 'C': return NPBClass::C;
14 |     case 'D': return NPBClass::D;
15 |     case 'E': return NPBClass::E;
16 |     default: return NPBClass::S;
17 |   }
18 | }
19 | inline char npb_class_char(NPBClass c) {
20 |   switch (c) {
21 |     case NPBClass::S: return 'S';
22 |     case NPBClass::W: return 'W';
23 |     case NPBClass::A: return 'A';
24 |     case NPBClass::B: return 'B';
25 |     case NPBClass::C: return 'C';
26 |     case NPBClass::D: return 'D';
27 |     case NPBClass::E: return 'E';
28 |     default: return 'S';
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/IS/npbparams.h:
--------------------------------------------------------------------------------
 1 | #define NUM_PROCS 32
 2 | /*
 3 |    This file is generated automatically by the setparams utility.
 4 |    It sets the number of processors and the class of the NPB
 5 |    in this directory. Do not modify it by hand.   */
 6 |    
 7 | #define COMPILETIME "11 Sep 2012"
 8 | #define NPBVERSION "3.3"
 9 | #define MPICC "cc"
10 | #define CFLAGS "-O3 -g"
11 | #define CLINK "$(MPICC)"
12 | #define CLINKFLAGS "-O3"
13 | #define CMPI_LIB "-L/usr/lib64/openmpi/lib -lmpi"
14 | #define CMPI_INC "-I/usr/include/openmpi-x86_64"
15 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/IS/randlc.hpp:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | double   randlc( double *X, const double *A );
4 | double   find_my_seed( int  kn,       /* my processor rank, 0<=kn<=num procs */
5 |                        int  np,       /* np = num procs                      */
6 |                        long nn,       /* total num of ran numbers, all procs */
7 |                        double s,      /* Ran num seed, for ex.: 314159265.00 */
8 |                        double a );     /* Ran num gen mult, try 1220703125.00 */
9 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/README:
--------------------------------------------------------------------------------
1 | This directory contains examples of make.def files that were used 
2 | by the NPB team in testing the benchmarks on different platforms. 
3 | They can be used as starting points for make.def files for your 
4 | own platform, but you may need to tailor them for best performance 
5 | on your installation. A clean template can be found in directory 
6 | `config'.
7 | Some examples of suite.def files are also provided.


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/make.def.dec_alpha:
--------------------------------------------------------------------------------
 1 | #This is for a DEC Alpha 8400. The code will execute on a 
 2 | #single processor
 3 | #Warning: parallel make does not work properly in general
 4 | MPIF77  = f77
 5 | FLINK   = f77
 6 | #Optimization -O5 breaks SP; works fine for all other codes
 7 | FFLAGS  = -O4
 8 | 
 9 | MPICC   = cc
10 | CLINK   = cc
11 | CFLAGS  = -O5 
12 | 
13 | include ../config/make.dummy
14 | 
15 | CC      = cc -g
16 | BINDIR  = ../bin
17 | 
18 | RAND   = randi8
19 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/make.def.irix6.2:
--------------------------------------------------------------------------------
 1 | #This is for a generic single-processor SGI workstation
 2 | MPIF77 = f77
 3 | FLINK	= f77
 4 | FFLAGS	= -O3
 5 | 
 6 | MPICC = cc
 7 | CLINK	= cc
 8 | CFLAGS	= -O3 
 9 | 
10 | include ../config/make.dummy
11 | 
12 | CC	= cc -g
13 | BINDIR	= ../bin
14 | 
15 | RAND   = randi8
16 | 
17 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/make.def.origin:
--------------------------------------------------------------------------------
 1 | # This is for an SGI Origin 2000 or 3000 with vendor MPI. The Fortran
 2 | # record length is specified, so it can be used for the I/O benchmark
 3 | # as well.
 4 | MPIF77   = f77 
 5 | FMPI_LIB = -lmpi
 6 | FLINK    = f77 -64
 7 | FFLAGS   = -O3 -64
 8 | 
 9 | MPICC    = cc
10 | CMPI_LIB = -lmpi
11 | CLINK    = cc
12 | CFLAGS   = -O3 
13 | 
14 | CC       = cc -g
15 | BINDIR   = ../bin
16 | 
17 | RAND   = randi8
18 | 
19 | CONVERTFLAG = -DFORTRAN_REC_SIZE=4
20 | 
21 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/make.def.sgi_powerchallenge:
--------------------------------------------------------------------------------
 1 | # This is for the SGI PowerChallenge Array at NASA Ames. mrf77 and 
 2 | # mrcc are local scripts that invoke the proper MPI library.
 3 | MPIF77 = mrf77
 4 | FLINK  = mrf77
 5 | FFLAGS = -O3 -OPT:fold_arith_limit=1204
 6 | 
 7 | MPICC  = mrcc
 8 | CLINK  = mrcc
 9 | CFLAGS = -O3 -OPT:fold_arith_limit=1204
10 | 
11 | CC     = cc -g
12 | BINDIR = ../bin
13 | 
14 | RAND   = randi8
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/make.def.sp2_babbage:
--------------------------------------------------------------------------------
 1 | #This is for the IBM SP2 at Ames; mrf77 and mrcc are local scripts
 2 | MPIF77     = mrf77
 3 | FLINK      = mrf77
 4 | FFLAGS     = -O3 
 5 | FLINKFLAGS = -bmaxdata:0x60000000
 6 | 
 7 | MPICC      = mrcc
 8 | CLINK      = mrcc
 9 | CFLAGS     = -O3 
10 | CLINKFLAGS = -bmaxdata:0x60000000
11 | 
12 | CC         = cc -g
13 | 
14 | BINDIR     = ../bin
15 | 
16 | RAND       = randi8
17 | 
18 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/make.def.t3d_cosmos:
--------------------------------------------------------------------------------
 1 | #This is for the Cray T3D at the Jet Propulsion Laboratory
 2 | MPIF77     = cf77
 3 | FLINK      = cf77
 4 | FMPI_LIB   = -L/usr/local/mpp/lib -lmpi
 5 | FMPI_INC   = -I/usr/local/mpp/lib/include/mpp
 6 | FFLAGS     = -dp -Wf-onoieeedivide -C cray-t3d 
 7 | #The following flags provide more effective optimization, but may
 8 | #cause the random number generator randi8(_safe) to break in EP
 9 | #FFLAGS    = -dp -Wf-oaggress -Wf-onoieeedivide -C cray-t3d 
10 | FLINKFLAGS = -Wl-Drdahead=on -C cray-t3d
11 | 
12 | MPICC      = cc
13 | CLINK	   = cc
14 | CMPI_LIB   = -L/usr/local/mpp/lib -lmpi
15 | CMPI_INC   = -I/usr/local/mpp/lib/include/mpp
16 | CFLAGS	   = -O3 -Tcray-t3d
17 | CLINKFLAGS = -Tcray-t3d
18 | 
19 | CC	   = cc -g -Tcray-ymp
20 | BINDIR	   = ../bin
21 | 
22 | CONVERTFLAG= -DCONVERTDOUBLE
23 | 
24 | RAND       = randi8
25 | 
26 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.bt:
--------------------------------------------------------------------------------
 1 | bt	S	1
 2 | bt	S	4
 3 | bt	S	9
 4 | bt	S	16
 5 | bt	A	1
 6 | bt	A	4
 7 | bt	A	9
 8 | bt	A	16
 9 | bt	A	25
10 | bt	A	36
11 | bt	A	49
12 | bt	A	64
13 | bt	A	81
14 | bt	A	100
15 | bt	A	121
16 | bt	B	1
17 | bt	B	4
18 | bt	B	9
19 | bt	B	16
20 | bt	B	25
21 | bt	B	36
22 | bt	B	49
23 | bt	B	64
24 | bt	B	81
25 | bt	B	100
26 | bt	B	121
27 | bt	C	1
28 | bt	C	4
29 | bt	C	9
30 | bt	C	16
31 | bt	C	25
32 | bt	C	36
33 | bt	C	49
34 | bt	C	64
35 | bt	C	81
36 | bt	C	100
37 | bt	C	121
38 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.cg:
--------------------------------------------------------------------------------
 1 | cg	S	1
 2 | cg	S	2
 3 | cg	S	4
 4 | cg	S	8
 5 | cg	S	16
 6 | cg	A	1
 7 | cg	A	2
 8 | cg	A	4
 9 | cg	A	8
10 | cg	A	16
11 | cg	A	32
12 | cg	A	64
13 | cg	A	128
14 | cg	B	1
15 | cg	B	2
16 | cg	B	4
17 | cg	B	8
18 | cg	B	16
19 | cg	B	32
20 | cg	B	64
21 | cg	B	128
22 | cg	C	1
23 | cg	C	2
24 | cg	C	4
25 | cg	C	8
26 | cg	C	16
27 | cg	C	32
28 | cg	C	64
29 | cg	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.ep:
--------------------------------------------------------------------------------
 1 | ep	S	1
 2 | ep	S	2
 3 | ep	S	4
 4 | ep	S	8
 5 | ep	S	16
 6 | ep	A	1
 7 | ep	A	2
 8 | ep	A	4
 9 | ep	A	8
10 | ep	A	16
11 | ep	A	32
12 | ep	A	64
13 | ep	A	128
14 | ep	B	1
15 | ep	B	2
16 | ep	B	4
17 | ep	B	8
18 | ep	B	16
19 | ep	B	32
20 | ep	B	64
21 | ep	B	128
22 | ep	C	1
23 | ep	C	2
24 | ep	C	4
25 | ep	C	8
26 | ep	C	16
27 | ep	C	32
28 | ep	C	64
29 | ep	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.ft:
--------------------------------------------------------------------------------
 1 | ft	S	1
 2 | ft	S	2
 3 | ft	S	4
 4 | ft	S	8
 5 | ft	S	16
 6 | ft	A	1
 7 | ft	A	2
 8 | ft	A	4
 9 | ft	A	8
10 | ft	A	16
11 | ft	A	32
12 | ft	A	64
13 | ft	A	128
14 | ft	B	1
15 | ft	B	2
16 | ft	B	4
17 | ft	B	8
18 | ft	B	16
19 | ft	B	32
20 | ft	B	64
21 | ft	B	128
22 | ft	C	1
23 | ft	C	2
24 | ft	C	4
25 | ft	C	8
26 | ft	C	16
27 | ft	C	32
28 | ft	C	64
29 | ft	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.is:
--------------------------------------------------------------------------------
 1 | is	S	1
 2 | is	S	2
 3 | is	S	4
 4 | is	S	8
 5 | is	S	16
 6 | is	A	1
 7 | is	A	2
 8 | is	A	4
 9 | is	A	8
10 | is	A	16
11 | is	A	32
12 | is	A	64
13 | is	A	128
14 | is	B	1
15 | is	B	2
16 | is	B	4
17 | is	B	8
18 | is	B	16
19 | is	B	32
20 | is	B	64
21 | is	B	128
22 | is	C	1
23 | is	C	2
24 | is	C	4
25 | is	C	8
26 | is	C	16
27 | is	C	32
28 | is	C	64
29 | is	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.lu:
--------------------------------------------------------------------------------
 1 | lu	S	1
 2 | lu	S	2
 3 | lu	S	4
 4 | lu	S	8
 5 | lu	S	16
 6 | lu	A	1
 7 | lu	A	2
 8 | lu	A	4
 9 | lu	A	8
10 | lu	A	16
11 | lu	A	32
12 | lu	A	64
13 | lu	A	128
14 | lu	B	1
15 | lu	B	2
16 | lu	B	4
17 | lu	B	8
18 | lu	B	16
19 | lu	B	32
20 | lu	B	64
21 | lu	B	128
22 | lu	C	1
23 | lu	C	2
24 | lu	C	4
25 | lu	C	8
26 | lu	C	16
27 | lu	C	32
28 | lu	C	64
29 | lu	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.mg:
--------------------------------------------------------------------------------
 1 | mg	S	1
 2 | mg	S	2
 3 | mg	S	4
 4 | mg	S	8
 5 | mg	S	16
 6 | mg	A	1
 7 | mg	A	2
 8 | mg	A	4
 9 | mg	A	8
10 | mg	A	16
11 | mg	A	32
12 | mg	A	64
13 | mg	A	128
14 | mg	B	1
15 | mg	B	2
16 | mg	B	4
17 | mg	B	8
18 | mg	B	16
19 | mg	B	32
20 | mg	B	64
21 | mg	B	128
22 | mg	C	1
23 | mg	C	2
24 | mg	C	4
25 | mg	C	8
26 | mg	C	16
27 | mg	C	32
28 | mg	C	64
29 | mg	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.small:
--------------------------------------------------------------------------------
1 | bt	S	1
2 | cg	S	1
3 | ep	S	1
4 | ft	S	1
5 | is	S	1
6 | lu	S	1
7 | mg	S	1
8 | sp	S	1
9 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/NAS.samples/suite.def.sp:
--------------------------------------------------------------------------------
 1 | sp	S	1
 2 | sp	S	4
 3 | sp	S	9
 4 | sp	S	16
 5 | sp	A	1
 6 | sp	A	4
 7 | sp	A	9
 8 | sp	A	16
 9 | sp	A	25
10 | sp	A	36
11 | sp	A	49
12 | sp	A	64
13 | sp	A	81
14 | sp	A	100
15 | sp	A	121
16 | sp	B	1
17 | sp	B	4
18 | sp	B	9
19 | sp	B	16
20 | sp	B	25
21 | sp	B	36
22 | sp	B	49
23 | sp	B	64
24 | sp	B	81
25 | sp	B	100
26 | sp	B	121
27 | sp	C	1
28 | sp	C	4
29 | sp	C	9
30 | sp	C	16
31 | sp	C	25
32 | sp	C	36
33 | sp	C	49
34 | sp	C	64
35 | sp	C	81
36 | sp	C	100
37 | sp	C	121
38 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/make.dummy:
--------------------------------------------------------------------------------
1 | FMPI_LIB  = -L../MPI_dummy -lmpi
2 | FMPI_INC  = -I../MPI_dummy
3 | CMPI_LIB  = -L../MPI_dummy -lmpi
4 | CMPI_INC  = -I../MPI_dummy
5 | default:: ${PROGRAM} libmpi.a
6 | libmpi.a: 
7 | 	cd ../MPI_dummy; $(MAKE) F77=$(MPIF77) CC=$(MPICC)
8 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/suite.def:
--------------------------------------------------------------------------------
 1 | # config/suite.def
 2 | # This file is used to build several benchmarks with a single command. 
 3 | # Typing "make suite" in the main directory will build all the benchmarks
 4 | # specified in this file. 
 5 | # Each line of this file contains a benchmark name, class, and number
 6 | # of nodes. The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 
 7 | # "lu", and "dt". 
 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E"
 9 | # (except that no classes C, D and E for DT, and no class E for IS).
10 | # The number of nodes must be a legal number for a particular
11 | # benchmark. The utility which parses this file is primitive, so
12 | # formatting is inflexible. Separate name/class/number by tabs. 
13 | # Comments start with "#" as the first character on a line. 
14 | # No blank lines. 
15 | # The following entry builds the class S size of the IS benchmark for 2 nodes. 
16 | is	S	2
17 | 
18 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/config/suite.def.template:
--------------------------------------------------------------------------------
 1 | # config/suite.def
 2 | # This file is used to build several benchmarks with a single command. 
 3 | # Typing "make suite" in the main directory will build all the benchmarks
 4 | # specified in this file. 
 5 | # Each line of this file contains a benchmark name, class, and number
 6 | # of nodes. The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 
 7 | # "lu", and "dt". 
 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E"
 9 | # (except that no classes C, D and E for DT, and no class E for IS).
10 | # The number of nodes must be a legal number for a particular
11 | # benchmark. The utility which parses this file is primitive, so
12 | # formatting is inflexible. Separate name/class/number by tabs. 
13 | # Comments start with "#" as the first character on a line. 
14 | # No blank lines. 
15 | # The following example builds 1 processor sample sizes of all benchmarks. 
16 | ft	S	1
17 | mg	S	1
18 | sp	S	1
19 | lu	S	1
20 | bt	S	1
21 | is	S	1
22 | ep	S	1
23 | cg	S	1
24 | dt	S	1
25 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/sys/.gitignore:
--------------------------------------------------------------------------------
1 | setparams
2 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/sys/Makefile:
--------------------------------------------------------------------------------
 1 | include ../config/make.def
 2 | 
 3 | # Note that COMPILE is also defined in make.common and should
 4 | # be the same. We can't include make.common because it has a lot
 5 | # of other garbage. LINK is not defined in make.common because
 6 | # ${MPI_LIB} needs to go at the end of the line. 
 7 | FCOMPILE = $(MPIF77) -c $(FMPI_INC) $(FFLAGS)
 8 | 
 9 | all: setparams 
10 | 
11 | # setparams creates an npbparams.h file for each benchmark 
12 | # configuration. npbparams.h also contains info about how a benchmark
13 | # was compiled and linked
14 | 
15 | setparams: setparams.c ../config/make.def
16 | 	$(CC) ${CONVERTFLAG} -o setparams setparams.c
17 | 
18 | 
19 | clean: 
20 | 	-rm -f setparams setparams.h npbparams.h
21 | 	-rm -f *~ *.o
22 | 
23 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/sys/print_header:
--------------------------------------------------------------------------------
1 | echo '   ========================================='
2 | echo '   =      NAS Parallel Benchmarks 3.3      ='
3 | echo '   =      MPI/F77/C                        ='
4 | echo '   ========================================='
5 | echo ''
6 | 


--------------------------------------------------------------------------------
/applications/NPB/GRAPPA/sys/suite.awk:
--------------------------------------------------------------------------------
 1 | BEGIN { SMAKE = "make" } {
 2 |   if ($1 !~ /^#/ &&  NF > 2) {
 3 |     printf "cd `echo %s|tr '[a-z]' '[A-Z]'`; %s clean;", $1, SMAKE;
 4 |     printf "%s CLASS=%s NPROCS=%s", SMAKE, $2, $3;
 5 |     if ( NF > 3 ) {
 6 |       if ( $4 ~ /^vec/ ||  $4 ~ /^VEC/ ) {
 7 |         printf " VERSION=%s", $4;
 8 |         if ( NF > 4 ) {
 9 |           printf " SUBTYPE=%s", $5;
10 |         }
11 |       } else {
12 |         printf " SUBTYPE=%s", $4;
13 |         if ( NF > 4 ) {
14 |           printf " VERSION=%s", $5;
15 |         }
16 |       }
17 |     }
18 |     printf "; cd ..\n";
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/add.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine  add
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 | c---------------------------------------------------------------------
10 | c     addition of update to the vector u
11 | c---------------------------------------------------------------------
12 | 
13 |       include 'header.h'
14 | 
15 |       integer  c, i, j, k, m
16 | 
17 |       do     c = 1, ncells
18 |          do     k = start(3,c), cell_size(3,c)-end(3,c)-1
19 |             do     j = start(2,c), cell_size(2,c)-end(2,c)-1
20 |                do     i = start(1,c), cell_size(1,c)-end(1,c)-1
21 |                   do    m = 1, 5
22 |                      u(m,i,j,k,c) = u(m,i,j,k,c) + rhs(m,i,j,k,c)
23 |                   enddo
24 |                enddo
25 |             enddo
26 |          enddo
27 |       enddo
28 | 
29 |       return
30 |       end
31 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/adi.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine  adi
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 |       call copy_faces
10 | 
11 |       call x_solve
12 | 
13 |       call y_solve
14 | 
15 |       call z_solve
16 | 
17 |       call add
18 | 
19 |       return
20 |       end
21 | 
22 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/btio_common.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine clear_timestep
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c     sets components 1..5 of u to zero at every point
 8 | c     (ix, jio, kio) of each of the ncells cells
 9 | c---------------------------------------------------------------------
10 | 
11 |       include 'header.h'
12 |       include 'mpinpb.h'
13 | 
14 |       integer cio, kio, jio, ix, m
15 | 
16 |       do cio = 1, ncells
17 |          do kio = 0, cell_size(3,cio)-1
18 |             do jio = 0, cell_size(2,cio)-1
19 |                do ix = 0, cell_size(1,cio)-1
20 |                   do m = 1, 5
21 |                      u(m,ix,jio,kio,cio) = 0
22 |                   enddo
23 |                enddo
24 |             enddo
25 |          enddo
26 |       enddo
27 | 
28 |       return
29 |       end
30 | 
31 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/exact_solution.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine exact_solution(xi,eta,zeta,dtemp)
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 | c---------------------------------------------------------------------
10 | c     returns in dtemp(1:5) the exact solution at point (xi,eta,zeta)
11 | c---------------------------------------------------------------------
12 | 
13 |       include 'header.h'
14 | 
15 |       double precision  xi, eta, zeta, dtemp(5)
16 |       integer m
17 | c     ce(m,1) plus a quartic in each of xi, eta, zeta (nested form)
18 |       do m = 1, 5
19 |          dtemp(m) =  ce(m,1) +
20 |      >     xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
21 |      >     eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
22 |      >     zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
23 |      >     zeta*ce(m,13))))
24 |       enddo
25 | 
26 |       return
27 |       end
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/inputbt.data.sample:
--------------------------------------------------------------------------------
1 | 200       number of time steps
2 | 0.0008d0  dt for class A = 0.0008d0. class B = 0.0003d0  class C = 0.0001d0
3 | 64 64 64
4 | 5 0        write interval (optional read interval) for BTIO
5 | 0 1000000  number of nodes in collective buffering and buffer size for BTIO
6 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/mpinpb.h:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |       include 'mpif.h'
 6 | 
 7 |       integer           node, no_nodes, total_nodes, root, comm_setup, 
 8 |      >                  comm_solve, comm_rhs, dp_type
 9 |       logical           active
10 |       common /mpistuff/ node, no_nodes, total_nodes, root, comm_setup, 
11 |      >                  comm_solve, comm_rhs, dp_type, active
12 | 
13 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/work_lhs.h:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | c
 4 | c  work_lhs.h
 5 | c
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 |       double precision fjac(5, 5, -2:MAX_CELL_DIM+1),
10 |      >                 njac(5, 5, -2:MAX_CELL_DIM+1),
11 |      >                 lhsa(5, 5, -1:MAX_CELL_DIM),
12 |      >                 lhsb(5, 5, -1:MAX_CELL_DIM),
13 |      >                 tmp1, tmp2, tmp3
14 |       common /work_lhs/ fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3
15 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/BT/work_lhs_vec.h:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | c
 4 | c  work_lhs_vec.h
 5 | c
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 |       double precision fjac(5, 5, -2:MAX_CELL_DIM+1, -2:MAX_CELL_DIM+1),
10 |      >                 njac(5, 5, -2:MAX_CELL_DIM+1, -2:MAX_CELL_DIM+1),
11 |      >                 lhsa(5, 5, -1:MAX_CELL_DIM,   -1:MAX_CELL_DIM),
12 |      >                 lhsb(5, 5, -1:MAX_CELL_DIM,   -1:MAX_CELL_DIM),
13 |      >                 tmp1, tmp2, tmp3
14 |       common /work_lhs/ fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3
15 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/CG/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=cg
 3 | BENCHMARKU=CG
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = cg.o ${COMMON}/print_results.o  \
 8 |        ${COMMON}/${RAND}.o ${COMMON}/timers.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB}
14 | 
15 | cg.o:		cg.f  mpinpb.h npbparams.h timing.h
16 | 	${FCOMPILE} cg.f
17 | 
18 | 
19 | MPITYPE?=SRUN
20 | NNODE?=2
21 | PPN?=1
22 | SRUN_PARTITION?=grappa
23 | # SRUN_HOST is assigned once, with ?=, so a value from the environment wins.
24 | SRUN_HOST?=--partition $(SRUN_PARTITION)
25 | SRUN_NPROC=--nodes=$(NNODE) --ntasks-per-node=$(PPN)
26 | SRUN_RUN=srun --resv-ports --cpu_bind=verbose,rank --exclusive --label --kill-on-bad-exit $(SRUN_FLAGS) $(SRUN_HOST) $(SRUN_NPROC)
27 | 
28 | run: $(TARGET)
29 | 	$(SRUN_RUN) ./$< 
30 | 
31 | clean:
32 | 	- rm -f *.o *~ 
33 | 	- rm -f npbparams.h core
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/CG/mpinpb.h:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |       include           'mpif.h'
 6 | 
 7 |       integer           me, nprocs, root, dp_type
 8 |       common /mpistuff/ me, nprocs, root, dp_type
 9 | 
10 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/CG/runexps.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require '../../../../experiment_utils'
 3 | 
 4 | 
 5 | db = "cg.db"
 6 | table = :mpi
 7 | 
 8 | 
 9 | cmd = "make run TARGET=../bin/cg.%{problem}.%{nproc} PPN=%{ppn} NNODE=%{nnode}"
10 | 
11 | params = {
12 |     trial: [1,2,3],
13 |     problem: ['D','B','C','A'],
14 |     nproc: [64],
15 |     nnode: [8],
16 |     ppn: expr('nproc / nnode')
17 | }
18 | 
19 | parser = lambda{ |cmdout|
20 |     records = {}
21 | 
22 |     cgreg = /(?<key>[a-zA-Z\s\/]+)\s+=\s+(?<value>.+)/
23 |     cmdout.scan(cgreg).each { |k,v|
24 |         k = k.gsub(/\s+/,"_").gsub(/\//,"_per_")
25 |         if v.match(/\d+\.\d+/) then 
26 |             v = v.to_f
27 |         elsif v.match(/\d+/) then
28 |             v = v.to_i
29 |         end
30 |         records[k.to_sym] = v
31 |     }
32 |     if records.length == 0 then
33 |         raise "no records found"
34 |     end
35 | 
36 |     records
37 | }
38 | 
39 | run_experiments(cmd, params, db, table, &parser)
40 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/CG/timing.h:
--------------------------------------------------------------------------------
1 |       integer t_total, t_conjg, t_rcomm, t_ncomm, t_last
2 |       parameter (t_total=1, t_conjg=2, t_rcomm=3, t_ncomm=4, t_last=4)
3 | 
4 |       logical timeron
5 |       common /timers/ timeron
6 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/DT/DGraph.h:
--------------------------------------------------------------------------------
 1 | #ifndef _DGRAPH
 2 | #define _DGRAPH
 3 | 
 4 | #define BLOCK_SIZE  128
 5 | #define SMALL_BLOCK_SIZE 32
 6 | 
 7 | typedef struct{
 8 |   int id;
 9 |   void *tail,*head;
10 |   int length,width,attribute,maxWidth;
11 | }DGArc;
12 | 
13 | typedef struct{
14 |   int maxInDegree,maxOutDegree;
15 |   int inDegree,outDegree;
16 |   int id;
17 |   char *name;
18 |   DGArc **inArc,**outArc;
19 |   int depth,height,width;
20 |   int color,attribute,address,verified;
21 |   void *feat;
22 | }DGNode;
23 | 
24 | typedef struct{
25 |   int maxNodes,maxArcs;
26 |   int id;
27 |   char *name;
28 |   int numNodes,numArcs;
29 |   DGNode **node;
30 |   DGArc **arc;
31 | } DGraph;
32 | 
33 | DGArc *newArc(DGNode *tl,DGNode *hd);
34 | void arcShow(DGArc *ar);
35 | DGNode *newNode(char *nm);
36 | void nodeShow(DGNode* nd);
37 | 
38 | DGraph* newDGraph(char *nm);
39 | int AttachNode(DGraph *dg,DGNode *nd);
40 | int AttachArc(DGraph *dg,DGArc* nar);
41 | void graphShow(DGraph *dg,int DetailsLevel);
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/DT/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=dt
 3 | BENCHMARKU=DT
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | #Override PROGRAM
 9 | DTPROGRAM  = $(BINDIR)/$(BENCHMARK).$(CLASS).x
10 | 
11 | OBJS = dt.o DGraph.o \
12 | 	${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o
13 | 
14 | 
15 | ${PROGRAM}: config ${OBJS}
16 | 	${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB}
17 | 
18 | .c.o:
19 | 	${CCOMPILE} $<
20 | 
21 | dt.o:             dt.c  npbparams.h
22 | DGraph.o:	DGraph.c DGraph.h
23 | 
24 | clean:
25 | 	- rm -f *.o *~ mputil*
26 | 	- rm -f dt npbparams.h core
27 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/DT/README:
--------------------------------------------------------------------------------
 1 | Data Traffic benchmark DT is new in the NPB suite 
 2 | (released as part of NPB3.x-MPI package).
 3 | ----------------------------------------------------
 4 | 
 5 | DT is written in C and the same executable can run on any number of processors,
 6 | provided this number is not less than the number of nodes in the communication
 7 | graph.  DT benchmark takes one argument: BH, WH, or SH. This argument 
 8 | specifies the communication graph Black Hole, White Hole, or SHuffle 
 9 | respectively. The current release contains verification numbers for 
10 | CLASSES S, W, A, and B only.  Classes C and D are defined, but verification 
11 | numbers are not provided in this release.
12 | 
13 | The following table summarizes the number of nodes in the communication
14 | graph based on CLASS and graph TYPE.
15 | 
16 | CLASS  N_Source N_Nodes(BH,WH) N_Nodes(SH)
17 |  S      4        5              12
18 |  W      8        11             32
19 |  A      16       21             80
20 |  B      32       43             192
21 |  C      64       85             448
22 |  D      128      171            1024
23 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/EP/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=ep
 3 | BENCHMARKU=EP
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o ${COMMON}/timers.o
 8 | 
 9 | include ../sys/make.common
10 | 
11 | ${PROGRAM}: config ${OBJS}
12 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB}
13 | 
14 | 
15 | ep.o:		ep.f  mpinpb.h npbparams.h
16 | 	${FCOMPILE} ep.f
17 | 
18 | clean:
19 | 	- rm -f *.o *~ 
20 | 	- rm -f npbparams.h core
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/EP/README:
--------------------------------------------------------------------------------
1 | This code implements the random-number generator described in the
2 | NAS Parallel Benchmark document RNR Technical Report RNR-94-007.
3 | The code is "embarrassingly" parallel in that no communication is
4 | required for the generation of the random numbers themselves. There is
5 | no special requirement on the number of processors used for running
6 | the benchmark.
7 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/EP/mpinpb.h:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |       include 'mpif.h'
 6 | 
 7 |       integer           me, nprocs, root, dp_type
 8 |       common /mpistuff/ me, nprocs, root, dp_type
 9 | 
10 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/FT/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=ft
 3 | BENCHMARKU=FT
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = ft.o ${COMMON}/${RAND}.o ${COMMON}/print_results.o ${COMMON}/timers.o
10 | 
11 | ${PROGRAM}: config ${OBJS}
12 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB}
13 | 
14 | 
15 | 
16 | .f.o:
17 | 	${FCOMPILE} $<
18 | 
19 | ft.o:             ft.f  global.h mpinpb.h npbparams.h
20 | 
21 | clean:
22 | 	- rm -f *.o *~ mputil*
23 | 	- rm -f ft npbparams.h core
24 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/FT/README:
--------------------------------------------------------------------------------
1 | This code implements the time integration of a three-dimensional
2 | partial differential equation using the Fast Fourier Transform.
3 | Some of the dimension statements are not F77 conforming and will
4 | not work using the g77 compiler. All dimension statements,
5 | however, are legal F90.


--------------------------------------------------------------------------------
/applications/NPB/MPI/FT/inputft.data.sample:
--------------------------------------------------------------------------------
1 | 6   ! number of iterations
2 | 2   ! layout type. 0 = 0d, 1 = 1d, 2 = 2d
3 | 2 4 ! processor layout. 0d must be "1 1"; 1d must be "1 N"
4 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/FT/mpinpb.h:
--------------------------------------------------------------------------------
1 |       include 'mpif.h'
2 | c mpi data types
3 |       integer dc_type
4 |       common /mpistuff/ dc_type
5 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/IS/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=is
 3 | BENCHMARKU=IS
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = is.o ${COMMON}/c_print_results.o ${COMMON}/c_timers.o
10 | 
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${CMPI_LIB}
14 | 
15 | .c.o:
16 | 	${CCOMPILE} $<
17 | 
18 | is.o:             is.c  npbparams.h
19 | 
20 | 
21 | clean:
22 | 	- rm -f *.o *~ mputil*
23 | 	- rm -f is npbparams.h core
24 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/IS/npbparams.h:
--------------------------------------------------------------------------------
 1 | #define CLASS 'A'
 2 | #define NUM_PROCS 32
 3 | /*
 4 |    This file is generated automatically by the setparams utility.
 5 |    It sets the number of processors and the class of the NPB
 6 |    in this directory. Do not modify it by hand.   */
 7 |    
 8 | #define COMPILETIME "11 Sep 2012"
 9 | #define NPBVERSION "3.3"
10 | #define MPICC "cc"
11 | #define CFLAGS "-O3 -g"
12 | #define CLINK "$(MPICC)"
13 | #define CLINKFLAGS "-O3"
14 | #define CMPI_LIB "-L/usr/lib64/openmpi/lib -lmpi"
15 | #define CMPI_INC "-I/usr/include/openmpi-x86_64"
16 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/LU/inputlu.data.sample:
--------------------------------------------------------------------------------
 1 | c
 2 | c***controls printing of the progress of iterations: ipr    inorm
 3 |                                                       1      250
 4 | c
 5 | c***the maximum no. of pseudo-time steps to be performed: nitmax
 6 |                                                              250
 7 | c
 8 | c***magnitude of the time step: dt 
 9 |                                2.0e+00
10 | c
11 | c***relaxation factor for SSOR iterations: omega
12 |                                             1.2
13 | c
14 | c***tolerance levels for steady-state residuals: tolnwt(m),m=1,5
15 |                              1.0e-08   1.0e-08   1.0e-08  1.0e-08  1.0e-08 
16 | c
17 | c***number of grid points in xi and eta and zeta directions: nx   ny   nz
18 |                                                             64  64  64
19 | c
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/LU/mpinpb.h:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |       include 'mpif.h'
 6 | 
 7 |       integer           node, no_nodes, root, comm_setup, 
 8 |      >                  comm_solve, comm_rhs, dp_type
 9 |       common /mpistuff/ node, no_nodes, root, comm_setup, 
10 |      >                  comm_solve, comm_rhs, dp_type
11 | 
12 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/LU/timing.h:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | 
 3 |       integer t_total, t_rhs, t_blts, t_buts, t_jacld, t_jacu,
 4 |      >        t_exch, t_lcomm, t_ucomm, t_rcomm, t_last
 5 |       parameter (t_total=1, t_rhs=2, t_blts=3, t_buts=4, t_jacld=5, 
 6 |      >        t_jacu=6, t_exch=7, t_lcomm=8, t_ucomm=9, t_rcomm=10, 
 7 |      >        t_last=10)
 8 | 
 9 |       double precision maxtime
10 |       logical timeron
11 |       common/timer/maxtime, timeron
12 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MG/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=mg
 3 | BENCHMARKU=MG
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = mg.o ${COMMON}/print_results.o  \
 8 |        ${COMMON}/${RAND}.o ${COMMON}/timers.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB}
14 | 
15 | mg.o:		mg.f  globals.h mpinpb.h npbparams.h
16 | 	${FCOMPILE} mg.f
17 | 
18 | clean:
19 | 	- rm -f *.o *~ 
20 | 	- rm -f npbparams.h core
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MG/mg.input.sample:
--------------------------------------------------------------------------------
1 |  8 = top level
2 |  256 256 256 = nx ny nz
3 |  20 = nit
4 |  0 0 0 0 0 0 0 0 = debug_vec
5 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MG/mpinpb.h:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |       include           'mpif.h'
 6 | 
 7 |       integer           me, nprocs, root, dp_type
 8 |       common /mpistuff/ me, nprocs, root, dp_type
 9 | 
10 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MPI_dummy/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for MPI dummy library. 
 2 | # Must be edited for a specific machine. Does NOT read in 
 3 | # the make.def file of NPB 2.3
 4 | F77 = f77
 5 | CC = cc
 6 | AR = ar
 7 | 
 8 | # Enable if either Cray or IBM: (no such flag for most machines: see wtime.h)
 9 | # MACHINE	=	-DCRAY
10 | # MACHINE	=	-DIBM
11 | 
12 | libmpi.a: mpi_dummy.o mpi_dummy_c.o wtime.o
13 | 	$(AR) r libmpi.a mpi_dummy.o mpi_dummy_c.o wtime.o
14 | 
15 | mpi_dummy.o: mpi_dummy.f mpif.h
16 | 	$(F77) -c mpi_dummy.f
17 | # For a Cray C90, try:
18 | #	cf77 -dp -c mpi_dummy.f
19 | # For an IBM 590, try:
20 | #	xlf -c mpi_dummy.f
21 | 
22 | mpi_dummy_c.o: mpi_dummy.c mpi.h
23 | 	$(CC) -c ${MACHINE} -o mpi_dummy_c.o mpi_dummy.c
24 | 
25 | wtime.o: wtime.c
26 | # For most machines or CRAY or IBM
27 | 	$(CC) -c ${MACHINE} wtime.c
28 | # For a precise timer on an SGI Power Challenge, try:
29 | #	$(CC) -o wtime.o -c wtime_sgi64.c
30 | 
31 | test: test.f
32 | 	$(F77) -o test -I. test.f -L. -lmpi
33 | 
34 | 
35 | 
36 | clean: 
37 | 	- rm -f *~ *.o
38 | 	- rm -f test libmpi.a
39 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MPI_dummy/mpif.h:
--------------------------------------------------------------------------------
 1 |       integer mpi_comm_world
 2 |       parameter (mpi_comm_world = 0)
 3 | 
 4 |       integer mpi_max, mpi_min, mpi_sum
 5 |       parameter (mpi_max = 1, mpi_sum = 2, mpi_min = 3)
 6 | 
 7 |       integer mpi_byte, mpi_integer, mpi_real, mpi_logical,
 8 |      >                  mpi_double_precision,  mpi_complex,
 9 |      >                  mpi_double_complex
10 |       parameter (mpi_double_precision = 1,
11 |      $           mpi_integer = 2, 
12 |      $           mpi_byte = 3, 
13 |      $           mpi_real= 4, 
14 |      $           mpi_logical = 5, 
15 |      $           mpi_complex = 6,
16 |      $           mpi_double_complex = 7)
17 | 
18 |       integer mpi_any_source
19 |       parameter (mpi_any_source = -1)
20 | 
21 |       integer mpi_err_other
22 |       parameter (mpi_err_other = -1)
23 | 
24 |       double precision mpi_wtime
25 |       external mpi_wtime
26 | 
27 |       integer mpi_status_size
28 |       parameter (mpi_status_size=3)
29 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MPI_dummy/test.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c     prints two successive readings of mpi_wtime
 3 | c---------------------------------------------------------------------
 4 |       program test
 5 |       implicit none
 6 |       double precision t, mpi_wtime
 7 |       external mpi_wtime
 8 |       t = 0.0
 9 |       t = mpi_wtime()
10 |       print *, t
11 |       t = mpi_wtime()
12 |       print *, t
13 |       end
14 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MPI_dummy/wtime.c:
--------------------------------------------------------------------------------
 1 | #include "wtime.h"
 2 | #include <sys/time.h>
 3 | 
 4 | /* Store in *t the wall-clock seconds elapsed since the whole second of
 5 |  * the first call, with the microsecond part taken from gettimeofday().
 6 |  * The base second is kept as a time_t behind an explicit flag: an int
 7 |  * base with a "negative means unset" test truncates tv_sec and breaks
 8 |  * once tv_sec no longer fits in an int. */
 9 | void wtime(double *t)
10 | {
11 |   static int have_base = 0;
12 |   static time_t base_sec;
13 |   struct timeval tv;
14 |   gettimeofday(&tv, (void *)0);
15 |   if (!have_base) { base_sec = tv.tv_sec; have_base = 1; }
16 |   *t = (double)(tv.tv_sec - base_sec) + 1.0e-6*tv.tv_usec;
17 | }
18 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MPI_dummy/wtime.f:
--------------------------------------------------------------------------------
 1 |       subroutine wtime(tim)
 2 |       real*8 tim
 3 |       dimension tarray(2)
 4 |       call etime(tarray)
 5 |       tim = tarray(1)
 6 |       return
 7 |       end
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/MPI_dummy/wtime.h:
--------------------------------------------------------------------------------
 1 | /* C/Fortran interface is different on different machines. 
 2 |  * You may need to tweak this.
 3 |  */
 4 | 
 5 | 
 6 | #if defined(IBM)
 7 | #define wtime wtime
 8 | #elif defined(CRAY)
 9 | #define wtime WTIME
10 | #else
11 | #define wtime wtime_
12 | #endif
13 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/SP/README:
--------------------------------------------------------------------------------
 1 | 
 2 | This code implements a 3D Multi-partition algorithm for the solution 
 3 | of the uncoupled systems of linear equations resulting from 
 4 | Beam-Warming approximate factorization.  Consequently, the program 
 5 | must be run on a square number of processors.  The included file 
 6 | "npbparams.h" contains a parameter statement which sets "maxcells" 
 7 | and "problem_size".  The parameter maxcells must be set to the 
 8 | square root of the number of processors.  For example, if running 
 9 | on 25 processors, then set maxcells=5.  The standard problem sizes 
10 | are problem_size=64 for class A, 102 for class B, and 162 for class C.
11 | 
12 | The number of time steps and the time step size dt are set in the 
13 | npbparams.h but may be overridden in the input deck "inputsp.data".  
14 | The number of time steps is 400 for all three 
15 | standard problems, and the appropriate time step sizes "dt" are 
16 | 0.0015d0 for class A, 0.001d0 for class B, and 0.00067 for class C.  
17 | 
18 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/SP/add.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |        subroutine  add
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 | c---------------------------------------------------------------------
11 | c addition of update to the vector u
12 | c---------------------------------------------------------------------
13 | 
14 |        include 'header.h'
15 | 
16 |        integer  c, i, j, k, m
17 | 
18 |        do  c = 1, ncells
19 |           do m = 1, 5
20 |              do  k = start(3,c), cell_size(3,c)-end(3,c)-1
21 |                 do  j = start(2,c), cell_size(2,c)-end(2,c)-1
22 |                    do  i = start(1,c), cell_size(1,c)-end(1,c)-1
23 |                       u(i,j,k,m,c) = u(i,j,k,m,c) + rhs(i,j,k,m,c)
24 |                    end do
25 |                 end do
26 |              end do
27 |           end do
28 |        end do
29 | 
30 |        return
31 |        end
32 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/SP/adi.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |        subroutine  adi
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 |        call copy_faces
11 | 
12 |        call txinvr
13 | 
14 |        call x_solve
15 | 
16 |        call y_solve
17 | 
18 |        call z_solve
19 | 
20 |        call add
21 | 
22 |        return
23 |        end
24 | 
25 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/SP/exact_solution.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |        subroutine exact_solution(xi,eta,zeta,dtemp)
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 | c---------------------------------------------------------------------
11 | c this function returns the exact solution at point xi, eta, zeta  
12 | c---------------------------------------------------------------------
13 | 
14 |        include 'header.h'
15 | 
16 |        double precision  xi, eta, zeta, dtemp(5)
17 |        integer m
18 | 
19 |        do  m = 1, 5
20 |           dtemp(m) =  ce(m,1) +
21 |      >    xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
22 |      >    eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
23 |      >    zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
24 |      >    zeta*ce(m,13))))
25 |        end do
26 | 
27 |        return
28 |        end
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/SP/inputsp.data.sample:
--------------------------------------------------------------------------------
1 | 400       number of time steps
2 | 0.0015d0  dt for class A = 0.0015d0. class B = 0.001d0  class C = 0.00067d0
3 | 64 64 64
4 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/SP/mpinpb.h:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |       include 'mpif.h'
 6 | 
 7 |       integer           node, no_nodes, total_nodes, root, comm_setup, 
 8 |      >                  comm_solve, comm_rhs, dp_type
 9 |       logical           active
10 |       common /mpistuff/ node, no_nodes, total_nodes, root, comm_setup, 
11 |      >                  comm_solve, comm_rhs, dp_type, active
12 |       integer           DEFAULT_TAG
13 |       parameter         (DEFAULT_TAG = 0)
14 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/README:
--------------------------------------------------------------------------------
1 | This directory contains examples of make.def files that were used 
2 | by the NPB team in testing the benchmarks on different platforms. 
3 | They can be used as starting points for make.def files for your 
4 | own platform, but you may need to tailor them for best performance 
5 | on your installation. A clean template can be found in directory 
6 | `config'.
7 | Some examples of suite.def files are also provided.


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/make.def.dec_alpha:
--------------------------------------------------------------------------------
 1 | #This is for a DEC Alpha 8400. The code will execute on a 
 2 | #single processor
 3 | #Warning: parallel make does not work properly in general
 4 | MPIF77  = f77
 5 | FLINK   = f77
 6 | #Optimization -O5 breaks SP; works fine for all other codes
 7 | FFLAGS  = -O4
 8 | 
 9 | MPICC   = cc
10 | CLINK   = cc
11 | CFLAGS  = -O5 
12 | 
13 | include ../config/make.dummy
14 | 
15 | CC      = cc -g
16 | BINDIR  = ../bin
17 | 
18 | RAND   = randi8
19 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/make.def.irix6.2:
--------------------------------------------------------------------------------
 1 | #This is for a generic single-processor SGI workstation
 2 | MPIF77 = f77
 3 | FLINK	= f77
 4 | FFLAGS	= -O3
 5 | 
 6 | MPICC = cc
 7 | CLINK	= cc
 8 | CFLAGS	= -O3 
 9 | 
10 | include ../config/make.dummy
11 | 
12 | CC	= cc -g
13 | BINDIR	= ../bin
14 | 
15 | RAND   = randi8
16 | 
17 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/make.def.origin:
--------------------------------------------------------------------------------
 1 | # This is for an SGI Origin 2000 or 3000 with vendor MPI. The Fortran
 2 | # record length is specified, so it can be used for the I/O benchmark
 3 | # as well.
 4 | MPIF77   = f77 
 5 | FMPI_LIB = -lmpi
 6 | FLINK    = f77 -64
 7 | FFLAGS   = -O3 -64
 8 | 
 9 | MPICC    = cc
10 | CMPI_LIB = -lmpi
11 | CLINK    = cc
12 | CFLAGS   = -O3 
13 | 
14 | CC       = cc -g
15 | BINDIR   = ../bin
16 | 
17 | RAND   = randi8
18 | 
19 | CONVERTFLAG = -DFORTRAN_REC_SIZE=4
20 | 
21 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/make.def.sgi_powerchallenge:
--------------------------------------------------------------------------------
 1 | # This is for the SGI PowerChallenge Array at NASA Ames. mrf77 and 
 2 | # mrcc are local scripts that invoke the proper MPI library.
 3 | MPIF77 = mrf77
 4 | FLINK  = mrf77
 5 | FFLAGS = -O3 -OPT:fold_arith_limit=1204
 6 | 
 7 | MPICC  = mrcc
 8 | CLINK  = mrcc
 9 | CFLAGS = -O3 -OPT:fold_arith_limit=1204
10 | 
11 | CC     = cc -g
12 | BINDIR = ../bin
13 | 
14 | RAND   = randi8
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/make.def.sp2_babbage:
--------------------------------------------------------------------------------
 1 | #This is for the IBM SP2 at Ames; mrf77 and mrcc are local scripts
 2 | MPIF77     = mrf77
 3 | FLINK      = mrf77
 4 | FFLAGS     = -O3 
 5 | FLINKFLAGS = -bmaxdata:0x60000000
 6 | 
 7 | MPICC      = mrcc
 8 | CLINK      = mrcc
 9 | CFLAGS     = -O3 
10 | CLINKFLAGS = -bmaxdata:0x60000000
11 | 
12 | CC         = cc -g
13 | 
14 | BINDIR     = ../bin
15 | 
16 | RAND       = randi8
17 | 
18 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/make.def.t3d_cosmos:
--------------------------------------------------------------------------------
 1 | #This is for the Cray T3D at the Jet Propulsion Laboratory
 2 | MPIF77     = cf77
 3 | FLINK      = cf77
 4 | FMPI_LIB   = -L/usr/local/mpp/lib -lmpi
 5 | FMPI_INC   = -I/usr/local/mpp/lib/include/mpp
 6 | FFLAGS     = -dp -Wf-onoieeedivide -C cray-t3d 
 7 | #The following flags provide more effective optimization, but may
 8 | #cause the random number generator randi8(_safe) to break in EP
 9 | #FFLAGS    = -dp -Wf-oaggress -Wf-onoieeedivide -C cray-t3d 
10 | FLINKFLAGS = -Wl-Drdahead=on -C cray-t3d
11 | 
12 | MPICC      = cc
13 | CLINK	   = cc
14 | CMPI_LIB   = -L/usr/local/mpp/lib -lmpi
15 | CMPI_INC   = -I/usr/local/mpp/lib/include/mpp
16 | CFLAGS	   = -O3 -Tcray-t3d
17 | CLINKFLAGS = -Tcray-t3d
18 | 
19 | CC	   = cc -g -Tcray-ymp
20 | BINDIR	   = ../bin
21 | 
22 | CONVERTFLAG= -DCONVERTDOUBLE
23 | 
24 | RAND       = randi8
25 | 
26 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.bt:
--------------------------------------------------------------------------------
 1 | bt	S	1
 2 | bt	S	4
 3 | bt	S	9
 4 | bt	S	16
 5 | bt	A	1
 6 | bt	A	4
 7 | bt	A	9
 8 | bt	A	16
 9 | bt	A	25
10 | bt	A	36
11 | bt	A	49
12 | bt	A	64
13 | bt	A	81
14 | bt	A	100
15 | bt	A	121
16 | bt	B	1
17 | bt	B	4
18 | bt	B	9
19 | bt	B	16
20 | bt	B	25
21 | bt	B	36
22 | bt	B	49
23 | bt	B	64
24 | bt	B	81
25 | bt	B	100
26 | bt	B	121
27 | bt	C	1
28 | bt	C	4
29 | bt	C	9
30 | bt	C	16
31 | bt	C	25
32 | bt	C	36
33 | bt	C	49
34 | bt	C	64
35 | bt	C	81
36 | bt	C	100
37 | bt	C	121
38 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.cg:
--------------------------------------------------------------------------------
 1 | cg	S	1
 2 | cg	S	2
 3 | cg	S	4
 4 | cg	S	8
 5 | cg	S	16
 6 | cg	A	1
 7 | cg	A	2
 8 | cg	A	4
 9 | cg	A	8
10 | cg	A	16
11 | cg	A	32
12 | cg	A	64
13 | cg	A	128
14 | cg	B	1
15 | cg	B	2
16 | cg	B	4
17 | cg	B	8
18 | cg	B	16
19 | cg	B	32
20 | cg	B	64
21 | cg	B	128
22 | cg	C	1
23 | cg	C	2
24 | cg	C	4
25 | cg	C	8
26 | cg	C	16
27 | cg	C	32
28 | cg	C	64
29 | cg	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.ep:
--------------------------------------------------------------------------------
 1 | ep	S	1
 2 | ep	S	2
 3 | ep	S	4
 4 | ep	S	8
 5 | ep	S	16
 6 | ep	A	1
 7 | ep	A	2
 8 | ep	A	4
 9 | ep	A	8
10 | ep	A	16
11 | ep	A	32
12 | ep	A	64
13 | ep	A	128
14 | ep	B	1
15 | ep	B	2
16 | ep	B	4
17 | ep	B	8
18 | ep	B	16
19 | ep	B	32
20 | ep	B	64
21 | ep	B	128
22 | ep	C	1
23 | ep	C	2
24 | ep	C	4
25 | ep	C	8
26 | ep	C	16
27 | ep	C	32
28 | ep	C	64
29 | ep	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.ft:
--------------------------------------------------------------------------------
 1 | ft	S	1
 2 | ft	S	2
 3 | ft	S	4
 4 | ft	S	8
 5 | ft	S	16
 6 | ft	A	1
 7 | ft	A	2
 8 | ft	A	4
 9 | ft	A	8
10 | ft	A	16
11 | ft	A	32
12 | ft	A	64
13 | ft	A	128
14 | ft	B	1
15 | ft	B	2
16 | ft	B	4
17 | ft	B	8
18 | ft	B	16
19 | ft	B	32
20 | ft	B	64
21 | ft	B	128
22 | ft	C	1
23 | ft	C	2
24 | ft	C	4
25 | ft	C	8
26 | ft	C	16
27 | ft	C	32
28 | ft	C	64
29 | ft	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.is:
--------------------------------------------------------------------------------
 1 | is	S	1
 2 | is	S	2
 3 | is	S	4
 4 | is	S	8
 5 | is	S	16
 6 | is	A	1
 7 | is	A	2
 8 | is	A	4
 9 | is	A	8
10 | is	A	16
11 | is	A	32
12 | is	A	64
13 | is	A	128
14 | is	B	1
15 | is	B	2
16 | is	B	4
17 | is	B	8
18 | is	B	16
19 | is	B	32
20 | is	B	64
21 | is	B	128
22 | is	C	1
23 | is	C	2
24 | is	C	4
25 | is	C	8
26 | is	C	16
27 | is	C	32
28 | is	C	64
29 | is	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.lu:
--------------------------------------------------------------------------------
 1 | lu	S	1
 2 | lu	S	2
 3 | lu	S	4
 4 | lu	S	8
 5 | lu	S	16
 6 | lu	A	1
 7 | lu	A	2
 8 | lu	A	4
 9 | lu	A	8
10 | lu	A	16
11 | lu	A	32
12 | lu	A	64
13 | lu	A	128
14 | lu	B	1
15 | lu	B	2
16 | lu	B	4
17 | lu	B	8
18 | lu	B	16
19 | lu	B	32
20 | lu	B	64
21 | lu	B	128
22 | lu	C	1
23 | lu	C	2
24 | lu	C	4
25 | lu	C	8
26 | lu	C	16
27 | lu	C	32
28 | lu	C	64
29 | lu	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.mg:
--------------------------------------------------------------------------------
 1 | mg	S	1
 2 | mg	S	2
 3 | mg	S	4
 4 | mg	S	8
 5 | mg	S	16
 6 | mg	A	1
 7 | mg	A	2
 8 | mg	A	4
 9 | mg	A	8
10 | mg	A	16
11 | mg	A	32
12 | mg	A	64
13 | mg	A	128
14 | mg	B	1
15 | mg	B	2
16 | mg	B	4
17 | mg	B	8
18 | mg	B	16
19 | mg	B	32
20 | mg	B	64
21 | mg	B	128
22 | mg	C	1
23 | mg	C	2
24 | mg	C	4
25 | mg	C	8
26 | mg	C	16
27 | mg	C	32
28 | mg	C	64
29 | mg	C	128
30 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.small:
--------------------------------------------------------------------------------
1 | bt	S	1
2 | cg	S	1
3 | ep	S	1
4 | ft	S	1
5 | is	S	1
6 | lu	S	1
7 | mg	S	1
8 | sp	S	1
9 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/NAS.samples/suite.def.sp:
--------------------------------------------------------------------------------
 1 | sp	S	1
 2 | sp	S	4
 3 | sp	S	9
 4 | sp	S	16
 5 | sp	A	1
 6 | sp	A	4
 7 | sp	A	9
 8 | sp	A	16
 9 | sp	A	25
10 | sp	A	36
11 | sp	A	49
12 | sp	A	64
13 | sp	A	81
14 | sp	A	100
15 | sp	A	121
16 | sp	B	1
17 | sp	B	4
18 | sp	B	9
19 | sp	B	16
20 | sp	B	25
21 | sp	B	36
22 | sp	B	49
23 | sp	B	64
24 | sp	B	81
25 | sp	B	100
26 | sp	B	121
27 | sp	C	1
28 | sp	C	4
29 | sp	C	9
30 | sp	C	16
31 | sp	C	25
32 | sp	C	36
33 | sp	C	49
34 | sp	C	64
35 | sp	C	81
36 | sp	C	100
37 | sp	C	121
38 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/make.dummy:
--------------------------------------------------------------------------------
1 | FMPI_LIB  = -L../MPI_dummy -lmpi
2 | FMPI_INC  = -I../MPI_dummy
3 | CMPI_LIB  = -L../MPI_dummy -lmpi
4 | CMPI_INC  = -I../MPI_dummy
5 | default:: ${PROGRAM} libmpi.a
6 | libmpi.a: 
7 | 	cd ../MPI_dummy; $(MAKE) F77=$(MPIF77) CC=$(MPICC)
8 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/suite.def:
--------------------------------------------------------------------------------
 1 | # config/suite.def
 2 | # This file is used to build several benchmarks with a single command. 
 3 | # Typing "make suite" in the main directory will build all the benchmarks
 4 | # specified in this file. 
 5 | # Each line of this file contains a benchmark name, class, and number
 6 | # of nodes. The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 
 7 | # "lu", and "dt". 
 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E"
 9 | # (except that no classes C, D and E for DT, and no class E for IS).
10 | # The number of nodes must be a legal number for a particular
11 | # benchmark. The utility which parses this file is primitive, so
12 | # formatting is inflexible. Separate name/class/number by tabs. 
13 | # Comments start with "#" as the first character on a line. 
14 | # No blank lines. 
15 | # The entries below build the IS benchmark, class S, for several node counts. 
16 | is	S	1
17 | is	S	2
18 | is	S	4
19 | is	S	8
20 | is	S	16
21 | is	S	32
22 | is	S	48
23 | is	S	32
24 | is	S	32
25 | is	S	32
26 | 
27 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/config/suite.def.template:
--------------------------------------------------------------------------------
 1 | # config/suite.def
 2 | # This file is used to build several benchmarks with a single command. 
 3 | # Typing "make suite" in the main directory will build all the benchmarks
 4 | # specified in this file. 
 5 | # Each line of this file contains a benchmark name, class, and number
 6 | # of nodes. The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 
 7 | # "lu", and "dt". 
 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E"
 9 | # (except that no classes C, D and E for DT, and no class E for IS).
10 | # The number of nodes must be a legal number for a particular
11 | # benchmark. The utility which parses this file is primitive, so
12 | # formatting is inflexible. Separate name/class/number by tabs. 
13 | # Comments start with "#" as the first character on a line. 
14 | # No blank lines. 
15 | # The following example builds 1 processor sample sizes of all benchmarks. 
16 | ft	S	1
17 | mg	S	1
18 | sp	S	1
19 | lu	S	1
20 | bt	S	1
21 | is	S	1
22 | ep	S	1
23 | cg	S	1
24 | dt	S	1
25 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/sys/.gitignore:
--------------------------------------------------------------------------------
1 | setparams
2 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/sys/Makefile:
--------------------------------------------------------------------------------
 1 | include ../config/make.def
 2 | 
 3 | # Note that FCOMPILE is also defined in make.common and should
 4 | # be the same. We can't include make.common because it has a lot
 5 | # of other garbage. LINK is not defined in make.common because
 6 | # ${MPI_LIB} needs to go at the end of the line. 
 7 | FCOMPILE = $(MPIF77) -c $(FMPI_INC) $(FFLAGS)
 8 | 
 9 | all: setparams 
10 | 
11 | # setparams creates an npbparams.h file for each benchmark 
12 | # configuration. npbparams.h also contains info about how a benchmark
13 | # was compiled and linked
14 | 
15 | setparams: setparams.c ../config/make.def
16 | 	$(CC) ${CONVERTFLAG} -o setparams setparams.c
17 | 
18 | 
19 | clean: 
20 | 	-rm -f setparams setparams.h npbparams.h
21 | 	-rm -f *~ *.o
22 | 
23 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/sys/print_header:
--------------------------------------------------------------------------------
1 | echo '   ========================================='
2 | echo '   =      NAS Parallel Benchmarks 3.3      ='
3 | echo '   =      MPI/F77/C                        ='
4 | echo '   ========================================='
5 | echo ''
6 | 


--------------------------------------------------------------------------------
/applications/NPB/MPI/sys/suite.awk:
--------------------------------------------------------------------------------
 1 | BEGIN { SMAKE = "make" } {
 2 |   if ($1 !~ /^#/ &&  NF > 2) {
 3 |     printf "cd `echo %s|tr '[a-z]' '[A-Z]'`; %s clean;", $1, SMAKE;
 4 |     printf "%s CLASS=%s NPROCS=%s", SMAKE, $2, $3;
 5 |     if ( NF > 3 ) {
 6 |       if ( $4 ~ /^vec/ ||  $4 ~ /^VEC/ ) {
 7 |         printf " VERSION=%s", $4;
 8 |         if ( NF > 4 ) {
 9 |           printf " SUBTYPE=%s", $5;
10 |         }
11 |       } else {
12 |         printf " SUBTYPE=%s", $4;
13 |         if ( NF > 4 ) {
14 |           printf " VERSION=%s", $5;
15 |         }
16 |       }
17 |     }
18 |     printf "; cd ..\n";
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/applications/NPB/NPB3.3-HPF.README:
--------------------------------------------------------------------------------
1 | The HPF version of NPB is not included in this distribution.
2 | Please download it from NPB3.0 instead.
3 | 
4 | http://www.nas.nasa.gov/Software/NPB
5 | 


--------------------------------------------------------------------------------
/applications/NPB/NPB3.3-JAV.README:
--------------------------------------------------------------------------------
1 | The Java version of NPB is not included in this distribution.
2 | Please download it from NPB3.0 instead.
3 | 
4 | http://www.nas.nasa.gov/Software/NPB
5 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/BT/add.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine  add
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 | c---------------------------------------------------------------------
10 | c     addition of update to the vector u
11 | c---------------------------------------------------------------------
12 | 
13 |       include 'header.h'
14 | 
15 |       integer i, j, k, m
16 | 
17 |       if (timeron) call timer_start(t_add)
18 | !$omp parallel do default(shared) private(i,j,k,m)
19 |       do     k = 1, grid_points(3)-2
20 |          do     j = 1, grid_points(2)-2
21 |             do     i = 1, grid_points(1)-2
22 |                do    m = 1, 5
23 |                   u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k)
24 |                enddo
25 |             enddo
26 |          enddo
27 |       enddo
28 |       if (timeron) call timer_stop(t_add)
29 | 
30 |       return
31 |       end
32 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/BT/adi.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine  adi
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 |       call compute_rhs
10 | 
11 |       call x_solve
12 | 
13 |       call y_solve
14 | 
15 |       call z_solve
16 | 
17 |       call add
18 | 
19 |       return
20 |       end
21 | 
22 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/BT/exact_solution.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine exact_solution(xi,eta,zeta,dtemp)
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 | c---------------------------------------------------------------------
10 | c     returns in dtemp(1:5) the exact solution at point xi, eta, zeta
11 | c---------------------------------------------------------------------
12 | 
13 |       include 'header.h'
14 | 
15 |       double precision  xi, eta, zeta, dtemp(5)
16 |       integer m
17 | 
18 |       do m = 1, 5
19 |          dtemp(m) =  ce(m,1) +
20 |      >     xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
21 |      >     eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
22 |      >     zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
23 |      >     zeta*ce(m,13))))
24 |       enddo
25 | 
26 |       return
27 |       end
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/BT/inputbt.data.sample:
--------------------------------------------------------------------------------
1 | 60       number of time steps
2 | 0.01d0   dt for class A = 0.0008d0. class B = 0.0003d0  class C = 0.0001d0
3 | 12 12 12
4 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/BT/work_lhs.h:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | c
 4 | c  work_lhs.h
 5 | c
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | c
 9 |       double precision fjac(5, 5,    0:problem_size),
10 |      >                 njac(5, 5,    0:problem_size),
11 |      >                 lhs (5, 5, 3, 0:problem_size),
12 |      >                 tmp1, tmp2, tmp3
13 |       common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3
14 | !$omp threadprivate (/work_lhs/)
15 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/BT/work_lhs_vec.h:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | c
 4 | c  work_lhs_vec.h
 5 | c
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | c
 9 |       double precision fjac(5, 5,    0:problem_size, 0:problem_size),
10 |      >                 njac(5, 5,    0:problem_size, 0:problem_size),
11 |      >                 lhs (5, 5, 3, 0:problem_size, 0:problem_size),
12 |      >                 tmp1, tmp2, tmp3
13 |       common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3
14 | !$omp threadprivate (/work_lhs/)
15 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/CG/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=cg
 3 | BENCHMARKU=CG
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = cg.o ${COMMON}/print_results.o  \
 8 |        ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
14 | 
15 | cg.o:		cg.f  globals.h npbparams.h
16 | 	${FCOMPILE} cg.f
17 | 
18 | run:
19 | 	OMP_NUM_THREADS=$(OMP_NUM_THREADS) ../bin/cg.${PROBLEM}.x
20 | 
21 | clean:
22 | 	- rm -f *.o *~ 
23 | 	- rm -f npbparams.h core
24 | 	- if [ -d rii_files ]; then rm -r rii_files; fi
25 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/CG/README.carefully:
--------------------------------------------------------------------------------
 1 | Note: please observe that in the routine conj_grad three 
 2 | implementations of the sparse matrix-vector multiply have
 3 | been supplied.  The default matrix-vector multiply is not
 4 | loop unrolled.  The alternate implementations are unrolled
 5 | to a depth of 2 and unrolled to a depth of 8.  Please
 6 | experiment with these to find the fastest for your particular
 7 | architecture.  If reporting timing results, any of these three may
 8 | be used without penalty.
 9 | 
10 | Performance examples:
11 | The non-unrolled version of the multiply is actually slightly
12 | (maybe 5%) faster on the sp2-66MHz-WN on 16 nodes than is the 
13 | unrolled-by-2 version below.   On the Cray t3d, the reverse is true, 
14 | i.e., the unrolled-by-two version is some 10% faster.  
15 | The unrolled-by-8 version below is significantly faster
16 | on the Cray t3d - overall speed of code is 1.5 times faster.
17 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/CG/runexps.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require '../../../../experiment_utils'
 3 | 
 4 | 
 5 | db = "cg.db"
 6 | table = :omp_n02
 7 | 
 8 | 
 9 | cmd = "make run PROBLEM=%{problem} OMP_NUM_THREADS=%{nthreads}"
10 | 
11 | params = {
12 |     trial: [1,2,3],
13 |     problem: ['A','B','C','D'],
14 |     nthreads:  [32],#[16,24,32,48,64], 
15 | }
16 | 
17 | parser = lambda{ |cmdout|
18 |     records = {}
19 | 
20 |     cgreg = /(?<key>[a-zA-Z\s\/]+)\s+=\s+(?<value>.+)/
21 |     cmdout.scan(cgreg).each { |k,v|
22 |         k = k.gsub(/\s+/,"_").gsub(/\//,"_per_")
23 |         if v.match(/\d+\.\d+/) then 
24 |             v = v.to_f
25 |         elsif v.match(/\d+/) then
26 |             v = v.to_i
27 |         end
28 |         records[k.to_sym] = v
29 |     }
30 | 
31 |     records
32 | }
33 | 
34 | run_experiments(cmd, params, db, table, &parser)
35 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/DC/ADC.par:
--------------------------------------------------------------------------------
1 | attrNum=12
2 | measuresNum=1
3 | tuplesNum=100
4 | INVERSE_ENDIAN=0
5 | fileName=ADC
6 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/DC/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=dc
 3 | BENCHMARKU=DC
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = adc.o dc.o extbuild.o rbt.o jobcntl.o \
10 | 	${COMMON}/c_print_results.o  \
11 | 	${COMMON}/c_timers.o ${COMMON}/c_wtime.o
12 | 
13 | 
14 | # npbparams.h is provided for backward compatibility with NPB compilation
15 | # header.h: npbparams.h
16 | 
17 | ${PROGRAM}: config ${OBJS} 
18 | 	${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${C_LIB}
19 | 
20 | .c.o:
21 | 	${CCOMPILE} $<
22 | 
23 | adc.o:      adc.c npbparams.h
24 | dc.o:       dc.c adcc.h adc.h macrodef.h npbparams.h
25 | extbuild.o: extbuild.c adcc.h adc.h macrodef.h npbparams.h
26 | rbt.o:      rbt.c adcc.h adc.h rbt.h macrodef.h npbparams.h
27 | jobcntl.o:  jobcntl.c adcc.h adc.h macrodef.h npbparams.h
28 | 
29 | # SHELL is /bin/sh, so the recipe below avoids bash-only brace expansion.
30 | clean:
31 | 	- rm -f *.o 
32 | 	- rm -f npbparams.h core
33 | 	- for d in . ..; do for e in logf view dat viewsz groupby chunks; do rm -f $$d/ADC.$$e.*; done; done
34 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/DC/macrodef.h:
--------------------------------------------------------------------------------
 1 | #define PutErrMsg(msg) {fprintf(stderr," %s, errno = %d\n", msg, errno);}
 2 | 
 3 | #define WriteToFile(ptr,size,nitems,stream,logf) if( fwrite(ptr,size,nitems,stream) != nitems )\
 4 |        {\
 5 |         fprintf(stderr,"\n Write error from WriteToFile()\n"); return ADC_WRITE_FAILED; \
 6 |        }
 7 | 
 8 | #ifdef WINNT
 9 | #define FSEEK(stream,offset,whence)  fseek(stream, (long)offset,whence);
10 | #else
11 | #define FSEEK(stream,offset,whence)  fseek(stream,offset,whence); 
12 | #endif
13 | 
14 | /* Record size: DIM_FSZ per nd item plus MSR_FSZ per nm item. Arguments are
15 |    parenthesized so expression arguments (e.g. n+1) expand correctly. */
16 | #define GetRecSize(nd,nm) (DIM_FSZ*(nd)+MSR_FSZ*(nm))
15 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/DC/rbt.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ADC_PARVIEW_TREE_DEF_H_
 2 | #define _ADC_PARVIEW_TREE_DEF_H_
 3 | 
 4 | #define MAX_TREE_HEIGHT	64
 5 | enum{BLACK,RED};
 6 | 
 7 | typedef struct treeNode{
 8 |   struct treeNode *left;
 9 |   struct treeNode *right;
10 |   uint32 clr;
11 |   int64 nodeMemPool[1];
12 | } treeNode;
13 | 
14 | typedef struct RBTree{
15 |   treeNode root;	
16 |   treeNode * mp;
17 |   uint32 count;       
18 |   uint32 treeNodeSize;
19 |   uint32 nodeDataSize;
20 |   uint32 memoryLimit; 
21 |   uint32 memaddr;
22 |   uint32 memoryIsFull;
23 |   uint32 freeNodeCounter;
24 |   uint32 nNodesLimit;
25 |   uint32 nd;
26 |   uint32 nm;
27 |   uint32   *drcts;
28 |   treeNode **nodes;
29 |   unsigned char * memPool;
30 | } RBTree;
31 | 
32 | #define NEW_TREE_NODE(node_ptr,memPool,memaddr,treeNodeSize, \
33 |  freeNodeCounter,memoryIsFull) \
34 |  node_ptr=(struct treeNode*)(memPool+memaddr); \
35 |  memaddr+=treeNodeSize; \
36 |  (freeNodeCounter)--; \
37 |  if( freeNodeCounter == 0 ) { \
38 |      memoryIsFull = 1; \
39 |  }
40 | 
41 | int32 TreeInsert(RBTree *tree, uint32 *attrs);
42 | 
43 | #endif /* _ADC_PARVIEW_TREE_DEF_H_ */
44 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/EP/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=ep
 3 | BENCHMARKU=EP
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o \
 8 |        ${COMMON}/timers.o ${COMMON}/wtime.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
14 | 
15 | 
16 | ep.o:		ep.f npbparams.h
17 | 	${FCOMPILE} ep.f
18 | 
19 | clean:
20 | 	- rm -f *.o *~ 
21 | 	- rm -f npbparams.h core
22 | 	- if [ -d rii_files ]; then rm -r rii_files; fi
23 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/EP/README:
--------------------------------------------------------------------------------
1 | This code implements the random-number generator described in the
2 | NAS Parallel Benchmark document RNR Technical Report RNR-94-007.
3 | The code is "embarrassingly" parallel in that no communication is
4 | required for the generation of the random numbers itself. 
5 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/FT/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=ft
 3 | BENCHMARKU=FT
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = ft.o ${COMMON}/${RAND}.o ${COMMON}/print_results.o \
10 |        ${COMMON}/timers.o ${COMMON}/wtime.o
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
14 | 
15 | 
16 | 
17 | .f.o:
18 | 	${FCOMPILE} $<
19 | 
20 | ft.o:             ft.f  global.h npbparams.h
21 | 
22 | clean:
23 | 	- rm -f *.o *~ mputil*
24 | 	- rm -f ft npbparams.h core
25 | 	- if [ -d rii_files ]; then rm -r rii_files; fi
26 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/FT/README:
--------------------------------------------------------------------------------
1 | This code implements the time integration of a three-dimensional
2 | partial differential equation using the Fast Fourier Transform.
3 | Some of the dimension statements are not F77 conforming and will
4 | not work using the g77 compiler. All dimension statements,
5 | however, are legal F90.


--------------------------------------------------------------------------------
/applications/NPB/OMP/FT/inputft.data.sample:
--------------------------------------------------------------------------------
1 | 6   ! number of iterations
2 | 2   ! layout type. 0 = 0d, 1 = 1d, 2 = 2d
3 | 2 4 ! processor layout. 0d must be "1 1"; 1d must be "1 N"
4 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/IS/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=is
 3 | BENCHMARKU=IS
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = is.o \
10 |        ${COMMON}/c_print_results.o \
11 |        ${COMMON}/c_timers.o \
12 |        ${COMMON}/c_wtime.o
13 | 
14 | 
15 | ${PROGRAM}: config ${OBJS}
16 | 	${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${C_LIB}
17 | 
18 | .c.o:
19 | 	${CCOMPILE} $<
20 | 
21 | is.o:             is.c  npbparams.h
22 | 
23 | 
24 | clean:
25 | 	- rm -f *.o *~ mputil*
26 | 	- rm -f npbparams.h core
27 | 	- if [ -d rii_files ]; then rm -r rii_files; fi
28 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/LU/inputlu.data.sample:
--------------------------------------------------------------------------------
 1 | c
 2 | c***controls printing of the progress of iterations: ipr    inorm
 3 |                                                       1      250
 4 | c
 5 | c***the maximum no. of pseudo-time steps to be performed: nitmax
 6 |                                                              250
 7 | c
 8 | c***magnitude of the time step: dt 
 9 |                                2.0e+00
10 | c
11 | c***relaxation factor for SSOR iterations: omega
12 |                                             1.2
13 | c
14 | c***tolerance levels for steady-state residuals: tolnwt(m),m=1,5
15 |                              1.0e-08   1.0e-08   1.0e-08  1.0e-08  1.0e-08 
16 | c
17 | c***number of grid points in xi and eta and zeta directions: nx   ny   nz
18 |                                                             64  64  64
19 | c
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/MG/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=mg
 3 | BENCHMARKU=MG
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = mg.o ${COMMON}/print_results.o  \
 8 |        ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
14 | 
15 | mg.o:		mg.f globals.h npbparams.h
16 | 	${FCOMPILE} mg.f
17 | 
18 | clean:
19 | 	- rm -f *.o *~ 
20 | 	- rm -f npbparams.h core
21 | 	- if [ -d rii_files ]; then rm -r rii_files; fi
22 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/MG/mg.input.sample:
--------------------------------------------------------------------------------
1 |  8 = top level
2 |  256 256 256 = nx ny nz
3 |  20 = nit
4 |  0 0 0 0 0 0 0 0 = debug_vec
5 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/SP/add.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |        subroutine  add
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 | c---------------------------------------------------------------------
11 | c addition of update to the vector u
12 | c---------------------------------------------------------------------
13 | 
14 |        include 'header.h'
15 | 
16 |        integer i,j,k,m
17 | 
18 |        if (timeron) call timer_start(t_add)
19 | !$omp parallel do default(shared) private(i,j,k,m)
20 |        do k = 1, nz2
21 |           do j = 1, ny2
22 |              do i = 1, nx2
23 |                 do m = 1, 5
24 |                    u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k)
25 |                 end do
26 |              end do
27 |           end do
28 |        end do
29 |        if (timeron) call timer_stop(t_add)
30 | 
31 |        return
32 |        end
33 | 
34 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/SP/adi.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |        subroutine  adi
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 |        call compute_rhs
11 | 
12 |        call txinvr
13 | 
14 |        call x_solve
15 | 
16 |        call y_solve
17 | 
18 |        call z_solve
19 | 
20 |        call add
21 | 
22 |        return
23 |        end
24 | 
25 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/SP/inputsp.data.sample:
--------------------------------------------------------------------------------
1 | 400       number of time steps
2 | 0.0015d0  dt for class A = 0.0015d0. class B = 0.001d0  class C = 0.00067d0
3 | 64 64 64
4 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/common/wtime.c:
--------------------------------------------------------------------------------
 1 | #include "wtime.h"
 2 | #include <time.h>
 3 | #ifndef DOS
 4 | #include <sys/time.h>
 5 | #endif
 6 | 
 7 | void wtime(double *t)  /* symbol name is remapped by the wtime macro in wtime.h */
 8 | {
 9 |    /* a generic timer: writes wall-clock seconds, measured from the first call, to *t */
10 |    static int sec = -1;   /* tv_sec captured on the first call; -1 means not yet set */
11 |    struct timeval tv;
12 |    gettimeofday(&tv, (void *)0);   /* null pointer passed as the timezone argument */
13 |    if (sec < 0) sec = tv.tv_sec;   /* latch the base second once */
14 |    *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec;   /* whole seconds since base + microsecond part */
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/common/wtime.h:
--------------------------------------------------------------------------------
 1 | /* C/Fortran interface is different on different machines. 
 2 |  * You may need to tweak this.
 3 |  */
 4 | 
 5 | 
 6 | #if defined(IBM)
 7 | #define wtime wtime
 8 | #elif defined(CRAY)
 9 | #define wtime WTIME
10 | #else
11 | #define wtime wtime_
12 | #endif
13 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/README:
--------------------------------------------------------------------------------
1 | This directory contains examples of make.def files that were used 
2 | by the NPB team in testing the benchmarks on different platforms. 
3 | They can be used as starting points for make.def files for your 
4 | own platform, but you may need to tailor them for best performance 
5 | on your installation. A clean template can be found in directory 
6 | `config'.
7 | Some examples of suite.def files are also provided.


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.bt:
--------------------------------------------------------------------------------
1 | bt	S
2 | bt	W
3 | bt	A
4 | bt	B
5 | bt	C
6 | bt	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.cg:
--------------------------------------------------------------------------------
1 | cg	S
2 | cg	W
3 | cg	A
4 | cg	B
5 | cg	C
6 | cg	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.ep:
--------------------------------------------------------------------------------
1 | ep	S
2 | ep	W
3 | ep	A
4 | ep	B
5 | ep	C
6 | ep	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.ft:
--------------------------------------------------------------------------------
1 | ft	S
2 | ft	W
3 | ft	A
4 | ft	B
5 | ft	C
6 | ft	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.is:
--------------------------------------------------------------------------------
1 | is	S
2 | is	W
3 | is	A
4 | is	B
5 | is	C
6 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.lu:
--------------------------------------------------------------------------------
1 | lu	S
2 | lu	W
3 | lu	A
4 | lu	B
5 | lu	C
6 | lu	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.mg:
--------------------------------------------------------------------------------
1 | mg	S
2 | mg	W
3 | mg	A
4 | mg	B
5 | mg	C
6 | mg	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/NAS.samples/suite.def.sp:
--------------------------------------------------------------------------------
1 | sp	S
2 | sp	W
3 | sp	A
4 | sp	B
5 | sp	C
6 | sp	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/config/suite.def.template:
--------------------------------------------------------------------------------
 1 | # config/suite.def
 2 | # This file is used to build several benchmarks with a single command. 
 3 | # Typing "make suite" in the main directory will build all the benchmarks
 4 | # specified in this file. 
 5 | # Each line of this file contains a benchmark name and the class.
 6 | # The name is one of "cg", "is", "dc", "ep", "mg", "ft", "sp",
 7 | #  "bt", "lu", and "ua". 
 8 | # The class is one of "S", "W", "A" through "E" 
 9 | # (except that no classes C,D,E for DC and no class E for IS and UA).
10 | # No blank lines. 
11 | # The following example builds sample sizes of all benchmarks. 
12 | ft	S
13 | mg	S
14 | sp	S
15 | lu	S
16 | bt	S
17 | is	S
18 | ep	S
19 | cg	S
20 | ua	S
21 | dc      S
22 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/sys/Makefile:
--------------------------------------------------------------------------------
 1 | UCC = cc
 2 | include ../config/make.def
 3 | 
 4 | # Note that FCOMPILE is also defined in make.common and should
 5 | # be the same. We can't include make.common because it has a lot
 6 | # of other garbage. 
 7 | FCOMPILE = $(F77) -c $(F_INC) $(FFLAGS)
 8 | 
 9 | all: setparams 
10 | 
11 | # setparams creates an npbparams.h file for each benchmark 
12 | # configuration. npbparams.h also contains info about how a benchmark
13 | # was compiled and linked
14 | 
15 | setparams: setparams.c ../config/make.def
16 | 	$(UCC) ${CONVERTFLAG} -o setparams setparams.c
17 | 
18 | 
19 | clean: 
20 | 	-rm -f setparams setparams.h npbparams.h
21 | 	-rm -f *~ *.o
22 | 
23 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/sys/print_header:
--------------------------------------------------------------------------------
1 | echo '   ============================================'
2 | echo '   =      NAS PARALLEL BENCHMARKS 3.3         ='
3 | echo '   =      OpenMP Versions                     ='
4 | echo '   =      F77/C                               ='
5 | echo '   ============================================'
6 | echo ''
7 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/sys/print_instructions:
--------------------------------------------------------------------------------
 1 | echo ''
 2 | echo '   To make a NAS benchmark type '
 3 | echo ''
 4 | echo '         make <benchmark-name> CLASS=<class>'
 5 | echo ''
 6 | echo '   where <benchmark-name> is "bt", "cg", "ep", "ft", "is", "lu",'
 7 | echo '                             "mg", "sp", "ua", or "dc"'
 8 | echo '         <class>          is "S", "W", "A", "B", "C" or "D"'
 9 | echo ''
10 | echo '   To make a set of benchmarks, create the file config/suite.def'
11 | echo '   according to the instructions in config/suite.def.template and type'
12 | echo ''
13 | echo '         make suite'
14 | echo ''
15 | echo ' ***************************************************************'
16 | echo ' * Remember to edit the file config/make.def for site specific *'
17 | echo ' * information as described in the README file                 *'
18 | echo ' ***************************************************************'
19 | 
20 | 


--------------------------------------------------------------------------------
/applications/NPB/OMP/sys/suite.awk:
--------------------------------------------------------------------------------
 1 | BEGIN { SMAKE = "make" } {   # SMAKE: make command inserted into the generated text; main rule runs per input record
 2 |   if ($1 !~ /^#/ &&  NF > 1) {   # skip records whose first field starts with '#' or that have fewer than two fields
 3 |     printf "cd `echo %s|tr '[a-z]' '[A-Z]'`; %s clean;", $1, SMAKE;   # cd into the upper-cased field 1, then run the clean target
 4 |     printf "%s CLASS=%s", SMAKE, $2;   # build with CLASS taken from field 2
 5 |     if (NF > 2) {
 6 |       printf " VERSION=%s", $3;   # optional third field is passed through as VERSION
 7 |     }
 8 |     printf "; cd ..\n";   # return to the parent directory and end the generated command line
 9 |   }
10 | }
11 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/BT/add.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine  add
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 | c---------------------------------------------------------------------
10 | c     addition of update to the vector u
11 | c---------------------------------------------------------------------
12 | 
13 |       include 'header.h'
14 | 
15 |       integer i, j, k, m
16 | 
17 |       if (timeron) call timer_start(t_add)
18 |       do     k = 1, grid_points(3)-2
19 |          do     j = 1, grid_points(2)-2
20 |             do     i = 1, grid_points(1)-2
21 |                do    m = 1, 5
22 |                   u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k)
23 |                enddo
24 |             enddo
25 |          enddo
26 |       enddo
27 |       if (timeron) call timer_stop(t_add)
28 | 
29 |       return
30 |       end
31 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/BT/adi.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine  adi
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 |       call compute_rhs
10 | 
11 |       call x_solve
12 | 
13 |       call y_solve
14 | 
15 |       call z_solve
16 | 
17 |       call add
18 | 
19 |       return
20 |       end
21 | 
22 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/BT/exact_solution.f:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | 
 4 |       subroutine exact_solution(xi,eta,zeta,dtemp)
 5 | 
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | 
 9 | c---------------------------------------------------------------------
10 | c     this function returns the exact solution at point xi, eta, zeta  
11 | c---------------------------------------------------------------------
12 | 
13 |       include 'header.h'
14 | 
15 |       double precision  xi, eta, zeta, dtemp(5)
16 |       integer m
17 | 
18 |       do m = 1, 5
19 |          dtemp(m) =  ce(m,1) +
20 |      >     xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
21 |      >     eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
22 |      >     zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
23 |      >     zeta*ce(m,13))))
24 |       enddo
25 | 
26 |       return
27 |       end
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/BT/inputbt.data.sample:
--------------------------------------------------------------------------------
1 | 60       number of time steps
2 | 0.01d0   dt for class A = 0.0008d0. class B = 0.0003d0  class C = 0.0001d0
3 | 12 12 12
4 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/BT/work_lhs.h:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | c
 4 | c  work_lhs.h
 5 | c
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | c
 9 |       double precision fjac(5, 5,    0:problem_size),
10 |      >                 njac(5, 5,    0:problem_size),
11 |      >                 lhs (5, 5, 3, 0:problem_size),
12 |      >                 tmp1, tmp2, tmp3
13 |       common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3
14 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/BT/work_lhs_vec.h:
--------------------------------------------------------------------------------
 1 | c---------------------------------------------------------------------
 2 | c---------------------------------------------------------------------
 3 | c
 4 | c  work_lhs_vec.h
 5 | c
 6 | c---------------------------------------------------------------------
 7 | c---------------------------------------------------------------------
 8 | c
 9 |       double precision fjac(5, 5,    0:problem_size, 0:problem_size),
10 |      >                 njac(5, 5,    0:problem_size, 0:problem_size),
11 |      >                 lhs (5, 5, 3, 0:problem_size, 0:problem_size),
12 |      >                 tmp1, tmp2, tmp3
13 |       common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3
14 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/CG/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=cg
 3 | BENCHMARKU=CG
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = cg.o ${COMMON}/print_results.o  \
 8 |        ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
14 | 
15 | cg.o:		cg.f  globals.h npbparams.h
16 | 	${FCOMPILE} cg.f
17 | 
18 | clean:
19 | 	- rm -f *.o *~ 
20 | 	- rm -f npbparams.h core
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/CG/README.carefully:
--------------------------------------------------------------------------------
 1 | Note: please observe that in the routine conj_grad three 
 2 | implementations of the sparse matrix-vector multiply have
 3 | been supplied.  The default matrix-vector multiply is not
 4 | loop unrolled.  The alternate implementations are unrolled
 5 | to a depth of 2 and unrolled to a depth of 8.  Please
 6 | experiment with these to find the fastest for your particular
 7 | architecture.  If reporting timing results, any of these three may
 8 | be used without penalty.
 9 | 
10 | Performance examples:
11 | The non-unrolled version of the multiply is actually (slightly: 
12 | maybe 5%) faster on the sp2-66MHz-WN on 16 nodes than is the 
13 | unrolled-by-2 version below.   On the Cray t3d, the reverse is true, 
14 | i.e., the unrolled-by-two version is some 10% faster.  
15 | The unrolled-by-8 version below is significantly faster
16 | on the Cray t3d - overall speed of code is 1.5 times faster.
17 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/DC/ADC.par:
--------------------------------------------------------------------------------
1 | attrNum=12
2 | measuresNum=1
3 | tuplesNum=100
4 | INVERSE_ENDIAN=0
5 | fileName=ADC
6 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/DC/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=dc
 3 | BENCHMARKU=DC
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = adc.o dc.o extbuild.o rbt.o jobcntl.o \
10 | 	${COMMON}/c_print_results.o  \
11 | 	${COMMON}/c_timers.o ${COMMON}/c_wtime.o
12 | 
13 | 
14 | # npbparams.h is provided for backward compatibility with NPB compilation
15 | # header.h: npbparams.h
16 | 
17 | ${PROGRAM}: config ${OBJS} 
18 | 	${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${C_LIB}
19 | 
20 | .c.o:
21 | 	$(CCOMPILE) $<
22 | 
23 | adc.o:      adc.c npbparams.h
24 | dc.o:       dc.c adcc.h adc.h macrodef.h npbparams.h
25 | extbuild.o: extbuild.c adcc.h adc.h macrodef.h npbparams.h
26 | rbt.o:      rbt.c adcc.h adc.h rbt.h macrodef.h npbparams.h
27 | jobcntl.o:  jobcntl.c adcc.h adc.h macrodef.h npbparams.h
28 | 
29 | clean:
30 | 	- rm -f *.o 
31 | 	- rm -f npbparams.h core
32 | 	- rm -f {../,}ADC.{logf,view,dat,viewsz,groupby,chunks}.* 
33 | 
34 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/DC/macrodef.h:
--------------------------------------------------------------------------------
 1 | #define PutErrMsg(msg) {fprintf(stderr," %s, errno = %d\n", msg, errno);}
 2 | 
 3 | #define WriteToFile(ptr,size,nitems,stream,logf) if( fwrite(ptr,size,nitems,stream) != nitems )\
 4 |        {\
 5 |         fprintf(stderr,"\n Write error from WriteToFile()\n"); return ADC_WRITE_FAILED; \
 6 |        }
 7 | 
 8 | #ifdef WINNT
 9 | #define FSEEK(stream,offset,whence)  fseek(stream, (long)offset,whence);
10 | #else
11 | #define FSEEK(stream,offset,whence)  fseek(stream,offset,whence); 
12 | #endif
13 | 
14 | #define GetRecSize(nd,nm) (DIM_FSZ*nd+MSR_FSZ*nm)
15 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/DC/rbt.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ADC_PARVIEW_TREE_DEF_H_
 2 | #define _ADC_PARVIEW_TREE_DEF_H_
 3 | 
 4 | #define MAX_TREE_HEIGHT	64
 5 | enum{BLACK,RED};
 6 | 
 7 | typedef struct treeNode{
 8 |   struct treeNode *left;
 9 |   struct treeNode *right;
10 |   uint32 clr;
11 |   int64 nodeMemPool[1];
12 | } treeNode;
13 | 
14 | typedef struct RBTree{
15 |   treeNode root;	
16 |   treeNode * mp;
17 |   uint32 count;       
18 |   uint32 treeNodeSize;
19 |   uint32 nodeDataSize;
20 |   uint32 memoryLimit; 
21 |   uint32 memaddr;
22 |   uint32 memoryIsFull;
23 |   uint32 freeNodeCounter;
24 |   uint32 nNodesLimit;
25 |   uint32 nd;
26 |   uint32 nm;
27 |   uint32   *drcts;
28 |   treeNode **nodes;
29 |   unsigned char * memPool;
30 | } RBTree;
31 | 
32 | #define NEW_TREE_NODE(node_ptr,memPool,memaddr,treeNodeSize, \
33 |  freeNodeCounter,memoryIsFull) \
34 |  node_ptr=(struct treeNode*)(memPool+memaddr); \
35 |  memaddr+=treeNodeSize; \
36 |  (freeNodeCounter)--; \
37 |  if( freeNodeCounter == 0 ) { \
38 |      memoryIsFull = 1; \
39 |  }
40 | 
41 | int32 TreeInsert(RBTree *tree, uint32 *attrs);
42 | 
43 | #endif /* _ADC_PARVIEW_TREE_DEF_H_ */
44 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/EP/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=ep
 3 | BENCHMARKU=EP
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o \
 8 |        ${COMMON}/timers.o ${COMMON}/wtime.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
14 | 
15 | 
16 | ep.o:		ep.f npbparams.h
17 | 	${FCOMPILE} ep.f
18 | 
19 | clean:
20 | 	- rm -f *.o *~ 
21 | 	- rm -f npbparams.h core
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/EP/README:
--------------------------------------------------------------------------------
1 | This code implements the random-number generator described in the
2 | NAS Parallel Benchmark document RNR Technical Report RNR-94-007.
3 | The code is "embarrassingly" parallel in that no communication is
4 | required for the generation of the random numbers itself. 
5 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/FT/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=ft
 3 | BENCHMARKU=FT
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = appft.o auxfnct.o fft3d.o mainft.o verify.o \
10 |        ${COMMON}/${RAND}.o ${COMMON}/print_results.o \
11 |        ${COMMON}/timers.o ${COMMON}/wtime.o
12 | 
13 | ${PROGRAM}: config ${OBJS}
14 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
15 | 
16 | 
17 | 
18 | .f.o:
19 | 	${FCOMPILE} $<
20 | 
21 | appft.o:	appft.f  global.h npbparams.h
22 | auxfnct.o:	auxfnct.f  global.h npbparams.h
23 | fft3d.o:	fft3d.f  global.h npbparams.h
24 | mainft.o:	mainft.f  global.h npbparams.h
25 | verify.o:	verify.f  global.h npbparams.h
26 | 
27 | clean:
28 | 	- rm -f *.o *~ mputil*
29 | 	- rm -f ft npbparams.h core
30 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/IS/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=is
 3 | BENCHMARKU=IS
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | include ../sys/make.common
 8 | 
 9 | OBJS = is.o \
10 |        ${COMMON}/c_print_results.o \
11 |        ${COMMON}/c_timers.o \
12 |        ${COMMON}/c_wtime.o
13 | 
14 | 
15 | ${PROGRAM}: config ${OBJS}
16 | 	${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${C_LIB}
17 | 
18 | .c.o:
19 | 	${CCOMPILE} $<
20 | 
21 | is.o:             is.c  npbparams.h
22 | 
23 | 
24 | clean:
25 | 	- rm -f *.o *~ mputil*
26 | 	- rm -f npbparams.h core
27 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/LU/inputlu.data.sample:
--------------------------------------------------------------------------------
 1 | c
 2 | c***controls printing of the progress of iterations: ipr    inorm
 3 |                                                       1      250
 4 | c
 5 | c***the maximum no. of pseudo-time steps to be performed: nitmax
 6 |                                                              250
 7 | c
 8 | c***magnitude of the time step: dt 
 9 |                                2.0e+00
10 | c
11 | c***relaxation factor for SSOR iterations: omega
12 |                                             1.2
13 | c
14 | c***tolerance levels for steady-state residuals: tolnwt(m),m=1,5
15 |                              1.0e-08   1.0e-08   1.0e-08  1.0e-08  1.0e-08 
16 | c
17 | c***number of grid points in xi and eta and zeta directions: nx   ny   nz
18 |                                                             64  64  64
19 | c
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/MG/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=/bin/sh
 2 | BENCHMARK=mg
 3 | BENCHMARKU=MG
 4 | 
 5 | include ../config/make.def
 6 | 
 7 | OBJS = mg.o ${COMMON}/print_results.o  \
 8 |        ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o
 9 | 
10 | include ../sys/make.common
11 | 
12 | ${PROGRAM}: config ${OBJS}
13 | 	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
14 | 
15 | mg.o:		mg.f globals.h npbparams.h
16 | 	${FCOMPILE} mg.f
17 | 
18 | clean:
19 | 	- rm -f *.o *~ 
20 | 	- rm -f npbparams.h core
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/MG/mg.input.sample:
--------------------------------------------------------------------------------
1 |  8 = top level
2 |  256 256 256 = nx ny nz
3 |  20 = nit
4 |  0 0 0 0 0 0 0 0 = debug_vec
5 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/SP/add.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |        subroutine  add
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 | c---------------------------------------------------------------------
11 | c addition of update to the vector u
12 | c---------------------------------------------------------------------
13 | 
14 |        include 'header.h'
15 | 
16 |        integer i,j,k,m
17 | 
18 |        if (timeron) call timer_start(t_add)
19 |        do k = 1, nz2
20 |           do j = 1, ny2
21 |              do i = 1, nx2
22 |                 do m = 1, 5
23 |                    u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k)
24 |                 end do
25 |              end do
26 |           end do
27 |        end do
28 |        if (timeron) call timer_stop(t_add)
29 | 
30 |        return
31 |        end
32 | 
33 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/SP/adi.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |        subroutine  adi
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 |        call compute_rhs
11 | 
12 |        call txinvr
13 | 
14 |        call x_solve
15 | 
16 |        call y_solve
17 | 
18 |        call z_solve
19 | 
20 |        call add
21 | 
22 |        return
23 |        end
24 | 
25 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/SP/exact_solution.f:
--------------------------------------------------------------------------------
 1 | 
 2 | c---------------------------------------------------------------------
 3 | c---------------------------------------------------------------------
 4 | 
 5 |       subroutine exact_solution(xi,eta,zeta,dtemp)
 6 | 
 7 | c---------------------------------------------------------------------
 8 | c---------------------------------------------------------------------
 9 | 
10 | c---------------------------------------------------------------------
11 | c this function returns the exact solution at point xi, eta, zeta  
12 | c---------------------------------------------------------------------
13 | 
14 |        include 'header.h'
15 | 
16 |        double precision  xi, eta, zeta, dtemp(5)
17 |        integer m
18 | 
19 |        do  m = 1, 5
20 |           dtemp(m) =  ce(m,1) +
21 |      >    xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
22 |      >    eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
23 |      >    zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
24 |      >    zeta*ce(m,13))))
25 |        end do
26 | 
27 |        return
28 |        end
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/SP/inputsp.data.sample:
--------------------------------------------------------------------------------
1 | 400       number of time steps
2 | 0.0015d0  dt for class A = 0.0015d0. class B = 0.001d0  class C = 0.00067d0
3 | 64 64 64
4 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/common/wtime.c:
--------------------------------------------------------------------------------
 1 | #include "wtime.h"
 2 | #include <time.h>
 3 | #ifndef DOS
 4 | #include <sys/time.h>
 5 | #endif
 6 | 
 7 | void wtime(double *t)  /* symbol name is remapped by the wtime macro in wtime.h */
 8 | {
 9 |   static int sec = -1;   /* tv_sec captured on the first call; -1 means not yet set */
10 |   struct timeval tv;
11 |   gettimeofday(&tv, (void *)0);   /* null pointer passed as the timezone argument */
12 |   if (sec < 0) sec = tv.tv_sec;   /* latch the base second once */
13 |   *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec;   /* whole seconds since base + microsecond part, stored in *t */
14 | }
15 | 
16 |     
17 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/common/wtime.h:
--------------------------------------------------------------------------------
 1 | /* C/Fortran interface is different on different machines. 
 2 |  * You may need to tweak this.
 3 |  */
 4 | 
 5 | 
 6 | #if defined(IBM)
 7 | #define wtime wtime
 8 | #elif defined(CRAY)
 9 | #define wtime WTIME
10 | #else
11 | #define wtime wtime_
12 | #endif
13 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/README:
--------------------------------------------------------------------------------
1 | This directory contains examples of make.def files that were used 
2 | by the NPB team in testing the benchmarks on different platforms. 
3 | They can be used as starting points for make.def files for your 
4 | own platform, but you may need to tailor them for best performance 
5 | on your installation. A clean template can be found in directory 
6 | `config'.
7 | Some examples of suite.def files are also provided.


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.bt:
--------------------------------------------------------------------------------
1 | bt	S
2 | bt	W
3 | bt	A
4 | bt	B
5 | bt	C
6 | bt	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.cg:
--------------------------------------------------------------------------------
1 | cg	S
2 | cg	W
3 | cg	A
4 | cg	B
5 | cg	C
6 | cg	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.ep:
--------------------------------------------------------------------------------
1 | ep	S
2 | ep	W
3 | ep	A
4 | ep	B
5 | ep	C
6 | ep	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.ft:
--------------------------------------------------------------------------------
1 | ft	S
2 | ft	W
3 | ft	A
4 | ft	B
5 | ft	C
6 | ft	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.is:
--------------------------------------------------------------------------------
1 | is	S
2 | is	W
3 | is	A
4 | is	B
5 | is	C
6 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.lu:
--------------------------------------------------------------------------------
1 | lu	S
2 | lu	W
3 | lu	A
4 | lu	B
5 | lu	C
6 | lu	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.mg:
--------------------------------------------------------------------------------
1 | mg	S
2 | mg	W
3 | mg	A
4 | mg	B
5 | mg	C
6 | mg	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/NAS.samples/suite.def.sp:
--------------------------------------------------------------------------------
1 | sp	S
2 | sp	W
3 | sp	A
4 | sp	B
5 | sp	C
6 | sp	D
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/config/suite.def.template:
--------------------------------------------------------------------------------
 1 | # config/suite.def
 2 | # This file is used to build several benchmarks with a single command. 
 3 | # Typing "make suite" in the main directory will build all the benchmarks
 4 | # specified in this file. 
 5 | # Each line of this file contains a benchmark name, class.
 6 | # The name is one of "cg", "is", "dc", "ep", "mg", "ft", "sp",
 7 | #  "bt", "lu", and "ua". 
 8 | # The class is one of "S", "W", "A", "B", and "C"
 9 | # (classes D and E are defined for a number of benchmarks, but they
10 | #  are likely not practical to run in serial. See README.install).
11 | # No blank lines. 
12 | # The following example builds serial sample sizes of all benchmarks. 
13 | ft	S
14 | mg	S
15 | sp	S
16 | lu	S
17 | bt	S
18 | is	S
19 | ep	S
20 | cg	S
21 | ua	S
22 | dc      S
23 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/sys/Makefile:
--------------------------------------------------------------------------------
 1 | UCC = cc
 2 | include ../config/make.def
 3 | 
 4 | # Note that COMPILE is also defined in make.common and should
 5 | # be the same. We can't include make.common because it has a lot
 6 | # of other garbage. 
 7 | FCOMPILE = $(F77) -c $(F_INC) $(FFLAGS)
 8 | 
 9 | all: setparams 
10 | 
11 | # setparams creates an npbparams.h file for each benchmark 
12 | # configuration. npbparams.h also contains info about how a benchmark
13 | # was compiled and linked
14 | 
15 | setparams: setparams.c ../config/make.def
16 | 	$(UCC) ${CONVERTFLAG} -o setparams setparams.c
17 | 
18 | 
19 | clean: 
20 | 	-rm -f setparams setparams.h npbparams.h
21 | 	-rm -f *~ *.o
22 | 
23 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/sys/print_header:
--------------------------------------------------------------------------------
1 | echo '   ==========================================='
2 | echo '   =      NAS PARALLEL BENCHMARKS 3.3        ='
3 | echo '   =      Serial Versions                    ='
4 | echo '   =      F77/C                              ='
5 | echo '   ==========================================='
6 | echo ''
7 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/sys/print_instructions:
--------------------------------------------------------------------------------
 1 | echo ''
 2 | echo '   To make a NAS benchmark type '
 3 | echo ''
 4 | echo '         make <benchmark-name> CLASS=<class>'
 5 | echo ''
 6 | echo '   where <benchmark-name> is "bt", "cg", "ep", "ft", "is", "lu",'
 7 | echo '                             "lu-hp", "mg", "sp", or "ua"'
 8 | echo '         <class>          is "S", "W", "A", "B", "C" or "D"'
 9 | echo ''
10 | echo '   To make a set of benchmarks, create the file config/suite.def'
11 | echo '   according to the instructions in config/suite.def.template and type'
12 | echo ''
13 | echo '         make suite'
14 | echo ''
15 | echo ' ***************************************************************'
16 | echo ' * Remember to edit the file config/make.def for site specific *'
17 | echo ' * information as described in the README file                 *'
18 | echo ' ***************************************************************'
19 | 
20 | 


--------------------------------------------------------------------------------
/applications/NPB/SERIAL/sys/suite.awk:
--------------------------------------------------------------------------------
 1 | BEGIN { SMAKE = "make" } {
 2 |   if ($1 !~ /^#/ &&  NF > 1) {
 3 |     printf "cd `echo %s|tr '[a-z]' '[A-Z]'`; %s clean;", $1, SMAKE;
 4 |     printf "%s CLASS=%s", SMAKE, $2;
 5 |     if (NF > 2) {
 6 |       printf " VERSION=%s", $3;
 7 |     }
 8 |     printf "; cd ..\n";
 9 |   }
10 | }
11 | 


--------------------------------------------------------------------------------
/applications/demos/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | file(GLOB DEMOS
 3 |   "*/*.cpp"
 4 | )
 5 | 
 6 | # make a separate build target (demo-<basename>) for each demo source file
 7 | foreach(file ${DEMOS})
 8 |   get_filename_component(base ${file} NAME_WE)
 9 |   add_grappa_exe(demo-${base} ${base}.exe ${file})
10 |   set_property(TARGET demo-${base} PROPERTY FOLDER "Applications")  # same target name as created above
11 | endforeach()
12 | 


--------------------------------------------------------------------------------
/applications/demos/standalone/Makefile:
--------------------------------------------------------------------------------
 1 | # Example standalone Grappa app using Grappa's GNU Make include file
 2 | #
 3 | # To use, build and install Grappa. Then source <Grappa installation
 4 | # path>/bin/settings.sh. After that you should be able to just say
 5 | # "make" in this directory, and run the generated binary like you do
 6 | # any other MPI program.
 7 | 
 8 | GRAPPA_IMPLICIT_RULES:=on
 9 | include $(GRAPPA_PREFIX)/share/Grappa/grappa.mk
10 | 
11 | standalone: standalone.o
12 | 


--------------------------------------------------------------------------------
/applications/graph500/.gitignore:
--------------------------------------------------------------------------------
 1 | *.a
 2 | *.o
 3 | *~
 4 | make.inc
 5 | seq-list/seq-list
 6 | seq-csr/seq-csr
 7 | omp-csr/omp-csr
 8 | *.pl
 9 | xmt-csr/xmt-csr
10 | xmt-csr-local/xmt-csr-local
11 | *.gcda
12 | mpi/graph500_mpi_*
13 | generator/generator_test_mpi
14 | *.bin
15 | make-edgelist
16 | 


--------------------------------------------------------------------------------
/applications/graph500/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(generator)


--------------------------------------------------------------------------------
/applications/graph500/README-Grappa.md:
--------------------------------------------------------------------------------
1 | 
2 | # Don't use this
3 | 
4 | This code no longer works in the current version of Grappa; it's here because part of it is a dependency of some other examples.
5 | 
6 | If you're looking for a BFS implementation, use the one in the applications/graphlab directory or in nativegraph/bfs/bfs_beamer.cpp.
7 | 
8 | 


--------------------------------------------------------------------------------
/applications/graph500/generator/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | set(SOURCES
 3 |   graph_generator.h
 4 |   graph_generator.c
 5 |   make_graph.h
 6 |   make_graph.c
 7 |   splittable_mrg.h
 8 |   splittable_mrg.c
 9 |   utils.h
10 |   utils.c
11 |   user_settings.h
12 |   mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h
13 |   ../prng.c
14 | )
15 | set_source_files_properties( make_graph.c graph_generator.c utils.c PROPERTIES LANGUAGE CXX )
16 | 
17 | add_definitions(
18 |   -Drestrict=__restrict__
19 |   -DGRAPH_GENERATOR_GRAPPA
20 |   -DGRAPH_GENERATOR_SEQ
21 |   -DGRAPHGEN_DISTRIBUTED_MEMORY
22 | )
23 | 
24 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
25 | 
26 | add_library(generator EXCLUDE_FROM_ALL ${SOURCES})
27 | set_property(TARGET generator PROPERTY FOLDER "Applications")
28 | add_dependencies(generator all-third-party)
29 | 


--------------------------------------------------------------------------------
/applications/graph500/generator/Makefile.mpi:
--------------------------------------------------------------------------------
 1 | CC = mpicc
 2 | CFLAGS = -std=c99 -O3 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg
 3 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g
 4 | LDFLAGS = -O3
 5 | # LDFLAGS = -g
 6 | MPICC = mpicc
 7 | 
 8 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 9 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
10 | 
11 | all: generator_test_mpi
12 | 
13 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
14 | 	$(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm
15 | 
16 | clean:
17 | 	-rm -f generator_test_mpi
18 | 


--------------------------------------------------------------------------------
/applications/graph500/generator/Makefile.omp:
--------------------------------------------------------------------------------
 1 | CC = gcc -fopenmp
 2 | CFLAGS = -std=c99 -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_OMP # -g -pg
 3 | LDFLAGS = -O3
 4 | 
 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 7 | 
 8 | all: generator_test_omp
 9 | 
10 | generator_test_omp: generator_test_omp.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
11 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_omp generator_test_omp.c $(GENERATOR_SOURCES) -lm
12 | 
13 | clean:
14 | 	-rm -f generator_test_omp
15 | 


--------------------------------------------------------------------------------
/applications/graph500/generator/Makefile.seq:
--------------------------------------------------------------------------------
 1 | CC = cc
 2 | CFLAGS = -g -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_SEQ -D_GRAPPA # -g -pg
 3 | # CFLAGS = -g -Wall -Drestrict=__restrict__
 4 | LDFLAGS = -g # -g -pg
 5 | 
 6 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 7 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 8 | 
 9 | all: generator_test_seq
10 | 
11 | generator_test_seq: generator_test_seq.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
12 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_seq generator_test_seq.c $(GENERATOR_SOURCES) -lm
13 | 
14 | clean:
15 | 	-rm -f generator_test_seq
16 | 
17 | 


--------------------------------------------------------------------------------
/applications/graph500/generator/Makefile.xmt:
--------------------------------------------------------------------------------
 1 | CC = cc
 2 | CFLAGS = -DNDEBUG 
 3 | LDFLAGS = $(CFLAGS) # -g -pg
 4 | 
 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 7 | 
 8 | all: generator_test_xmt
 9 | 
10 | generator_test_xmt: generator_test_xmt.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
11 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_xmt generator_test_xmt.c $(GENERATOR_SOURCES) -lm
12 | 
13 | clean:
14 | 	-rm -f generator_test_xmt
15 | 


--------------------------------------------------------------------------------
/applications/graph500/graph500.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(GRAPH500_HEADER_)
 5 | #define GRAPH500_HEADER_
 6 | 
 7 | #define NAME "Graph500 sequential list"
 8 | #define VERSION 0
 9 | 
10 | #include "generator/graph_generator.h"
11 | 
12 | /** Pass the edge list to an external graph creation routine. */
13 | int create_graph_from_edgelist (struct packed_edge *IJ, int64_t nedge);
14 | 
15 | /** Create the BFS tree from a given source vertex. */
16 | int make_bfs_tree (int64_t *bfs_tree_out, int64_t *max_vtx_out,
17 | 		   int64_t srcvtx);
18 | 
19 | /** Clean up. */
20 | void destroy_graph (void);
21 | 
22 | #endif /* GRAPH500_HEADER_ */
23 | 


--------------------------------------------------------------------------------
/applications/graph500/grappa/.gitignore:
--------------------------------------------------------------------------------
1 | .igor
2 | 


--------------------------------------------------------------------------------
/applications/graph500/grappa/asciize.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | f = open(ARGV[0])
 3 | puts "-------"
 4 | nedge = f.read(8).unpack("L")[0]
 5 | nv = f.read(8).unpack("L")[0]
 6 | nadj = f.read(8).unpack("L")[0]
 7 | nbfs = f.read(8).unpack("L")[0]
 8 | puts "nedge: #{nedge}, nv: #{nv}, nadj: #{nadj}, nbfs: #{nbfs}"
 9 | 
10 | puts "-- edges --"
11 | (0...nedge*2).each{|i|
12 |   puts "#{i}: #{f.read(8).unpack('L')[0]}"
13 | }
14 | 
15 | puts "-- xoff --"
16 | (0...(2*nv+2)).each{|i|
17 |   puts "#{i}: #{f.read(8).unpack('L')[0]}"
18 | }
19 | 
20 | puts "-- xadj --"
21 | (0...nadj).each{|i|
22 |   puts "#{i}: #{f.read(8).unpack('L')[0]}"
23 | }
24 | 
25 | puts "-- bfsroots --"
26 | (0...nbfs).each{|i|
27 |   puts "#{i}: #{f.read(8).unpack('L')[0]}"
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/applications/graph500/grappa/graph.cpp:
--------------------------------------------------------------------------------
1 | #include "graph.hpp"
2 | 
3 | 


--------------------------------------------------------------------------------
/applications/graph500/grappa/options.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #ifndef _OPTIONS_H
 5 | #define _OPTIONS_H
 6 | 
 7 | extern int VERBOSE;
 8 | extern int use_RMAT;
 9 | extern char *dumpname;
10 | extern char *rootname;
11 | 
12 | #define A_PARAM 0.57
13 | #define B_PARAM 0.19
14 | #define C_PARAM 0.19
15 | /* Hence D = 0.05. */
16 | 
17 | extern double A, B, C, D;
18 | 
19 | #define NBFS_max 64
20 | extern int NBFS;
21 | 
22 | #define default_SCALE ((int64_t)14)
23 | #define default_edgefactor ((int64_t)16)
24 | 
25 | extern int64_t SCALE;
26 | extern int64_t edgefactor;
27 | 
28 | extern bool load_checkpoint;
29 | extern bool write_checkpoint;
30 | 
31 | extern bool verify;
32 | 
33 | void get_options (int argc, char **argv);
34 | 
35 | #endif /* _OPTIONS_H */
36 | 


--------------------------------------------------------------------------------
/applications/graph500/grappa/trace.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'pty'
 3 | require 'fileutils'; include FileUtils
 4 | 
 5 | cmd = ARGV.join(' ')
 6 | begin
 7 |   PTY.spawn(cmd) do |stdin, stdout, pid|
 8 |     begin
 9 |       stdin.sync
10 |       stdin.each{|line| puts line.strip}
11 |     rescue Errno::EIO
12 |     end
13 |   end
14 | rescue PTY::ChildExited
15 | end
16 | 
17 | otf = Dir.glob("*.otf").max_by {|f| File.mtime(f)}
18 | base = otf[/(.*)\.otf/,1]
19 | open("#{base}.sh", "w"){|f| f.write("#{cmd}\n") }
20 | dest = "trace/#{base}"
21 | mkdir dest
22 | `mv #{base}.* #{dest}`
23 | 


--------------------------------------------------------------------------------
/applications/graph500/kronecker.h:
--------------------------------------------------------------------------------
 1 | #if !defined(KRONECKER_HEADER_)
 2 | #define KRONECKER_HEADER_
 3 | 
 4 | #include "generator/graph_generator.h"
 5 | 
 6 | void kronecker_edgelist (struct packed_edge *IJ, int64_t nedge, int64_t SCALE,
 7 | 			 double A, double B, double C);
 8 | 
 9 | #endif /* KRONECKER_HEADER_ */
10 | 


--------------------------------------------------------------------------------
/applications/graph500/make-incs/make.inc-gcc:
--------------------------------------------------------------------------------
 1 | # -*- Makefile -*-
 2 | # Copyright 2010, Georgia Institute of Technology, USA.
 3 | # See COPYING for license.
 4 | CFLAGS = -g -std=c99
 5 | #CFLAGS = -g -std=c99 -O3 -march=native -fgcse-sm -fgcse-las -fgcse-after-reload -floop-strip-mine -ftree-loop-im -fivopts -funswitch-loops
 6 | LDLIBS = -lm -lrt
 7 | CPPFLAGS = -DUSE_MMAP_LARGE -DUSE_MMAP_LARGE_EXT
 8 | 
 9 | BUILD_OPENMP = Yes
10 | CFLAGS_OPENMP = -fopenmp
11 | 


--------------------------------------------------------------------------------
/applications/graph500/make-incs/make.inc-osx:
--------------------------------------------------------------------------------
 1 | # -*- Makefile -*-
 2 | # Copyright 2010, University of Illinois at Urbana-Champaign
 3 | # See COPYING for license.
 4 | CFLAGS = -g -std=c99 -Wall
 5 | LDLIBS = -lm
 6 | # OSX does not support MAP_ANON in POSIX mode and the timers for MacOSX are
 7 | # not available for Leopard
 8 | # Use HAVE_MACH_ABSOLUTE_TIME to get the Mac OSX Timer
 9 | # Use HAVE_ALLOCA_H to include alloca.h
10 | # Use HAVE_MPI_INT64_T to use the MPI 2.2 Datatype for int64_t items
11 | # Use HAVE_UNISTD_H to include unistd.h (for getopt definitions)
12 | CPPFLAGS = -DHAVE_MACH_ABSOLUTE_TIME -DHAVE_ALLOCA_H -DHAVE_MPI_INT64_T -DHAVE_UNISTD_H
13 | BUILD_MPI = No
14 | BUILD_OPENMP = Yes
15 | 


--------------------------------------------------------------------------------
/applications/graph500/make-incs/make.inc-xmt:
--------------------------------------------------------------------------------
 1 | # -*- Makefile -*-
 2 | # Copyright 2010, Georgia Institute of Technology, USA.
 3 | # See COPYING for license.
 4 | # Modules and Emacs+tramp don't cooperate at PNNL.
 5 | #PATH:=/opt/mta-pe/6.5.0/bin:${PATH}
 6 | CFLAGS = 
 7 | LDLIBS = -lprand -lm -lrt
 8 | CPPFLAGS = -DUSE_MMAP_LARGE -DNDEBUG
 9 | 
10 | BUILD_OPENMP = No
11 | CFLAGS_OPENMP =
12 | 
13 | BUILD_XMT = Yes
14 | 
15 | CLEANS:= xmt-csr/*.pl xmt-csr-local/*.pl *.o
16 | 
17 | 


--------------------------------------------------------------------------------
/applications/graph500/mpi/igor_mpi_bfs.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'igor'
 3 | 
 4 | # inherit parser, sbatch_flags
 5 | require_relative '../../../util/igor_common.rb'
 6 | 
 7 | Igor do
 8 |   include Isolatable
 9 | 
10 |   database '~/exp/sosp.db', :bfs
11 | 
12 |   # isolate everything needed for the executable so we can sbcast them for local execution
13 |   isolate(%w[simple replicated replicated_csc].map{|v| "graph500_mpi_#{v}"},
14 |           File.dirname(__FILE__))
15 |   
16 |   command "#{$srun} %{tdir}/graph500_mpi_%{mpibfs} %{scale} %{edgefactor}"
17 |   
18 |   sbatch_flags << "--time=4:00:00"
19 |   
20 |   params {
21 |     mpibfs      'simple'
22 |     nnode       2
23 |     ppn         1
24 |     scale       20
25 |     edgefactor  16
26 |   }
27 |   
28 |   expect :max_teps
29 |     
30 |   $filtered = results{|t| t.select(:id, :mpibfs, :scale, :nnode, :ppn, :run_at, :min_time, :max_teps) }
31 |   
32 |   interact # enter interactive mode
33 | end
34 | 


--------------------------------------------------------------------------------
/applications/graph500/octave/Graph500.m:
--------------------------------------------------------------------------------
 1 | SCALE = 10;
 2 | edgefactor = 16;
 3 | NBFS = 64;
 4 | 
 5 | rand ("seed", 103);
 6 | 
 7 | ij = kronecker_generator (SCALE, edgefactor);
 8 | 
 9 | tic;
10 | G = kernel_1 (ij);
11 | kernel_1_time = toc;
12 | 
13 | N = size (G, 1);
14 | coldeg = full (spstats (G));
15 | search_key = randperm (N);
16 | search_key(coldeg(search_key) == 0) = [];
17 | if length (search_key) > NBFS,
18 |   search_key = search_key(1:NBFS);
19 | else
20 |   NBFS = length (search_key);
21 | end
22 | search_key = search_key - 1;  
23 | 
24 | kernel_2_time = Inf * ones (NBFS, 1);
25 | kernel_2_nedge = zeros (NBFS, 1);
26 | 
27 | indeg = histc (ij(:), 1:N); % For computing the number of edges
28 | 
29 | for k = 1:NBFS,
30 |   tic;
31 |   parent = kernel_2 (G, search_key(k));
32 |   kernel_2_time(k) = toc;
33 |   err = validate (parent, ij, search_key (k));
34 |   if err <= 0,
35 |     error (sprintf ("BFS %d from search key %d failed to validate: %d",
36 | 		    k, search_key(k), err));
37 |   end
38 |   kernel_2_nedge(k) = sum (indeg(parent >= 0))/2; % Volume/2
39 | end
40 | 
41 | output (SCALE, edgefactor, NBFS, kernel_1_time, kernel_2_time, kernel_2_nedge);
42 | 


--------------------------------------------------------------------------------
/applications/graph500/octave/kernel_1.m:
--------------------------------------------------------------------------------
 1 | function G = kernel_1 (ij)
 2 | %% Compute a sparse adjacency matrix representation
 3 | %% of the graph with edges from ij.
 4 | 
 5 |   %% Remove self-edges.
 6 |   ij(:, ij(1,:) == ij(2,:)) = [];
 7 |   %% Adjust away from zero labels.
 8 |   ij = ij + 1;
 9 |   %% Find the maximum label for sizing.
10 |   N = max (max (ij));
11 |   %% Create the matrix, ensuring it is square.
12 |   G = sparse (ij(1,:), ij(2,:), ones (1, size (ij, 2)), N, N);
13 |   %% Symmetrize to model an undirected graph.
14 |   G = spones (G + G.');
15 | 


--------------------------------------------------------------------------------
/applications/graph500/octave/kernel_2.m:
--------------------------------------------------------------------------------
 1 | function parent = kernel_2 (G, root)
 2 | %% Breadth-first search of the graph G starting from the zero-based vertex root;
 3 | %% parent(v+1) is the zero-based BFS parent of vertex v, or -1 if v was not reached.
 4 | 
 5 |   N = size (G, 1);
 6 |   %% Adjust from zero labels.
 7 |   root = root + 1;
 8 |   parent = zeros (N, 1);
 9 |   parent (root) = root;
10 | 
11 |   vlist = zeros (N, 1);
12 |   vlist(1) = root;
13 |   lastk = 1;
14 |   for k = 1:N,
15 |     v = vlist(k);
16 |     if v == 0, break; end
17 |     [I,J,V] = find (G(:, v));
18 |     nxt = I(parent(I) == 0);
19 |     parent(nxt) = v;
20 |     vlist(lastk + (1:length (nxt))) = nxt;
21 |     lastk = lastk + length (nxt);
22 |   end
23 | 
24 |   %% Adjust to zero labels.
25 |   parent = parent - 1;
26 | 
27 | 


--------------------------------------------------------------------------------
/applications/graph500/octave/validate.m:
--------------------------------------------------------------------------------
 1 | function out = validate (parent, ij, search_key)
 2 |   out = 1;
 3 |   parent = parent + 1;
 4 |   search_key = search_key + 1;
 5 | 
 6 |   if parent (search_key) != search_key,
 7 |     out = 0;
 8 |     return;
 9 |   end
10 | 
11 |   ij = ij + 1;
12 |   N = max (max (ij));
13 |   slice = find (parent > 0);
14 | 
15 |   level = zeros (size (parent));
16 |   level (slice) = 1;
17 |   P = parent (slice);
18 |   mask = P != search_key;
19 |   k = 0;
20 |   while any (mask),
21 |     level(slice(mask)) = level(slice(mask)) + 1;
22 |     P = parent (P);
23 |     mask = P != search_key;
24 |     k = k + 1;
25 |     if k > N,
26 |       %% There must be a cycle in the tree.
27 |       out = -3;
28 |       return;
29 |     end
30 |   end
31 | 
32 |   lij = level (ij);
33 |   neither_in = lij(1,:) == 0 & lij(2,:) == 0;
34 |   both_in = lij(1,:) > 0 & lij(2,:) > 0;
35 |   if any (not (neither_in | both_in)),
36 |     out = -4;
37 |     return
38 |   end
39 |   respects_tree_level = abs (lij(1,:) - lij(2,:)) <= 1;
40 |   if any (not (neither_in | respects_tree_level)),
41 |     out = -5;
42 |     return
43 |   end
44 | 


--------------------------------------------------------------------------------
/applications/graph500/options.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(OPTIONS_HEADER_)
 5 | #define OPTIONS_HEADER_
 6 | 
 7 | #include <stdbool.h>
 8 | 
 9 | extern int VERBOSE;
10 | extern int use_RMAT;
11 | extern char *dumpname;
12 | extern char *rootname;
13 | 
14 | #define A_PARAM 0.57
15 | #define B_PARAM 0.19
16 | #define C_PARAM 0.19
17 | /* Hence D = 0.05. */
18 | 
19 | extern double A, B, C, D;
20 | 
21 | #define NBFS_max 8
22 | extern int NBFS;
23 | 
24 | #define default_SCALE ((int64_t)14)
25 | #define default_edgefactor ((int64_t)16)
26 | 
27 | extern int64_t SCALE;
28 | extern int64_t edgefactor;
29 | 
30 | void get_options (int argc, char **argv);
31 | 
32 | extern bool load_checkpoint;
33 | extern bool verify;
34 | 
35 | #endif /* OPTIONS_HEADER_ */
36 | 


--------------------------------------------------------------------------------
/applications/graph500/prng.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(PRNG_HEADER_)
 5 | #define PRNG_HEADER_
 6 | 
 7 | /** Initialize the PRNG, called in a sequential context. */
 8 | void init_random (void);
 9 | 
10 | extern uint64_t userseed;
11 | extern uint_fast32_t prng_seed[5];
12 | extern void *prng_state;
13 | 
14 | #ifdef __MTA__
15 | #include <mta_rng.h>
16 | #else
17 | #include <stdlib.h>
18 | static void prand(int64_t n, double * v) {
19 |   int64_t i;
20 |   extern int64_t xmtcompat_rand_initialized;
21 |   extern void xmtcompat_initialize_rand(void);
22 |   if (!xmtcompat_rand_initialized) xmtcompat_initialize_rand();
23 |   for (i = 0; i < n; ++i) {
24 |     v[i] = drand48();
25 |   }
26 | }
27 | #endif /* !defined(__MTA__) */
28 | 
29 | #endif /* PRNG_HEADER_ */
30 | 


--------------------------------------------------------------------------------
/applications/graph500/rmat.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(RMAT_HEADER_)
 5 | #define RMAT_HEADER_
 6 | 
 7 | #include "generator/graph_generator.h"
 8 | 
 9 | /** Fill IJ with a randomly permuted R-MAT generated edge list. */
10 | void rmat_edgelist (struct packed_edge *IJ, int64_t nedge, int SCALE,
11 | 		    double A, double B, double C);
12 | void permute_vertex_labels (struct packed_edge * IJ, int64_t nedge, int64_t max_nvtx,
13 | 			    void * st, int64_t * newlabel);
14 | void permute_edgelist (struct packed_edge * IJ, int64_t nedge, void *st);
15 | 
16 | #endif /* RMAT_HEADER_ */
17 | 


--------------------------------------------------------------------------------
/applications/graph500/timer.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(TIMER_HEADER_)
 5 | #define TIMER_HEADER_
 6 | 
 7 | /** Start timing. */
 8 | void tic (void);
 9 | 
10 | /** Return seconds since last tic. */
11 | double toc (void);
12 | 
13 | /** return current seconds */
14 | double timer(void);
15 | 
16 | /** Macro to time a block. */
17 | #define TIME(timevar, what) do { tic (); what; timevar = toc(); } while (0)
18 | 
19 | #endif /* TIMER_HEADER_ */
20 | 


--------------------------------------------------------------------------------
/applications/graph500/verify.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(VERIFY_HEADER_)
 5 | #define VERIFY_HEADER_
 6 | 
 7 | #include "generator/graph_generator.h"
 8 | 
 9 | /** Verify a BFS tree, return volume or -1 if failed. */
10 | int64_t verify_bfs_tree (int64_t *bfs_tree, int64_t max_bfsvtx,
11 | 			 int64_t root,
12 | 			 const struct packed_edge *IJ, int64_t nedge);
13 | 
14 | #endif /* VERIFY_HEADER_ */
15 | 


--------------------------------------------------------------------------------
/applications/graph500/xalloc.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(XALLOC_HEADER_)
 5 | #define XALLOC_HEADER_
 6 | 
 7 | void * xmalloc (size_t);
 8 | void * xmalloc_large (size_t);
 9 | void xfree_large (void *);
10 | void * xmalloc_large_ext (size_t);
11 | 
12 | /*
13 | void mark_large_unused (void *);
14 | void mark_large_willuse (void *);
15 | */
16 | 
17 | #endif /* XALLOC_HEADER_ */
18 | 


--------------------------------------------------------------------------------
/applications/graphlab/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | set(COMMON graphlab.hpp graphlab.cpp)
2 | 
3 | foreach(app pagerank sssp test pagerank_new cc bfs)
4 |   add_grappa_exe(graphlab-${app} ${app}.exe ${app}.cpp ${COMMON})
5 |   set_property(TARGET graphlab-${app} PROPERTY FOLDER "Graphlab")  # same target name as created above
6 | endforeach()
7 | 


--------------------------------------------------------------------------------
/applications/graphlab/README.md:
--------------------------------------------------------------------------------
 1 | GraphLab API in Grappa
 2 | ----------------------
 3 | 
 4 | This directory contains code to emulate the [GraphLab][] API with a simple layer on top of Grappa. This API is not perfectly compatible with GraphLab code, but the example vertex programs in this directory are mostly faithful to those in GraphLab proper.
 5 | 
 6 | There are currently two implementations:
 7 | 
 8 | - `NaiveGraphlabEngine` (`graphlab_naive.hpp`): implements a restricted GraphLab API using the builtin Grappa Graph structure. Most notably, only `gather:IN_EDGES` and `scatter:OUT_EDGES` are supported.
 9 | 
10 | - `GraphlabEngine` (`graphlab_splitv.hpp`): built on a custom graph structure mimicking GraphLab's greedy vertex-split representation. This is currently slower, and still does not implement the full range of options. `pagerank_new.cpp` is an example that uses this engine.
11 | 
12 | [GraphLab]: http://graphlab.org


--------------------------------------------------------------------------------
/applications/graphlab/graphlab.cpp:
--------------------------------------------------------------------------------
 1 | ////////////////////////////////////////////////////////////////////////
 2 | /// GraphLab is an API and runtime system for graph-parallel computation.
 3 | /// This is a rough prototype implementation of the programming model to
 4 | /// demonstrate using Grappa as a platform for other models.
 5 | /// More information on the actual GraphLab system can be found at:
 6 | /// graphlab.org.
 7 | ////////////////////////////////////////////////////////////////////////
 8 | 
 9 | #include "graphlab.hpp"
10 | 
11 | GRAPPA_DEFINE_METRIC(SummarizingMetric<double>, iteration_time, 0);
12 | GRAPPA_DEFINE_METRIC(SummarizingMetric<int>, core_set_size, 0);
13 | 
14 | DEFINE_int32(max_iterations, 1024, "Stop after this many iterations, no matter what.");
15 | 


--------------------------------------------------------------------------------
/applications/isopath/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(grappa)
2 | 


--------------------------------------------------------------------------------
/applications/isopath/generator/Makefile.grappa:
--------------------------------------------------------------------------------
 1 | include ../../../system/Makefile
 2 | 
 3 | CFLAGS += -O3 -DGRAPH_GENERATOR_GRAPPA -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg
 4 | CFLAGS += -I$(GRAPPA_HOME)/system -I$(GRAPPA_HOME)/system/tasks
 5 | CFLAGS += -D_GRAPPA
 6 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g
 7 | 
 8 | # force cplusplus on .c
 9 | CC= $(CXX)
10 | 
11 | GENERATOR_OBJS = graph_generator.o make_graph.o splittable_mrg.o utils.o
12 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
13 | 
14 | all: generator_test_mpi
15 | 	
16 | splittable_mrg.o: splittable_mrg.c  # delegated to Makefile.seq
17 | 	$(MAKE) -f Makefile.seq $@
18 | 
19 | genlib: $(GENERATOR_OBJS) $(GENERATOR_HEADERS)
20 | 	ar rcs generator.a $(GENERATOR_OBJS)
21 | 
22 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
23 | 	$(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm
24 | 
25 | clean:
26 | 	rm -f generator_test_mpi
27 | 	rm -f $(GENERATOR_OBJS) generator.a
28 | 


--------------------------------------------------------------------------------
/applications/isopath/generator/Makefile.mpi:
--------------------------------------------------------------------------------
 1 | CC = mpicc
 2 | CFLAGS = -std=c99 -O3 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg
 3 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g
 4 | LDFLAGS = -O3
 5 | # LDFLAGS = -g
 6 | MPICC = mpicc
 7 | 
 8 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 9 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
10 | 
11 | all: generator_test_mpi
12 | 
13 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
14 | 	$(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm
15 | 
16 | clean:
17 | 	-rm -f generator_test_mpi
18 | 


--------------------------------------------------------------------------------
/applications/isopath/generator/Makefile.omp:
--------------------------------------------------------------------------------
 1 | CC = gcc -fopenmp
 2 | CFLAGS = -std=c99 -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_OMP # -g -pg
 3 | LDFLAGS = -O3
 4 | 
 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 7 | 
 8 | all: generator_test_omp
 9 | 
10 | generator_test_omp: generator_test_omp.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
11 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_omp generator_test_omp.c $(GENERATOR_SOURCES) -lm
12 | 
13 | clean:
14 | 	-rm -f generator_test_omp
15 | 


--------------------------------------------------------------------------------
/applications/isopath/generator/Makefile.seq:
--------------------------------------------------------------------------------
 1 | CC = cc
 2 | CFLAGS = -g -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_SEQ # -g -pg
 3 | # CFLAGS = -g -Wall -Drestrict=__restrict__
 4 | LDFLAGS = -g # -g -pg
 5 | 
 6 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 7 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 8 | 
 9 | all: generator_test_seq
10 | 
11 | generator_test_seq: generator_test_seq.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
12 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_seq generator_test_seq.c $(GENERATOR_SOURCES) -lm
13 | 
14 | clean:
15 | 	-rm -f generator_test_seq
16 | 


--------------------------------------------------------------------------------
/applications/isopath/generator/Makefile.xmt:
--------------------------------------------------------------------------------
 1 | CC = cc
 2 | CFLAGS = -DNDEBUG 
 3 | LDFLAGS = $(CFLAGS) # -g -pg
 4 | 
 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 7 | 
 8 | all: generator_test_xmt
 9 | 
10 | generator_test_xmt: generator_test_xmt.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
11 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_xmt generator_test_xmt.c $(GENERATOR_SOURCES) -lm
12 | 
13 | clean:
14 | 	-rm -f generator_test_xmt
15 | 


--------------------------------------------------------------------------------
/applications/isopath/grappa/.gitignore:
--------------------------------------------------------------------------------
1 | .igor
2 | 


--------------------------------------------------------------------------------
/applications/isopath/grappa/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | set(SOURCES
 3 |   common.h
 4 |   oned_csr.h
 5 |   oned_csr.cpp
 6 |   options.h
 7 |   options.cpp
 8 |   timer.h
 9 |   ../prng.c
10 |   simple_graphs.hpp
11 |   simple_graphs.cpp
12 | )
13 | 
14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
15 | add_definitions(-Drestrict=__restrict__ -DGRAPH_GENERATOR_GRAPPA)
16 | 
17 | add_grappa_application(isopath.exe ${SOURCES} isopath.cpp)
18 | target_link_libraries(isopath.exe generator)
19 | 


--------------------------------------------------------------------------------
/applications/isopath/grappa/graph.cpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "graph.hpp"
3 | 
4 | 


--------------------------------------------------------------------------------
/applications/isopath/grappa/options.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #ifndef _OPTIONS_H
 5 | #define _OPTIONS_H
 6 | 
 7 | extern int VERBOSE;
 8 | extern int use_RMAT;
 9 | extern char *dumpname;
10 | extern char *rootname;
11 | 
12 | #define A_PARAM 0.57
13 | #define B_PARAM 0.19
14 | #define C_PARAM 0.19
15 | /* Hence D = 0.05. */
16 | 
17 | extern double A, B, C, D;
18 | 
19 | #define NBFS_max 64
20 | extern int NBFS;
21 | 
22 | #define default_SCALE ((int64_t)14)
23 | #define default_edgefactor ((int64_t)16)
24 | 
25 | extern int64_t SCALE;
26 | extern int64_t edgefactor;
27 | 
28 | extern bool load_checkpoint;
29 | extern bool write_checkpoint;
30 | 
31 | extern bool verify;
32 | 
33 | void get_options (int argc, char **argv);
34 | 
35 | #endif /* _OPTIONS_H */
36 | 


--------------------------------------------------------------------------------
/applications/isopath/grappa/simple_graphs.hpp:
--------------------------------------------------------------------------------
1 | //Generates tuple graph representation for a few simple graphs
2 | 
3 | void meshgrid_graph(int64_t * num_edges, GlobalAddress<packed_edge> * tuple_edges, int n, int m);
4 | 
5 | void balanced_tree_graph(int64_t * num_edges, GlobalAddress<packed_edge> * tuple_edges, int lvs, int branches);
6 | 
7 | void complete_graph(int64_t * num_edges, GlobalAddress<packed_edge> * tuple_edges, int vertices);
8 | 


--------------------------------------------------------------------------------
/applications/isopath/options.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(OPTIONS_HEADER_)
 5 | #define OPTIONS_HEADER_
 6 | 
 7 | #include <stdbool.h>
 8 | 
 9 | extern int VERBOSE;
10 | extern int use_RMAT;
11 | extern char *dumpname;
12 | extern char *rootname;
13 | 
14 | #define A_PARAM 0.57
15 | #define B_PARAM 0.19
16 | #define C_PARAM 0.19
17 | /* Hence D = 0.05. */
18 | 
19 | extern double A, B, C, D;
20 | 
21 | #define NBFS_max 8
22 | extern int NBFS;
23 | 
24 | #define default_SCALE ((int64_t)14)
25 | #define default_edgefactor ((int64_t)16)
26 | 
27 | extern int64_t SCALE;
28 | extern int64_t edgefactor;
29 | 
30 | void get_options (int argc, char **argv);
31 | 
32 | extern bool load_checkpoint;
33 | extern bool verify;
34 | 
35 | #endif /* OPTIONS_HEADER_ */
36 | 


--------------------------------------------------------------------------------
/applications/isopath/prng.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(PRNG_HEADER_)
 5 | #define PRNG_HEADER_
 6 | 
 7 | /** Initialize the PRNG, called in a sequential context. */
 8 | void init_random (void);
 9 | 
10 | extern uint64_t userseed;
11 | extern uint_fast32_t prng_seed[5];
12 | extern void *prng_state;
13 | 
14 | #ifdef __MTA__
15 | #include <mta_rng.h>
16 | #else
17 | #include <stdlib.h>
18 | static void prand(int64_t n, double * v) {
19 |   int64_t i;
20 |   extern int64_t xmtcompat_rand_initialized;
21 |   extern void xmtcompat_initialize_rand(void);
22 |   if (!xmtcompat_rand_initialized) xmtcompat_initialize_rand();
23 |   for (i = 0; i < n; ++i) {
24 |     v[i] = drand48();
25 |   }
26 | }
27 | #endif /* !defined(__MTA__) */
28 | 
29 | #endif /* PRNG_HEADER_ */
30 | 


--------------------------------------------------------------------------------
/applications/isopath/timer.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(TIMER_HEADER_)
 5 | #define TIMER_HEADER_
 6 | 
 7 | /** Start timing. */
 8 | void tic (void);
 9 | 
10 | /** Return seconds since last tic. */
11 | double toc (void);
12 | 
13 | /** return current seconds */
14 | double timer(void);
15 | 
16 | /** Macro to time a block. */
17 | #define TIME(timevar, what) do { tic (); what; timevar = toc(); } while (0)
18 | 
19 | #endif /* TIMER_HEADER_ */
20 | 


--------------------------------------------------------------------------------
/applications/isopath/verify.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(VERIFY_HEADER_)
 5 | #define VERIFY_HEADER_
 6 | 
 7 | #include "generator/graph_generator.h"
 8 | 
 9 | /** Verify a BFS tree, return volume or -1 if failed. */
10 | int64_t verify_bfs_tree (int64_t *bfs_tree, int64_t max_bfsvtx,
11 | 			 int64_t root,
12 | 			 const struct packed_edge *IJ, int64_t nedge);
13 | 
14 | #endif /* VERIFY_HEADER_ */
15 | 


--------------------------------------------------------------------------------
/applications/isopath/xalloc.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */
 2 | /* Copyright 2010,  Georgia Institute of Technology, USA. */
 3 | /* See COPYING for license. */
 4 | #if !defined(XALLOC_HEADER_)
 5 | #define XALLOC_HEADER_
 6 | 
 7 | void * xmalloc (size_t);
 8 | void * xmalloc_large (size_t);
 9 | void xfree_large (void *);
10 | void * xmalloc_large_ext (size_t);
11 | 
12 | /*
13 | void mark_large_unused (void *);
14 | void mark_large_willuse (void *);
15 | */
16 | 
17 | #endif /* XALLOC_HEADER_ */
18 | 


--------------------------------------------------------------------------------
/applications/join/.gitignore:
--------------------------------------------------------------------------------
1 | grappa*.cpp
2 | strings.cc
3 | strings.h
4 | utils.h
5 | utils.cc
6 | *bk
7 | *bkup
8 | 


--------------------------------------------------------------------------------
/applications/join/Aggregates.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | namespace Aggregates {
 4 |   // Binary fold steps: each takes the running state plus the next input
 5 |   // value and returns the updated state.
 6 |   // SUM adds the incoming value to the running state.
 7 |   template < typename State, typename UV >
 8 |     State SUM(State acc, UV value) { return acc + value; }
 9 | 
10 |   // COUNT adds one per input; the value itself is not looked at.
11 |   template < typename State, typename UV >
12 |     State COUNT(State acc, UV) { return acc + 1; }
13 | }
14 | 


--------------------------------------------------------------------------------
/applications/join/HashJoin.cpp:
--------------------------------------------------------------------------------
1 | #include "HashJoin.hpp"
2 | Grappa::GlobalCompletionEvent default_join_left_gce;
3 | Grappa::GlobalCompletionEvent default_join_right_gce;
4 | Grappa::GlobalCompletionEvent default_join_reduce_gce;
5 | 


--------------------------------------------------------------------------------
/applications/join/Local_graph_tests.cpp:
--------------------------------------------------------------------------------
 1 | #include <boost/test/unit_test.hpp>
 2 | #include "local_graph.hpp"
 3 | 
 4 | 
 5 | BOOST_AUTO_TEST_SUITE( Local_graph_tests );
 6 | 
 7 | 
 8 | 
 9 | BOOST_AUTO_TEST_CASE( testBasicList ) {
10 |   BOOST_MESSAGE("Testing basic adj list"); 
11 | 
12 |   std::vector<Edge> edges;
13 |   edges.push_back({4,5});
14 |   edges.push_back({6,7});
15 |   edges.push_back({10,11});
16 | 
17 |   LocalAdjListGraph g(edges);
18 |   BOOST_CHECK( g.neighbors(4)[0] == 5 );
19 |   BOOST_CHECK( g.neighbors(6)[0] == 7 );
20 |   BOOST_CHECK( g.neighbors(10)[0] == 11 );
21 | }
22 | 
23 | BOOST_AUTO_TEST_SUITE_END();
24 | 


--------------------------------------------------------------------------------
/applications/join/MapReduce.cpp:
--------------------------------------------------------------------------------
 1 | #include "MapReduce.hpp"
 2 | 
 3 | namespace MapReduce {
 4 | Grappa::GlobalCompletionEvent default_mr_gce;
 5 | }
 6 | 
 7 | GRAPPA_DEFINE_METRIC(SummarizingMetric<double>, mr_mapping_runtime, 0);
 8 | GRAPPA_DEFINE_METRIC(SummarizingMetric<double>, mr_combining_runtime, 0);
 9 | GRAPPA_DEFINE_METRIC(SummarizingMetric<double>, mr_reducing_runtime, 0);
10 | GRAPPA_DEFINE_METRIC(SummarizingMetric<double>, mr_reallocation_runtime, 0);
11 | 


--------------------------------------------------------------------------------
/applications/join/MatchesDHT.cpp:
--------------------------------------------------------------------------------
 1 | #include "MatchesDHT.hpp"
 2 | 
 3 | // for all hash tables
 4 | //GRAPPA_DEFINE_METRIC(MaxMetric<uint64_t>, max_cell_length, 0);
 5 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, hash_tables_size, 0);
 6 | GRAPPA_DEFINE_METRIC(SummarizingMetric<uint64_t>, hash_tables_lookup_steps, 0);
 7 | 
 8 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, hash_remote_lookups, 0);
 9 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, hash_remote_inserts, 0);
10 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, hash_local_lookups, 0);
11 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, hash_local_inserts, 0);
12 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, hash_called_lookups, 0);
13 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, hash_called_inserts, 0);
14 | 


--------------------------------------------------------------------------------
/applications/join/Tuple.cpp:
--------------------------------------------------------------------------------
 1 | #include "Tuple.hpp"
 2 | #include <sstream>
 3 | 
 4 | std::ostream& operator<< (std::ostream& o, Tuple& t) {  // writes the columns as e.g. "( 4, 5,)"
 5 |   std::stringstream ss;  // format into a buffer so `o` receives the tuple in a single insertion
 6 |   ss << "(";
 7 |   for ( uint64_t i=0; i<TUPLE_LEN; i++) {
 8 |     ss << " " << t.columns[i];
 9 |     ss << ",";
10 |   }
11 |   ss << ")";
12 |   return o << ss.str();  // return the stream: falling off the end of a non-void function is undefined
13 | }
14 | 


--------------------------------------------------------------------------------
/applications/join/Tuple.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef TUPLE_HPP
 2 | #define TUPLE_HPP
 3 | 
 4 | #include <stdint.h>
 5 | #include <iostream>
 6 | 
 7 | #define TUPLE_LEN 2
 8 | struct Tuple {
 9 |   int64_t columns[TUPLE_LEN];
10 | };
11 | 
12 | std::ostream& operator<< (std::ostream& o, Tuple& t);
13 | 
14 | #endif // TUPLE_HPP
15 | 
16 | 


--------------------------------------------------------------------------------
/applications/join/convert2bin.cpp:
--------------------------------------------------------------------------------
 1 | #include "relation_io.hpp"
 2 | 
 3 | int main(int argc, char** argv) {
 4 |   // Expects FILE TYPE SEPS BURNS; a TYPE starting with "i" converts via toInt, "d" via toDouble.
 5 |   if (argc < 5) {
 6 |     std::cerr << "Usage: " << argv[0] << " FILE TYPE{i,d} SEPS BURNS" << std::endl;
 7 |     exit(1);
 8 |   }
 9 |   if (strncmp(argv[2], "i", 1) == 0) {
10 |     convert2bin<int64_t,decltype(&toInt)>( argv[1], &toInt, argv[3], atoi(argv[4]) );
11 |   } else if (strncmp(argv[2], "d", 1) == 0) {
12 |     convert2bin<double,decltype(&toDouble)>( argv[1], &toDouble, argv[3], atoi(argv[4]) );
13 |   } else {
14 |     std::cerr << "unrecognized type " << argv[2] << std::endl;
15 |     return 1;  // a bad TYPE is an error: do not report success to the caller
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/applications/join/double.txt:
--------------------------------------------------------------------------------
 1 | 0 1
 2 | 1 2
 3 | 2 3
 4 | 5 6
 5 | 6 7
 6 | 7 8
 7 | 101 102
 8 | 101 103
 9 | 101 104
10 | 101 105
11 | 101 106
12 | 101 107
13 | 101 108
14 | 101 109
15 | 101 110
16 | 101 111
17 | 


--------------------------------------------------------------------------------
/applications/join/extract_timestamps.sh:
--------------------------------------------------------------------------------
1 | input=$1  # log file to scan; the result is written to <input>.trace
2 | #schema
3 | #process-id event-type timestamp
4 | echo "stream type time" >"$input.trace"
5 | grep timestamp "$input" | awk '{gsub(/ +/, " ");print}' | cut -d ' ' -f 7,8,9 >>"$input.trace"
6 | 


--------------------------------------------------------------------------------
/applications/join/hex_tri.soln.txt:
--------------------------------------------------------------------------------
1 | 1 2 3
2 | 1 3 4
3 | 1 4 5
4 | 1 6 7
5 | 1 2 7
6 | 


--------------------------------------------------------------------------------
/applications/join/hex_tri.txt:
--------------------------------------------------------------------------------
 1 | 1	2
 2 | 1	3
 3 | 1	4
 4 | 1	5
 5 | 1	6
 6 | 1	7
 7 | 2	1
 8 | 3	1
 9 | 4	1
10 | 5	1
11 | 6	1
12 | 7	1
13 | 2	3
14 | 3	4
15 | 4	5
16 | 5	6
17 | 6	7
18 | 7	2
19 | 7	6
20 | 6	5
21 | 5	4
22 | 4	3
23 | 3	2
24 | 2	7
25 | 


--------------------------------------------------------------------------------
/applications/join/overlapping.txt:
--------------------------------------------------------------------------------
 1 | 0 1
 2 | 0 2
 3 | 2 3
 4 | 3 4
 5 | 4 5
 6 | 6 7
 7 | 101 102
 8 | 101 103
 9 | 101 104
10 | 101 105
11 | 101 106
12 | 101 107
13 | 101 108
14 | 101 109
15 | 101 110
16 | 101 111
17 | 


--------------------------------------------------------------------------------
/applications/join/relation.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Addressing.hpp>
 4 | 
 5 | template < typename T >                                                                                                
 6 | struct Relation {
 7 |     GlobalAddress<T> data;
 8 |     size_t numtuples;
 9 | };  
10 | 


--------------------------------------------------------------------------------
/applications/join/relation_io.cpp:
--------------------------------------------------------------------------------
1 | #include <gflags/gflags.h>
2 | 
3 | DEFINE_string(relations, ".", "path to relation files");
4 | DEFINE_bool(bin, true, "input file is binary format");
5 | 
6 | 


--------------------------------------------------------------------------------
/applications/join/scripts/activenodes.sh:
--------------------------------------------------------------------------------
1 | squeue | grep bdmyers | getcolumn.sh 9  # column 9 of this user's squeue rows
2 | 


--------------------------------------------------------------------------------
/applications/join/scripts/forall.sh:
--------------------------------------------------------------------------------
 1 | cmd=$1  # command to run on each host
 2 | 
 3 | declare -a arr  # host names collected from stdin
 4 | 
 5 | while read line 
 6 | do
 7 |     arr+=($line)  # unquoted: a line holding several words contributes one entry per word
 8 | done
 9 | 
10 | for h in "${arr[@]}"
11 | do
12 |     ssh $h $cmd  # run the command on that host; $cmd is word-split by the local shell first
13 | done
14 | 


--------------------------------------------------------------------------------
/applications/join/scripts/getcolumn.sh:
--------------------------------------------------------------------------------
1 | awk '{ gsub(/[ \t]+/, " ");print }' | cut -d ' ' -f $1
2 | 


--------------------------------------------------------------------------------
/applications/join/scripts/nodes_nested2names.sh:
--------------------------------------------------------------------------------
1 | scontrol show hostname $1
2 | 


--------------------------------------------------------------------------------
/applications/join/scripts/pidlist.sh:
--------------------------------------------------------------------------------
1 | ps aux | grep grappa | grep -v grep | getcolumn.sh 2
2 | 


--------------------------------------------------------------------------------
/applications/join/single.txt:
--------------------------------------------------------------------------------
 1 | 0 1
 2 | 0 2
 3 | 2 3
 4 | 3 4
 5 | 1 11
 6 | 6 7
 7 | 101 102
 8 | 101 103
 9 | 101 104
10 | 101 105
11 | 101 106
12 | 101 107
13 | 101 108
14 | 101 109
15 | 101 110
16 | 101 111
17 | 


--------------------------------------------------------------------------------
/applications/join/small_tri.soln.txt:
--------------------------------------------------------------------------------
1 | 1 2 3
2 | 2 3 4
3 | 


--------------------------------------------------------------------------------
/applications/join/small_tri.txt:
--------------------------------------------------------------------------------
1 | 1	2
2 | 2	3
3 | 3	1
4 | 3	4
5 | 4	2
6 | 4	5
7 | 5	1
8 | 


--------------------------------------------------------------------------------
/applications/join/sp2b.100mb.sh:
--------------------------------------------------------------------------------
1 | DIR=$SP2B/bin
2 | rm -f sp2bench_1m sp2bench_1m.index
3 | ln -s $DIR/sp2b.100mb.i sp2bench_1m
4 | ln -s $DIR/sp2b.100mb.index sp2bench_1m.index
5 | export NTUPLES=`wc -l $DIR/sp2b.100mb.i | cut -d ' ' -f1`
6 | 


--------------------------------------------------------------------------------
/applications/join/sp2b.100t.sh:
--------------------------------------------------------------------------------
1 | DIR=/sampa/home/bdmyers/escience/datalogcompiler/c_test_environment
2 | rm -f sp2bench_1m sp2bench_1m.index
3 | ln -s $DIR/sp2b.100t.i sp2bench_1m
4 | ln -s $DIR/sp2b.100t.index sp2bench_1m.index
5 | export NTUPLES=`wc -l $DIR/sp2b.100t.i | cut -d ' ' -f1`
6 | 


--------------------------------------------------------------------------------
/applications/join/sp2b.1gb.sh:
--------------------------------------------------------------------------------
1 | DIR=$SP2B/bin
2 | rm -f sp2bench_1m sp2bench_1m.index
3 | ln -s $DIR/sp2b.1gb.i sp2bench_1m
4 | ln -s $DIR/sp2b.1gb.index sp2bench_1m.index
5 | export NTUPLES=`wc -l $DIR/sp2b.1gb.i | cut -d ' ' -f1`
6 | 


--------------------------------------------------------------------------------
/applications/join/squares.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "Query.hpp"
 3 | #include "grappa/graph.hpp"
 4 | 
 5 | class SquareQuery : public Query {
 6 |   public:
 7 |     virtual void preprocessing(std::vector<tuple_graph> relations);
 8 | 
 9 |     virtual void execute(std::vector<tuple_graph> relations);
10 | };
11 | 


--------------------------------------------------------------------------------
/applications/join/squares_bushy.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "Query.hpp"
 3 | #include "grappa/graph.hpp"
 4 | 
 5 | class SquareBushyPlan : public Query {
 6 |   public:
 7 |     virtual void preprocessing(std::vector<tuple_graph> relations);
 8 | 
 9 |     virtual void execute(std::vector<tuple_graph> relations);
10 | };
11 | 


--------------------------------------------------------------------------------
/applications/join/squares_partition.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "Query.hpp"
 3 | #include "grappa/graph.hpp"
 4 | 
 5 | class SquarePartition4way: public Query {
 6 |   private:
 7 |     GlobalAddress<Graph<Vertex>> index; 
 8 |   public:
 9 |     virtual void preprocessing(std::vector<tuple_graph> relations);
10 | 
11 |     virtual void execute(std::vector<tuple_graph> relations);
12 | };
13 | 


--------------------------------------------------------------------------------
/applications/join/squares_partition_bushy.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "Query.hpp"
 3 | #include "grappa/graph.hpp"
 4 | 
 5 | class SquarePartitionBushy4way: public Query {
 6 |   private:
 7 |     GlobalAddress<Graph<Vertex>> index; 
 8 |   public:
 9 |     virtual void preprocessing(std::vector<tuple_graph> relations);
10 | 
11 |     virtual void execute(std::vector<tuple_graph> relations);
12 | };
13 | 


--------------------------------------------------------------------------------
/applications/join/stats.cpp:
--------------------------------------------------------------------------------
 1 | #include "stats.h"
 2 | 
 3 | GRAPPA_DEFINE_METRIC(SimpleMetric<double>, query_runtime, 0);
 4 | GRAPPA_DEFINE_METRIC(SimpleMetric<double>, scan_runtime, 0);
 5 | GRAPPA_DEFINE_METRIC(SimpleMetric<double>, in_memory_runtime,0);
 6 | GRAPPA_DEFINE_METRIC(SimpleMetric<double>, init_runtime,0);
 7 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, join_coarse_result_count,0);
 8 | GRAPPA_DEFINE_METRIC(SimpleMetric<uint64_t>, emit_count,0);
 9 | 
10 | 


--------------------------------------------------------------------------------
/applications/join/stats.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Metrics.hpp>
 4 | 
 5 | GRAPPA_DECLARE_METRIC(SimpleMetric<double>, query_runtime);
 6 | GRAPPA_DECLARE_METRIC(SimpleMetric<double>, scan_runtime);
 7 | GRAPPA_DECLARE_METRIC(SimpleMetric<double>, in_memory_runtime);
 8 | GRAPPA_DECLARE_METRIC(SimpleMetric<double>, init_runtime);
 9 | GRAPPA_DECLARE_METRIC(SimpleMetric<uint64_t>, join_coarse_result_count);
10 | GRAPPA_DECLARE_METRIC(SimpleMetric<uint64_t>, emit_count);
11 | 


--------------------------------------------------------------------------------
/applications/join/triangles.sql:
--------------------------------------------------------------------------------
1 | \timing
2 | 
3 | select E1.src,E2.src,E3.src 
4 | from followedby E1, followedby E2, followedby E3
5 | where E1.dest=E2.src and E2.dest=E3.src and E3.dest=E1.src -- triangle select
6 | and E1.src < E2.src and E2.src < E3.src; -- no duplicates
7 | 


--------------------------------------------------------------------------------
/applications/join/utility.cpp:
--------------------------------------------------------------------------------
 1 | #include "utility.hpp"
 2 | #include <vector>
 3 | 
 4 | /// Integer fourth root: the largest r >= 0 such that r^4 <= x.
 5 | ///
 6 | /// @param x value to take the root of
 7 | /// @return floor(x^(1/4)); 0 when x <= 0
 8 | ///
 9 | /// Computed by direct search instead of a fixed lookup table, so exact
10 | /// fourth powers map to their root (fourth_root(2401) == 7), inputs past
11 | /// any table size still work, and negative inputs terminate.
12 | int64_t fourth_root(int64_t x) {
13 |   if (x <= 0) {
14 |     return 0;
15 |   }
16 |   int64_t root = 0;
17 |   for (;;) {
18 |     const int64_t next = root + 1;
19 |     // Tests next^4 > x. Written with divisions so nothing can overflow:
20 |     // the nested floor divisions equal floor(x / next^3).
21 |     if (next > x / next / next / next) {
22 |       return root;
23 |     }
24 |     root = next;
25 |   }
26 | }
27 | 
28 | 
29 | std::function<int64_t (int64_t)> makeHash( int64_t dim ) {
30 |   // Maps a key to its remainder modulo dim (not the identity).
31 |   return [dim](int64_t key) -> int64_t { return key % dim; };
32 | }
33 | 
34 | 


--------------------------------------------------------------------------------
/applications/join/utility.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstdint>
 4 | #include <unordered_set>
 5 | #include <utility>
 6 | #include <functional>
 7 | 
 8 | 
 9 | typedef std::pair<int64_t,int64_t> pair_t;  // pair of 64-bit integers
10 | namespace std {
11 |   template <> struct hash<pair_t> {  // lets pair_t be hashed by the std:: unordered containers
12 |     size_t operator()(const pair_t& x) const {
13 |       const uint64_t p = 32416152883ULL; // prime; unsigned so overflow wraps instead of being undefined
14 |       return static_cast<size_t>(p * static_cast<uint64_t>(x.first) + static_cast<uint64_t>(x.second));
15 |     }
16 |   };
17 | }
18 | 
19 | 
20 | 
21 | int64_t fourth_root(int64_t x);
22 | 
23 | std::function<int64_t (int64_t)> makeHash( int64_t dim );
24 | 


--------------------------------------------------------------------------------
/applications/nativegraph/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(bfs)
2 | add_subdirectory(cc)
3 | add_subdirectory(sssp)
4 | 


--------------------------------------------------------------------------------
/applications/nativegraph/README.md:
--------------------------------------------------------------------------------
1 | Simple Graph Algorithms
2 | -----------------------
3 | 
4 | This directory contains some graph algorithms implemented directly against Grappa's Graph data structure. These can be contrasted against the implementations in `applications/graphlab`, which are implemented at a higher level using the GraphLab API emulation.
5 | 
6 | Be warned: in some cases, for instance `bfs/bfs_beamer`, this "native" version is the fastest implementation, but in many cases the GraphLab version is better optimized and more efficient, and this `nativegraph` version is more for demonstration purposes.
7 | 


--------------------------------------------------------------------------------
/applications/nativegraph/bfs/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | set(SOURCES main.cpp common.hpp)
2 | 
3 | add_grappa_application(bfs_queues.exe bfs_queues.cpp ${SOURCES})
4 | add_grappa_application(bfs_spmd.exe bfs_spmd.cpp ${SOURCES})
5 | add_grappa_application(bfs_beamer.exe bfs_beamer.cpp ${SOURCES})
6 | 


--------------------------------------------------------------------------------
/applications/nativegraph/bfs/common.hpp:
--------------------------------------------------------------------------------
 1 | #include <Grappa.hpp>
 2 | #include <GlobalVector.hpp>
 3 | #include <graph/Graph.hpp>
 4 | #include "../verifier.hpp"
 5 | 
 6 | using namespace Grappa;
 7 | 
 8 | // additional data to attach to each vertex in the graph
 9 | struct BFSData {
10 |   int64_t parent;
11 |   int64_t level;
12 |   bool seen;
13 |   
14 |   void init() {
15 |     parent = -1;
16 |     level = 0;
17 |     seen = false;
18 |   }
19 | };
20 | 
21 | using G = Graph<BFSData,Empty>;
22 | 
23 | extern int64_t nedge_traversed;
24 | 
25 | void bfs(GlobalAddress<G> g, int nbfs, TupleGraph tg);
26 | 
27 | template< typename V, typename E >
28 | inline int64_t choose_root(GlobalAddress<Graph<V,E>> g) {
29 |   int64_t root; // candidate vertex index; re-drawn until the vertex has a nonzero nadj
30 |   do {
31 |     root = random() % g->nv;
32 |   } while (delegate::call(g->vs+root,[](typename Graph<V,E>::Vertex& v){ return v.nadj; }) == 0); // use this graph's own Vertex type rather than the BFS-specific G
33 |   return root;
34 | }
35 | 
36 | inline int64_t verify(TupleGraph tg, GlobalAddress<G> g, int64_t root) {
37 |   return VerificatorBase<G>::verify(tg, g, root);
38 | }
39 | 
40 | 


--------------------------------------------------------------------------------
/applications/nativegraph/cc/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | add_grappa_application(cc_kahan.exe main.cpp cc_kahan.hpp)
3 | 


--------------------------------------------------------------------------------
/applications/nativegraph/sssp/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_grappa_application(sssp.exe sssp.cpp sssp.hpp ../verifier.hpp)
2 | 


--------------------------------------------------------------------------------
/applications/pagerank/.gitignore:
--------------------------------------------------------------------------------
1 | *.d
2 | *.igor
3 | 


--------------------------------------------------------------------------------
/applications/pagerank/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(SOURCES
 2 |   spmv_mult.cpp
 3 |   spmv_mult.hpp
 4 | )
 5 | 
 6 | add_definitions(-Drestrict=__restrict__ -DGRAPH_GENERATOR_GRAPPA -D_GRAPPA)
 7 | 
 8 | add_grappa_application(pagerank.exe ${SOURCES} pagerank.cpp)
 9 | 
10 | add_grappa_application(mult.exe
11 |   ${SOURCES} mult_main.cpp
12 | )
13 | target_link_libraries(mult.exe generator)
14 | 


--------------------------------------------------------------------------------
/applications/pagerank/README.md:
--------------------------------------------------------------------------------
1 | This directory contains three Grappa programs.
2 | * pagerank.exe: A version of Pagerank using our current graph representation
3 | * pagerank_old.exe: A version of Pagerank using our previous graph representation
4 | * mult.exe: A matrix multiply example
5 | 


--------------------------------------------------------------------------------
/applications/pagerank/spmv_mult.hpp:
--------------------------------------------------------------------------------
 1 | // graph500/grappa/
 2 | // XXX shouldn't have to include this first: common.h and oned_csr.h have cyclic dependency
 3 | #pragma once
 4 | 
 5 | #include <Grappa.hpp>
 6 | #include <graph/Graph.hpp>
 7 | 
 8 | #include <iostream>
 9 | 
10 | using vindex = int;
11 | 
12 | struct PagerankData {
13 |   double * weights;
14 |   double v[2];
15 | };
16 | using PagerankVertex = Grappa::Vertex<PagerankData>;
17 | 
18 | void spmv_mult(GlobalAddress<Grappa::Graph<PagerankVertex>> g, vindex x, vindex y);
19 | 


--------------------------------------------------------------------------------
/applications/sort/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(grappa)
2 | 


--------------------------------------------------------------------------------
/applications/sort/grappa/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | set(SOURCES
 3 |   main.cpp
 4 |   npb_intsort.h
 5 | )
 6 | 
 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
 8 | add_definitions(-Drestrict=__restrict__)
 9 | 
10 | add_grappa_application(sort.exe ${SOURCES})
11 | 


--------------------------------------------------------------------------------
/applications/sort/grappa/npb_intsort.h:
--------------------------------------------------------------------------------
 1 | 
 2 | enum npb_class                  {  S,  W,  A,  B,  C,  D, None = -1 };
 3 | static const int NKEY_LOG2[]    = { 16, 20, 23, 25, 27, 29 };
 4 | static const int MAX_KEY_LOG2[] = { 11, 16, 19, 21, 23, 27 };
 5 | static const int NBUCKET_LOG2[] = { 10, 10, 10, 10, 10, 10 };
 6 | 
 7 | inline npb_class get_npb_class(char c) {
 8 |   switch (c) {
 9 |     case 'S': return S;
10 |     case 'W': return W;
11 |     case 'A': return A;
12 |     case 'B': return B;
13 |     case 'C': return C;
14 |     case 'D': return D;
15 |     default: return None;
16 |   }
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/applications/sort/grappa/test.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require "./sort.rb" # "./" resolves against the current working directory, not this file's location
 3 | # Single-configuration experiment run; $cmd, $dbpath, $table, etc. presumably come from sort.rb -- TODO confirm
 4 | $params = {
 5 |   scale: [16],
 6 |   log2buckets: [7],
 7 |   log2maxkey: [10],
 8 |   nnode: [12],
 9 |   ppn: [2],
10 |   nworkers: [1024],
11 |   flushticks: [2000000],
12 |   pollticks: [20000],
13 |   chunksize: [64],
14 |   threshold: [64],
15 |   io_blocks_per_node: [1],
16 |   io_blocksize_mb: [512],
17 |   nproc: expr('nnode*ppn'), # derived from nnode and ppn via expr (defined outside this file)
18 |   machine: [$machinename],
19 | }
20 | $opt_force = true
21 | 
22 | if __FILE__ == $PROGRAM_NAME # only run the experiments when executed directly, not when required
23 |   run_experiments($cmd, $params, $dbpath, $table, &$json_plus_fields_parser)
24 | end
25 | 


--------------------------------------------------------------------------------
/applications/util/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | file(GLOB UTILS
 3 |   "*/*.cpp"
 4 | )
 5 | 
 6 | # make separate build targets for each utility
 7 | foreach(file ${UTILS})
 8 |   get_filename_component(base ${file} NAME_WE)
 9 |   add_grappa_exe(util-${base} ${base}.exe ${file})
10 |   set_property(TARGET util-${base} PROPERTY FOLDER "Applications") # the target created above is util-${base}; ${name} is never set in this loop
11 | endforeach()
12 | 
13 | 
14 | 
15 | # TODO: this should really only be enabled when tracing is enabled
16 | 
17 | macro(add_vampir_exe target exe )
18 |   add_executable(${target} EXCLUDE_FROM_ALL ${ARGN})
19 |   set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_NAME "${exe}")
20 |   target_link_libraries(${target} 
21 |     Grappa
22 |     open-trace-format
23 |     sqlite3
24 |   )
25 | endmacro(add_vampir_exe)
26 | 
27 | macro(add_vampir_application name)
28 |   add_vampir_exe(${name} ${name} ${ARGN})
29 |   set_property(TARGET ${name} PROPERTY FOLDER "Applications") # For organization in Xcode project
30 | endmacro(add_vampir_application)
31 | 
32 | add_vampir_application(otf2sqlite.exe "otf2sqlite.cpp")
33 | 


--------------------------------------------------------------------------------
/applications/uts/.gitignore:
--------------------------------------------------------------------------------
1 | config.in
2 | out.txt
3 | uts-mem-shm
4 | .igor
5 | 


--------------------------------------------------------------------------------
/applications/uts/AUTHORS:
--------------------------------------------------------------------------------
 1 | The Unbalanced Tree Search (UTS) Project Team:
 2 | ---------------------------------------------
 3 | 
 4 |   University of Maryland:
 5 |     Bill Pugh        <pugh,
 6 |     Chau-Wen Tseng*   tseng at cs.umd.edu>
 7 | 
 8 |   The Ohio State University:
 9 |     James Dinan      <dinan,
10 |     Gerald Sabin      sabin,
11 |     P. Sadayappan*    saday at cse.ohio-state.edu>
12 | 
13 |   University of North Carolina, Chapel Hill:
14 |     Stephen Olivier  <olivier,
15 |     Jun Huan          huan,
16 |     Jinze Liu         liu,
17 |     Jan Prins*        prins at cs.unc.edu>
18 |   
19 |   Supercomputing Research Center:
20 |     Daniel Pryor
21 | 
22 |   * - indicates project PI
23 | 


--------------------------------------------------------------------------------
/applications/uts/Changelog:
--------------------------------------------------------------------------------
1 | 1.0.1 - Released 2/10/2010
2 | 
3 |  * Added uts_upc_enhanced to the distribution.
4 | 
5 | 1.0 - Released 11/11/2009
6 | 
7 |  * Initial public release of the benchmark.
8 | 


--------------------------------------------------------------------------------
/applications/uts/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009 See AUTHORS file for copyright holders
 2 | 
 3 | Permission is hereby granted, free of charge, to any person
 4 | obtaining a copy of this software and associated documentation
 5 | files (the "Software"), to deal in the Software without
 6 | restriction, including without limitation the rights to use,
 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the
 9 | Software is furnished to do so, subject to the following
10 | conditions:
11 | 
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/applications/uts/check_ctrk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # On Cray:
 3 | #!/opt/open/open/bin/perl
 4 | 
 5 | 
 6 | while (<>) {
 7 | 	next unless (/^CTRK/);
 8 | 
 9 | 	# Capture the work ID into $1
10 | 	/(0x[0-9A-Fa-f]+$)/;
11 | 	$id = $1;
12 | 
13 | 	if (/put chunk/) {
14 | 		$hash{$id}++;
15 | 		$nreleased++;
16 | 	}
17 | 	elsif (/got chunk/) {
18 | 		$hash{$id}--;
19 | 		$nacquired++;
20 | 	}
21 | #	elsif (!/TERMINATING/) {
22 | #		print "Warning: malformed entry.  $_";
23 | #	}
24 | }
25 | 
26 | print "Total Put = " . $nreleased . ", Total Got = " . $nacquired . "\n";
27 | 
28 | $errors = 0;
29 | 
30 | while(($key, $value) = each %hash) {
31 | 	($value > 0) and print "Never got: $key ($value)\n" and $errors++;
32 | 	($value < 0) and print "Never put: $key ($value)\n" and $errors++;
33 | }
34 | 
35 | print "$errors errors\n";
36 | 


--------------------------------------------------------------------------------
/applications/uts/configure.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | CONFDIR="config"
 4 | CONFFILE="config.in"
 5 | MYNAME="configure.sh"
 6 | 
 7 | function usage () {
 8 | 
 9 | 	echo "UTS - Unbalanced Tree Search Configuration.  Selects from available"
10 | 	echo "      configurations in the '$CONFDIR' directory."
11 | 	echo
12 | 	echo " Usage: $MYNAME CONFIGURATION_NAME"
13 | 	echo
14 | 	echo " Available Configurations:"
15 | 
16 | 	for file in ${CONFDIR}/*
17 | 	do
18 | 		[ -r $file ] && [ ! -d $file ] && echo "   $(echo $file | cut -d/ -f2)"
19 | 	done
20 | 
21 | }
22 | 
23 | if [ ! -d $CONFDIR ]
24 | then
25 | 	echo "Fatal error: Unable to access the config file directory, '$CONFDIR'!"
26 | 	exit 1
27 | fi
28 | 
29 | if [ $# -lt 1 ] || [ "$1" = '-h' ] || [ "$1" = '--help' ]   # "$1" quoted so an empty or multi-word argument cannot break the test
30 | then
31 | 	usage
32 | 	exit 0
33 | fi
34 | 
35 | if [ -r "$CONFDIR/$1" ]   # the requested configuration must exist and be readable
36 | then
37 | 	ln -sf "$CONFDIR/$1" "$CONFFILE"   # point config.in at the selected configuration
38 | 
39 | 	echo
40 | 	echo "Configuration changed.  Please review '$CONFDIR/$1' to ensure the"
41 | 	echo "new settings are correct."
42 | 	echo
43 | else
44 | 	echo "Could not find configuration file: $1"
45 | 	exit 1
46 | fi
47 | 


--------------------------------------------------------------------------------
/applications/uts/dlist.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *         ---- The Unbalanced Tree Search (UTS) Benchmark ----
 3 |  *  
 4 |  *  Copyright (c) 2010 See AUTHORS file for copyright holders
 5 |  *
 6 |  *  This file is part of the unbalanced tree search benchmark.  This
 7 |  *  project is licensed under the MIT Open Source license.  See the LICENSE
 8 |  *  file for copyright and licensing information.
 9 |  *
10 |  *  UTS is a collaborative project between researchers at the University of
11 |  *  Maryland, the University of North Carolina at Chapel Hill, and the Ohio
12 |  *  State University.  See AUTHORS file for more information.
13 |  *
14 |  */
15 | 
16 | #ifndef DLIST_H
17 | #define DLIST_H
18 | 
19 | typedef struct dcell *dlist;
20 | 
21 | struct dcell
22 | {
23 |   void *element;
24 |   dlist next;
25 |   dlist prev;
26 | };
27 | 
28 | extern dlist dcons(void *element, dlist prev, dlist next);
29 | extern dlist create_and_link(void *element, dlist prev, dlist next);
30 | extern void* unlink_and_free(dlist l);
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/applications/uts/rng/rng.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RNG_H
 2 | #define _RNG_H
 3 | 
 4 | /***********************************************************
 5 |  *                                                         *
 6 |  *  splitable random number generator to use:              *
 7 |  *     (BRG_RNG or DEVINE_RNG) sha1 hash                   *
 8 |  *     (UTS_ALFG) additive lagged fibonacci generator      *
 9 |  *                                                         *
10 |  ***********************************************************/
11 | 
12 | #if defined(UTS_ALFG)
13 | #  include "alfg.h"
14 | #  define RNG_TYPE 1
15 | #elif defined(BRG_RNG)
16 | #  include "brg_sha1.h"
17 | #  define RNG_TYPE 0
18 | #elif defined(DEVINE_RNG)
19 | #  include "devine_sha1.h"
20 | #  define RNG_TYPE 0
21 | #else
22 | #  error "No random number generator selected."
23 | #endif
24 | 
25 | #endif /* _RNG_H */
26 | 


--------------------------------------------------------------------------------
/applications/uts/shared_dlist.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *         ---- The Unbalanced Tree Search (UTS) Benchmark ----
 3 |  *  
 4 |  *  Copyright (c) 2010 See AUTHORS file for copyright holders
 5 |  *
 6 |  *  This file is part of the unbalanced tree search benchmark.  This
 7 |  *  project is licensed under the MIT Open Source license.  See the LICENSE
 8 |  *  file for copyright and licensing information.
 9 |  *
10 |  *  UTS is a collaborative project between researchers at the University of
11 |  *  Maryland, the University of North Carolina at Chapel Hill, and the Ohio
12 |  *  State University.  See AUTHORS file for more information.
13 |  *
14 |  */
15 | 
16 | #ifndef SHARED_DLIST_H
17 | #define SHARED_DLIST_H
18 | 
19 | #include <upc_relaxed.h>
20 | 
21 | typedef shared struct shr_dcell * shr_dlist;
22 | 
23 | struct shr_dcell
24 | {
25 |   shared void *element;
26 |   shr_dlist next;
27 |   shr_dlist prev;
28 | };
29 | 
30 | extern shr_dlist shr_dcons(shared void *element, shr_dlist prev, shr_dlist next);
31 | extern shr_dlist shr_create_and_link(shared void *element, shr_dlist prev, shr_dlist next);
32 | extern shared void* shr_unlink_and_free(shr_dlist l);
33 | 
34 | #endif /* SHARED_DLIST_H */
35 | 


--------------------------------------------------------------------------------
/bin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # file(COPY . DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 2 | file(GLOB scripts "*")
 3 | foreach(file ${scripts})
 4 |   get_filename_component(name ${file} NAME)
 5 |   file(RELATIVE_PATH relative_file ${CMAKE_CURRENT_BINARY_DIR} ${file})
 6 |   execute_process(COMMAND
 7 |     ln -sf ${relative_file} ${CMAKE_CURRENT_BINARY_DIR}/${name}
 8 |   )
 9 | endforeach()
10 | 
11 | install(PROGRAMS settings.sh DESTINATION "bin")
12 | 


--------------------------------------------------------------------------------
/bin/distcc_make:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # calls 'salloc' and runs 'make' inside the allocation via launch_distcc.sh,
 3 | # which sets DISTCC_HOSTS to the nodes of the allocation
 4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 5 | 
 6 | nnode=${DISTCC_NNODE-8}
 7 | partition=${DISTCC_PARTITION} # use default slurm partition if none specified
 8 | 
 9 | if [ ! -z $partition ]; then
10 |   partitionarg=-p$partition
11 | fi
12 | 
13 | exec salloc -N$nnode $partitionarg "$DIR/launch_distcc.sh" make "$@"
14 | 
15 | # note: for Bash Completion to work with this, find the bash_completion/make and add 'distcc_make' to the list of make commands to complete for (near the end of the file)
16 | 


--------------------------------------------------------------------------------
/bin/distcc_ninja:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # calls 'salloc' and runs 'ninja' inside the allocation via launch_distcc.sh,
3 | # which sets DISTCC_HOSTS to the nodes of the allocation
4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
5 | 
6 | nnode=${DISTCC_NNODE-8}
7 | 
8 | exec salloc -N$nnode "$DIR/launch_distcc.sh" ninja -j $((nnode*4)) "$@"
9 | 


--------------------------------------------------------------------------------
/bin/launch_distcc.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | ####################################################
 3 | # launch distcc on slurm allocation
 4 | # assumes that distcc is already running on those nodes
 5 | # usage: salloc -N4 launch_distcc.sh make -j
 6 | ####################################################
 7 | nodelist=`scontrol show hostname $SLURM_JOB_NODELIST | xargs`
 8 | # hosts="--randomize"
 9 | # for n in $nodelist; do
10 | #   hosts="$hosts $n,cpp,lzo"
11 | # done
12 | hosts="--randomize $nodelist" # non-pump mode
13 | export DISTCC_HOSTS="$hosts"
14 | export PS1="(distcc) $PS1"
15 | echo "export DISTCC_HOSTS='$hosts'"
16 | exec "$@"
17 | 


--------------------------------------------------------------------------------
/bin/settings.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # find Grappa installation location
 4 | SCRIPT_PATH="${BASH_SOURCE[0]}";
 5 | if ([ -h "${SCRIPT_PATH}" ])
 6 | then
 7 |     while([ -h "${SCRIPT_PATH}" ])
 8 |     do
 9 |         SCRIPT_PATH=`readlink "${SCRIPT_PATH}"`
10 |     done
11 | fi
12 | pushd . > /dev/null
13 | cd `dirname ${SCRIPT_PATH}` > /dev/null
14 | SCRIPT_PATH=`pwd`
15 | cd ..
16 | GRAPPA_PREFIX=`pwd`
17 | popd  > /dev/null
18 | 
19 | # make Grappa installation location visible
20 | export GRAPPA_PREFIX
21 | 
22 | # load important Grappa environment variables
23 | source $GRAPPA_PREFIX/bin/env.sh
24 | 
25 | 


--------------------------------------------------------------------------------
/bin/srun_epilog.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # for i in `ipcs -m | grep bholt | cut -d" " -f1`; do ipcrm -M $i; done
3 | ipcs -m | grep $USER | awk '{print $2}' | xargs -n1 -r ipcrm -m
4 | rm -f /dev/shm/GrappaLocaleSharedMemory
5 | 


--------------------------------------------------------------------------------
/doc/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(tutorial)
2 | 


--------------------------------------------------------------------------------
/doc/testing.md:
--------------------------------------------------------------------------------
 1 | Testing in Grappa
 2 | ===============================================================================
 3 | We use Boost::Test to test our code.
 4 | 
 5 | The full list of unit tests is found in `system/CMakeLists.txt`. Here, a macro `add_check` is used to define a test and tell whether it is currently expected to pass or fail.
 6 | 
 7 | Each test defined in this way creates two targets: `*.test` which builds the test, and `check-*`, which runs the test. In addition, there are aggregate targets `check-all-{pass,fail}` which build and run all the passing or failing tests respectively, and `check-all-{pass,fail}-compile-only` which, as the name implies, only compiles them.
 8 | 
 9 | Non-exhaustive list of test targets:
10 | - `New_loop_tests.test`: build loop tests
11 | - `check-New_loop_tests`: build and run loop tests
12 | - `check-all-pass`: build and run all passing tests
13 | - `check-all-pass-compile-only`: just build all the tests expected to pass
14 | 
15 | Someday we'll get this up and running with some CI server, but until then, we just try and run it whenever we make significant changes.
16 | 


--------------------------------------------------------------------------------
/doc/tutorial/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | file(GLOB TUTORIAL_SOURCES
 2 |   "*.cpp"
 3 | )
 4 | 
 5 | foreach(file ${TUTORIAL_SOURCES})
 6 |   get_filename_component(base ${file} NAME_WE)
 7 |   add_grappa_exe(tutorial-${base} ${base}.exe ${file})
 8 |   set_property(TARGET tutorial-${base} PROPERTY FOLDER "Tutorial") # the target created above is tutorial-${base}; ${name} is never set in this loop
 9 | endforeach()
10 | 


--------------------------------------------------------------------------------
/doc/tutorial/addressing_linear.cpp:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////
 2 | // tutorial/addressing_linear.cpp
 3 | //////////////////////////////////
 4 | #include <Grappa.hpp>
 5 | #include <GlobalAllocator.hpp>
 6 | 
 7 | using namespace Grappa;
 8 | 
 9 | int main(int argc, char *argv[]) {
10 |   init(&argc, &argv);
11 |   run([]{
12 |     auto array = global_alloc<long>(48);
13 |     for (auto i=0; i<48; i++) {
14 |       std::cout << "[" << i << ": core " << (array+i).core() << "] ";
15 |     }
16 |     std::cout << "\n";
17 |   });
18 |   finalize();
19 | }
20 | 
21 | //> srun --nodes=2 --ntasks-per-node=2 -- tutorial/addressing_linear.exe
22 | 


--------------------------------------------------------------------------------
/doc/tutorial/addressing_symmetric.cpp:
--------------------------------------------------------------------------------
 1 | /////////////////////////////////////
 2 | // tutorial/addressing_symmetric.cpp
 3 | /////////////////////////////////////
 4 | #include <Grappa.hpp>
 5 | #include <Collective.hpp>
 6 | #include <GlobalAllocator.hpp>
 7 | 
 8 | using namespace Grappa;
 9 | 
10 | struct Data {
11 |   size_t N;      // number of elements in `buffer`
12 |   long *buffer;  // heap array allocated by init()
13 |   
14 |   void init(size_t N) { // records the size and allocates the buffer
15 |     this->N = N;
16 |     this->buffer = new long[N]; // size the buffer from N (was a hard-coded 32 that ignored the argument)
17 |   }
18 | } GRAPPA_BLOCK_ALIGNED;
19 | 
20 | int main(int argc, char *argv[]) {
21 |   init(&argc, &argv);
22 |   run([]{    
23 |     // allocate a copy of Data on every core out of the global heap
24 |     GlobalAddress<Data> d = symmetric_global_alloc< Data >();
25 |     
26 |     on_all_cores([d]{
27 |       // use `->` overload to get pointer to local copy to call the method on
28 |       d->init(1024);
29 |     });
30 |     
31 |     // now we have a local copy of the struct available anywhere
32 |     on_all_cores([d]{
33 |       d->buffer[0] = d->N;
34 |     });
35 |   });
36 |   finalize();
37 | }
38 | 
39 | 


--------------------------------------------------------------------------------
/doc/tutorial/delegates.cpp:
--------------------------------------------------------------------------------
 1 | ///////////////////////////
 2 | // tutorial/delegates.cpp
 3 | ///////////////////////////
 4 | #include <Grappa.hpp>
 5 | #include <Delegate.hpp>
 6 | #include <Collective.hpp>
 7 | #include <GlobalAllocator.hpp>
 8 | #include <iostream>
 9 | 
10 | using namespace Grappa;
11 | 
12 | int main(int argc, char *argv[]) {
13 |   init(&argc, &argv);
14 |   run([]{
15 |     
16 |     size_t N = 50;
17 |     GlobalAddress<long> array = global_alloc<long>(N);
18 |     
19 |     // simple global write
20 |     for (size_t i = 0; i < N; i++) {
21 |       // array[i] = i
22 |       delegate::write( array+i, i );
23 |     }
24 |     
25 |     for (size_t i = 0; i < N; i += 10) {
26 |       // simple remote read
27 |       // value = array[i]
28 |       long value = delegate::read( array+i );
29 |       std::cout << "[" << i << "] = " << value;
30 |       
31 |       // do some arbitrary computation on the core that owns `array+i`
32 |       double v = delegate::call(array+i, [](long *a){ return tan(*a); });
33 |       std::cout << ", tan = " << v << std::endl;
34 |     }
35 |        
36 |   });
37 |   finalize();
38 | }
39 | 


--------------------------------------------------------------------------------
/doc/tutorial/hello_world_1.cpp:
--------------------------------------------------------------------------------
 1 | ///////////////////////////////
 2 | // tutorial/hello_world_1.cpp
 3 | ///////////////////////////////
 4 | #include <Grappa.hpp>
 5 | #include <iostream>
 6 | int main(int argc, char *argv[]) {
 7 |   // this code is running on all cores
 8 |   
 9 |   // initialize Grappa
10 |   Grappa::init(&argc, &argv);
11 | 
12 |   // spawn the root task
13 |   Grappa::run([]{
14 |     // this code is running as a task on a single core
15 |     std::cout << "Hello world from the root task!\n";
16 |   });
17 | 
18 |   // shutdown Grappa
19 |   Grappa::finalize();
20 | }
21 | 


--------------------------------------------------------------------------------
/doc/tutorial/hello_world_2.cpp:
--------------------------------------------------------------------------------
 1 | ///////////////////////////////
 2 | // tutorial/hello_world_2.cpp
 3 | ///////////////////////////////
 4 | #include <Grappa.hpp>
 5 | #include <Collective.hpp>
 6 | #include <iostream>
 7 | int main(int argc, char *argv[]) {
 8 | 
 9 |   Grappa::init(&argc, &argv);
10 | 
11 |   Grappa::run([]{
12 |     std::cout << "Hello world from the root task!\n";
13 | 
14 |     // SPMD execution on all cores
15 |     Grappa::on_all_cores([]{
16 |       std::cout << "Hello world from Core " << Grappa::mycore() << " of " << Grappa::cores()
17 |                 << " (locale " << Grappa::mylocale() << ")"<< "\n";
18 |     });
19 |     std::cout << "Exiting root task.\n";
20 |   });
21 | 
22 |   Grappa::finalize();
23 | }
24 | 


--------------------------------------------------------------------------------
/scratch/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # make separate build targets for each cpp file in scratch/ (must reconfigure after adding file)
 2 | # ex:
 3 | # > touch scratch/test.cpp
 4 | # > make rebuild_cache
 5 | # > make scratch-test
 6 | # > grappa_srun -- scratch/test.exe
 7 | 
 8 | file(GLOB SCRATCHES
 9 |   "*.cpp"
10 | )
11 | 
12 | foreach(file ${SCRATCHES})
13 |   get_filename_component(base ${file} NAME_WE)
14 |   add_grappa_exe(scratch-${base} ${base}.exe ${file})
15 |   set_property(TARGET scratch-${base} PROPERTY FOLDER "Scratch") # the target created above is scratch-${base}; ${name} is never set in this loop
16 | endforeach()
17 | 


--------------------------------------------------------------------------------
/system/Grappa.md:
--------------------------------------------------------------------------------
1 | Grappa: Developer Documentation    {#mainpage}
2 | ===========================
3 | These pages are the API documentation for the Grappa runtime system. For beginners, we recommend first reading through the <a href="https://github.com/uwsampa/grappa/blob/master/doc/tutorial/tutorial.md">tutorial</a> on Github, as it will explain the programming model and main ideas. For other information about the project, including technical papers about the techniques, we refer readers to the project website: [grappa.io](http://grappa.io).
4 | 
5 | Grappa is a runtime system for scaling irregular applications on commodity clusters. It's a PGAS library and runtime system that allows you to write global-view C++11 code that runs on distributed-memory computers.
6 | 
7 | Grappa is a research project and is still young! Please expect things to break. Please do not expect amazing performance yet. Please ask for help if you run into problems. We're excited for you to use the software and to help make Grappa a great tool for the irregular applications community! To find answers to questions or submit new ones, please use [Github Issues](https://github.com/uwsampa/grappa/issues).
8 | 


--------------------------------------------------------------------------------
/system/NTBuffer.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "NTBuffer.hpp"
 3 | 
 4 | namespace Grappa {
 5 | namespace impl {
 6 | 
 7 | int NTBuffer::initial_offset = 0;
 8 | 
 9 | } // namespace impl
10 | } // namespace Grappa
11 | 


--------------------------------------------------------------------------------
/system/NTMessage.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "NTMessage.hpp"
 3 | 
 4 | #include <x86intrin.h>
 5 | 
 6 | namespace Grappa {
 7 | namespace impl {
 8 | 
 9 | std::ostream& operator<<( std::ostream& o, const NTMessageBase& m ) {
10 |   // Render as "<Amessage core:D size:S fp:P>"; the handler bits are widened to 64 bits and printed as a pointer.
11 |   return o << "<Amessage core:" << m.dest_ << " size:" << m.size_ << " fp:" << reinterpret_cast<void*>( static_cast<uint64_t>( m.fp_ ) ) << ">";
12 | }
13 | 
14 | char * deaggregate_nt_buffer( char * buf, size_t size ) { // Walks [buf, buf+size), calling each message's deserializer; returns the cursor once it reaches or passes the end.
15 |   const char * end = buf + size;
16 |   while( buf < end ) { // NOTE(review): each iteration reads 8 bytes at buf; presumably size is a multiple of 8 -- confirm with callers
17 | #ifdef USE_NT_OPS
18 |     _mm_prefetch( buf, _MM_HINT_NTA ); // non-temporal prefetch hint for the current position...
19 |     _mm_prefetch( buf+64, _MM_HINT_NTA ); // ...and for the address 64 bytes ahead
20 | #endif
21 |     char * next = buf + 8; // default step: advance by one 8-byte word
22 |     if( 0 != *(reinterpret_cast<uint64_t*>(buf)) ) { // a nonzero word is treated as the start of a message
23 |       auto mb = reinterpret_cast<NTMessageBase*>(buf);
24 |       uint64_t fp_int = mb->fp_;
25 |       auto fp = reinterpret_cast<deserializer_t>(fp_int); // the stored fp_ bits are used as the deserializer's address
26 |       DVLOG(5) << "Deserializing with " << (void*) fp << "/" << *mb << " at " << (void*) buf;
27 |       next = (*fp)(buf); // the deserializer returns the position at which to continue
28 |     } else {
29 |       DVLOG(5) << "Skipping a word at " << (void*) buf; // an all-zero word is skipped
30 |     }
31 |     buf = next;
32 |   }
33 |   return buf;
34 | }
35 | 
36 | } // namespace impl
37 | } // namespace Grappa
38 | 
39 | 


--------------------------------------------------------------------------------
/system/doxygen_footer.html:
--------------------------------------------------------------------------------
 1 | <!-- HTML footer for doxygen 1.8.6-->
 2 | <!-- start footer part -->
 3 | <!--BEGIN GENERATE_TREEVIEW-->
 4 | <!-- <div id="nav-path" class="navpath">
 5 |   <ul>
 6 |     $navpath
 7 |     <li class="footer">$generatedby
 8 |     <a href="http://www.doxygen.org/index.html">
 9 |     <img class="footer" src="$relpath^doxygen.png" alt="doxygen"/></a> $doxygenversion </li>
10 |   </ul>
11 | </div> -->
12 | <!--END GENERATE_TREEVIEW-->
13 | <!--BEGIN !GENERATE_TREEVIEW-->
14 | <!-- <hr class="footer"/><address class="footer"><small> -->
15 | <!-- $generatedby &#160;<a href="http://www.doxygen.org/index.html"> -->
16 | <!-- <img class="footer" src="$relpath^doxygen.png" alt="doxygen"/> -->
17 | <!-- </a> $doxygenversion -->
18 | <!-- </small></address> -->
19 | <!--END !GENERATE_TREEVIEW-->
20 | </body>
21 | </html>
22 | 


--------------------------------------------------------------------------------
/system/runlatencyswitch.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require '../experiment_utils'
 3 | 
 4 | 
 5 | db = "context_switch.db"
 6 | table = :context_switch_latency
 7 | 
 8 | cmd = "make mpi_test TARGET=ContextSwitchLatency_tests.test \
 9 | NNODE=%{nnode} \
10 | PPN=%{ppn} \
11 | VERBOSE_TESTS=1 \
12 | SRUN_FLAGS=--time=5 \
13 | GARGS=' \
14 | --lines=%{touched_cachelines}' 2>&1 |tee out.txt"
15 | 
16 | 
17 | params = {
18 |     trial: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
19 |     nnode: [1],
20 |     ppn: [1],
21 |     #total_iterations: [51200000],
22 |     machine: ['cluster'],
23 |     touched_cachelines: [1,4,16,32,128,512,1024,8192,16000,50000,150000,500000,1000000,1500000,4000000],
24 |     problem: ['switch_latency'],
25 | }
26 | 
27 | 
28 | parser = lambda{ |cmdout|
29 |     records = {}
30 | 
31 |     # parse experiment specific results
32 |     dict = /time = (?<switch_time>\d+\.\d+e-\d+)/.match(cmdout).dictionize
33 |     
34 |     if dict.empty? then
35 |         raise "Output string does not match"
36 |     end
37 | 
38 |     records.merge!(dict)
39 | 
40 |     records
41 | }
42 | 
43 | run_experiments(cmd, params, db, table, &parser)
44 | 


--------------------------------------------------------------------------------
/system/tests/igor_datastructs.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'igor'
 3 | 
 4 | # inherit parser, sbatch_flags
 5 | require_relative '../util/igor_common.rb'
 6 | 
 7 | Igor do
 8 |   database '~/exp/pgas.sqlite', :queue
 9 |   # Fold the shared GFLAGS entries into this experiment's parameters.
10 |   @params.merge! GFLAGS
11 |   # Drop any existing --time flag and append a one-hour limit instead.
12 |   @sbatch_flags.delete_if{|e| e =~ /--time/} << "--time=1:00:00"
13 |   # @test_cmd[test, extras] builds the grappa_srun command line for one test; igor_hashset.rb, igor_hashmap.rb, igor_queue.rb and igor_stack.rb call it as well.
14 |   @test_cmd = -> test, extras { %Q[ ../bin/grappa_srun --test=#{test} --no-verbose -- #{GFLAGS.expand} #{extras}] }
15 |   command @test_cmd['GlobalVector_tests','']
16 |     
17 |   params {
18 |     nnode 2
19 |     ppn   1
20 |   }
21 |   
22 |   interact # enter interactive mode
23 | end
24 | 


--------------------------------------------------------------------------------
/system/tests/igor_hashmap.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require_relative 'igor_hashset'
 3 | 
 4 | Igor do
 5 |   @dbtable = :hashmap
 6 |   # Same test binary as igor_hashset.rb (GlobalHash_tests), run with --map_perf instead of --set_perf.
 7 |   command @test_cmd['GlobalHash_tests', '--map_perf']
 8 |   
 9 |   interact
10 | end
11 | 


--------------------------------------------------------------------------------
/system/tests/igor_hashset.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require_relative 'igor_datastructs'
 3 | 
 4 | Igor do
 5 |   @dbtable = :hashset
 6 |   
 7 |   include Isolatable
 8 |   isolate 'GlobalHash_tests.test'
 9 |   # Add this benchmark's flags to the shared GFLAGS (one default value each), then merge them into @params.
10 |   GFLAGS.merge!({
11 |     nelems: [1024],
12 |     ntrials: [1],
13 |     max_key: [1024],
14 |     global_hash_size: [1024],
15 |     fraction_lookups: [0.5],
16 |     insert_async: [0],
17 |   })
18 |   @params.merge!(GFLAGS)
19 |   command @test_cmd['GlobalHash_tests', '--set_perf']
20 |   # Settings for this run: nelems, max_key and global_hash_size are each given as a 2**10 expression.
21 |   params {
22 |     version 'fc_looks_fixed'
23 |     log_nelems 10; nelems expr('2**log_nelems')
24 |     log_max_key 10; max_key expr('2**log_max_key')
25 |     global_hash_size expr('2**log_max_key')
26 |     ntrials 1
27 |   }
28 |   
29 |   interact
30 | end
31 | 


--------------------------------------------------------------------------------
/system/tests/igor_queue.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require_relative 'igor_datastructs'
 3 | 
 4 | Igor do
 5 |   include Isolatable
 6 |   
 7 |   @dbtable = :queue
 8 |   
 9 |   isolate "GlobalVector_tests.test"
10 |   # Add the queue benchmark's flags to the shared GFLAGS (one default value each), then merge them into @params.
11 |   GFLAGS.merge!({
12 |     ntrials: [1],
13 |     nelems: [1024],
14 |     vector_size: [1024],
15 |     fraction_push: [0.5],
16 |     flat_combining_local_only: [0],
17 |   })
18 |   @params.merge! GFLAGS
19 |   command @test_cmd['GlobalVector_tests', '--queue_perf']
20 |   # Settings for this run: nelems is 2**log_nelems and vector_size is twice that.
21 |   params {
22 |     version 'fixed_random'
23 |     log_nelems 10
24 |     nelems expr('2**log_nelems')
25 |     vector_size expr('(2**log_nelems)*2')
26 |     ntrials 1
27 |   }
28 |   
29 |   interact
30 | end
31 | 


--------------------------------------------------------------------------------
/system/tests/igor_stack.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require_relative 'igor_queue'
 3 | 
 4 | Igor do
 5 |   include Isolatable
 6 |   
 7 |   @dbtable = :stack
 8 |   
 9 |   isolate "GlobalVector_tests.test"
10 |   # Same test binary as igor_queue.rb (GlobalVector_tests), run with --stack_perf instead of --queue_perf.
11 |   command @test_cmd['GlobalVector_tests', '--stack_perf']
12 |   
13 |   params {
14 |     version 'matching_better'
15 |   }
16 |   
17 |   interact
18 | end
19 | 


--------------------------------------------------------------------------------
/system/tests/igor_tests.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'igor'
 3 | 
 4 | # inherit parser, sbatch_flags
 5 | require_relative '../util/igor_common.rb'
 6 | 
 7 | Igor do
 8 |   database '~/exp/test.sqlite', :vector
 9 |   
10 |   # merge the shared GFLAGS into this experiment's parameters (this script performs no isolate/sbcast step)
11 |   params.merge!(GFLAGS)
12 |   # $cmd builds the grappa_srun command line; %{name} is left as a placeholder matching the `name` parameter set below.
13 |   $cmd = -> { %Q[ ../bin/grappa_srun --no-verbose --test=%{name} -- #{GFLAGS.expand}] }
14 |   command $cmd[]
15 |   # Drop any existing --time flag and append a 15-minute limit instead.
16 |   sbatch_flags.delete_if{|e| e =~ /--time/} << "--time=15:00"
17 |   
18 |   params {
19 |     name 'GlobalVector_tests'
20 |     nnode 2
21 |     ppn   1
22 |     scale 10
23 |     nelems expr('2**scale')
24 |   }
25 |   
26 |   interact # enter interactive mode
27 | end
28 | 


--------------------------------------------------------------------------------
/system/utils/README:
--------------------------------------------------------------------------------
1 | Various miscellaneous utility functions.
2 | TODO: move the general-purpose ones out of system/
3 | 


--------------------------------------------------------------------------------
/third-party/bashflags/test/bool.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | DIR="$(dirname "${BASH_SOURCE[0]}")"  # dirname also works when the script path contains no slash (e.g. `bash bool.bash`)
 3 | source "$DIR/../flags.bash"
 4 | # Declare two boolean flags; only 'foo' is inspected below.
 5 | define_bool_flag 'foo' 'help text' 'f'
 6 | define_bool_flag 'bar' 'useless flag' 'b'
 7 | # Quote "$@" so arguments containing whitespace reach the parser intact.
 8 | parse_flags "$@"
 9 | # Check the flag three ways (flags_true helper, string comparison, running its value as a command); both branches print the value.
10 | if flags_true $FLAGS_foo && [ $FLAGS_foo = true ] && $FLAGS_foo; then
11 |   echo "$FLAGS_foo"
12 | else
13 |   echo "$FLAGS_foo"
14 | fi
15 | 


--------------------------------------------------------------------------------
/third-party/bashflags/test/echo.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | DIR="$(dirname "${BASH_SOURCE[0]}")"  # dirname also works when the script path contains no slash (e.g. `bash echo.bash`)
 3 | source "$DIR/../flags.bash"
 4 | # Declare one string flag, 'text' (short form 't'), with the default value 'default'.
 5 | define_flag 'text' 'default' 'sample description' 't'
 6 | # Quote "$@" so arguments containing whitespace reach the parser intact.
 7 | parse_flags "$@"
 8 | # Print the parsed value; FLAGS_extra is not declared in this script, so it expands to whatever parse_flags left there (possibly empty).
 9 | echo "text=$FLAGS_text,extra=$FLAGS_extra"
10 | 


--------------------------------------------------------------------------------
/third-party/downloads/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Satisfying Grappa's third-party dependences without web access
3 | --------------------------------------------------------------
4 | 
5 | If you want to build Grappa on a machine without access to the web, and that machine doesn't already have all the third-party libraries installed that Grappa needs, you'll have to provide the source archives for those dependences yourself. 
6 | 
7 | To do so, download and untar the following file in ```third-party/downloads```. Then run ```configure```, including the ```--no-downloads``` flag.
8 | 
9 | [http://grappa.cs.washington.edu/files/grappa-third-party-downloads.tar](http://grappa.cs.washington.edu/files/grappa-third-party-downloads.tar)


--------------------------------------------------------------------------------
/third-party/google-glog/AUTHORS:
--------------------------------------------------------------------------------
1 | opensource@google.com
2 | 
3 | 


--------------------------------------------------------------------------------
/third-party/google-glog/NEWS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwsampa/grappa/69f2f3674d6f8e512e0bf55264bb75b972fd82de/third-party/google-glog/NEWS


--------------------------------------------------------------------------------
/third-party/google-glog/README:
--------------------------------------------------------------------------------
1 | This repository contains a C++ implementation of the Google logging
2 | module.  Documentation for the implementation is in doc/.
3 | 
4 | See INSTALL for (generic) installation instructions for C++: basically
5 |    ./configure && make && make install
6 | 


--------------------------------------------------------------------------------
/third-party/google-glog/README.windows:
--------------------------------------------------------------------------------
 1 | This project has begun being ported to Windows.  A working solution
 2 | file exists in this directory:
 3 |     google-glog.sln
 4 | 
 5 | You can load this solution file into VC++ 9.0 (Visual Studio
 6 | 2008).  You may also be able to use this solution file with older
 7 | Visual Studios by converting the solution file.
 8 | 
 9 | Note that stack tracing and some unittests are not ported
10 | yet.
11 | 
12 | You can also link glog code in statically -- see the example project
13 | libglog_static and logging_unittest_static, which does this.  For this
14 | to work, you'll need to add "/D GOOGLE_GLOG_DLL_DECL=" to the compile
15 | line of every glog's .cc file.
16 | 
17 | I have little experience with Windows programming, so there may be
18 | better ways to set this up than I've done!  If you run across any
19 | problems, please post to the google-glog Google Group, or report
20 | them on the google-glog Google Code site:
21 |    http://groups.google.com/group/google-glog
22 |    http://code.google.com/p/google-glog/issues/list
23 | 
24 | -- Shinichiro Hamaji
25 | 
26 | Last modified: 23 January 2009
27 | 


--------------------------------------------------------------------------------
/third-party/google-glog/libglog.pc.in:
--------------------------------------------------------------------------------
 1 | prefix=@prefix@
 2 | exec_prefix=@exec_prefix@
 3 | libdir=@libdir@
 4 | includedir=@includedir@
 5 | 
 6 | Name: libglog
 7 | Description: Google Log (glog) C++ logging framework
 8 | Version: @VERSION@
 9 | Libs: -L${libdir} -lglog
10 | Cflags: -I${includedir}
11 | 


--------------------------------------------------------------------------------
/third-party/google-glog/m4/ac_have_attribute.m4:
--------------------------------------------------------------------------------
 1 | AC_DEFUN([AX_C___ATTRIBUTE__], [
 2 |   AC_MSG_CHECKING(for __attribute__)
 3 |   AC_CACHE_VAL(ac_cv___attribute__, [
 4 |     AC_TRY_COMPILE(
 5 |       [#include <stdlib.h>
 6 |        static void foo(void) __attribute__ ((unused));
 7 |        void foo(void) { exit(1); }],
 8 |       [],
 9 |       ac_cv___attribute__=yes,
10 |       ac_cv___attribute__=no
11 |     )])
12 |   if test "$ac_cv___attribute__" = "yes"; then
13 |     AC_DEFINE(HAVE___ATTRIBUTE__, 1, [define if your compiler has __attribute__])
14 |   fi
15 |   AC_MSG_RESULT($ac_cv___attribute__)
16 | ])
17 | 


--------------------------------------------------------------------------------
/third-party/google-glog/m4/ac_have_builtin_expect.m4:
--------------------------------------------------------------------------------
 1 | AC_DEFUN([AX_C___BUILTIN_EXPECT], [
 2 |   AC_MSG_CHECKING(for __builtin_expect)
 3 |   AC_CACHE_VAL(ac_cv___builtin_expect, [
 4 |     AC_TRY_COMPILE(
 5 |       [int foo(void) { if (__builtin_expect(0, 0)) return 1; return 0; }],
 6 |       [],
 7 |       ac_cv___builtin_expect=yes,
 8 |       ac_cv___builtin_expect=no
 9 |     )])
10 |   if test "$ac_cv___builtin_expect" = "yes"; then
11 |     AC_DEFINE(HAVE___BUILTIN_EXPECT, 1, [define if your compiler has __builtin_expect])
12 |   fi
13 |   AC_MSG_RESULT($ac_cv___builtin_expect)
14 | ])
15 | 


--------------------------------------------------------------------------------
/third-party/google-glog/m4/ac_have_sync_val_compare_and_swap.m4:
--------------------------------------------------------------------------------
 1 | AC_DEFUN([AX_C___SYNC_VAL_COMPARE_AND_SWAP], [
 2 |   AC_MSG_CHECKING(for __sync_val_compare_and_swap)
 3 |   AC_CACHE_VAL(ac_cv___sync_val_compare_and_swap, [
 4 |     AC_TRY_LINK(
 5 |       [],
 6 |       [int a; if (__sync_val_compare_and_swap(&a, 0, 1)) return 1; return 0;],
 7 |       ac_cv___sync_val_compare_and_swap=yes,
 8 |       ac_cv___sync_val_compare_and_swap=no
 9 |     )])
10 |   if test "$ac_cv___sync_val_compare_and_swap" = "yes"; then
11 |     AC_DEFINE(HAVE___SYNC_VAL_COMPARE_AND_SWAP, 1, [define if your compiler has __sync_val_compare_and_swap])
12 |   fi
13 |   AC_MSG_RESULT($ac_cv___sync_val_compare_and_swap)
14 | ])
15 | 


--------------------------------------------------------------------------------
/third-party/google-glog/m4/ltversion.m4:
--------------------------------------------------------------------------------
 1 | # ltversion.m4 -- version numbers			-*- Autoconf -*-
 2 | #
 3 | #   Copyright (C) 2004 Free Software Foundation, Inc.
 4 | #   Written by Scott James Remnant, 2004
 5 | #
 6 | # This file is free software; the Free Software Foundation gives
 7 | # unlimited permission to copy and/or distribute it, with or without
 8 | # modifications, as long as this notice is preserved.
 9 | 
10 | # Generated from ltversion.in.
11 | 
12 | # serial 3017 ltversion.m4
13 | # This file is part of GNU Libtool
14 | 
15 | m4_define([LT_PACKAGE_VERSION], [2.2.6b])
16 | m4_define([LT_PACKAGE_REVISION], [1.3017])
17 | 
18 | AC_DEFUN([LTVERSION_VERSION],
19 | [macro_version='2.2.6b'
20 | macro_revision='1.3017'
21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
22 | _LT_DECL(, macro_revision, 0)
23 | ])
24 | 


--------------------------------------------------------------------------------
/third-party/google-glog/m4/namespaces.m4:
--------------------------------------------------------------------------------
 1 | # Checks whether the compiler implements namespaces
 2 | AC_DEFUN([AC_CXX_NAMESPACES],
 3 |  [AC_CACHE_CHECK(whether the compiler implements namespaces,
 4 |                  ac_cv_cxx_namespaces,
 5 |                  [AC_LANG_SAVE
 6 |                   AC_LANG_CPLUSPLUS
 7 |                   AC_TRY_COMPILE([namespace Outer {
 8 |                                     namespace Inner { int i = 0; }}],
 9 |                                  [using namespace Outer::Inner; return i;],
10 |                                  ac_cv_cxx_namespaces=yes,
11 |                                  ac_cv_cxx_namespaces=no)
12 |                   AC_LANG_RESTORE])
13 |   if test "$ac_cv_cxx_namespaces" = yes; then
14 |     AC_DEFINE(HAVE_NAMESPACES, 1, [define if the compiler implements namespaces])
15 |   fi])
16 | 


--------------------------------------------------------------------------------
/third-party/google-glog/m4/stl_namespace.m4:
--------------------------------------------------------------------------------
 1 | # We check what namespace stl code like vector expects to be executed in
 2 | 
 3 | AC_DEFUN([AC_CXX_STL_NAMESPACE],
 4 |   [AC_CACHE_CHECK(
 5 |       what namespace STL code is in,
 6 |       ac_cv_cxx_stl_namespace,
 7 |       [AC_REQUIRE([AC_CXX_NAMESPACES])
 8 |       AC_LANG_SAVE
 9 |       AC_LANG_CPLUSPLUS
10 |       AC_TRY_COMPILE([#include <vector>],
11 |                      [vector<int> t; return 0;],
12 |                      ac_cv_cxx_stl_namespace=none)
13 |       AC_TRY_COMPILE([#include <vector>],
14 |                      [std::vector<int> t; return 0;],
15 |                      ac_cv_cxx_stl_namespace=std)
16 |       AC_LANG_RESTORE])
17 |    if test "$ac_cv_cxx_stl_namespace" = none; then
18 |       AC_DEFINE(STL_NAMESPACE,,
19 |                 [the namespace where STL code like vector<> is defined])
20 |    fi
21 |    if test "$ac_cv_cxx_stl_namespace" = std; then
22 |       AC_DEFINE(STL_NAMESPACE,std,
23 |                 [the namespace where STL code like vector<> is defined])
24 |    fi
25 | ])
26 | 


--------------------------------------------------------------------------------
/third-party/google-glog/m4/using_operator.m4:
--------------------------------------------------------------------------------
 1 | AC_DEFUN([AC_CXX_USING_OPERATOR],
 2 |   [AC_CACHE_CHECK(
 3 |       whether compiler supports using ::operator<<,
 4 |       ac_cv_cxx_using_operator,
 5 |       [AC_LANG_SAVE
 6 |        AC_LANG_CPLUSPLUS
 7 |        AC_TRY_COMPILE([#include <iostream>
 8 |                        std::ostream& operator<<(std::ostream&, struct s);],
 9 |                       [using ::operator<<; return 0;],
10 |                       ac_cv_cxx_using_operator=1,
11 | 		      ac_cv_cxx_using_operator=0)
12 |       AC_LANG_RESTORE])
13 |   if test "$ac_cv_cxx_using_operator" = 1; then
14 |     AC_DEFINE(HAVE_USING_OPERATOR, 1, [define if the compiler supports using expression for operator])
15 |   fi])
16 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/README:
--------------------------------------------------------------------------------
1 | The list of files here isn't complete.  For a step-by-step guide on
2 | how to set this package up correctly, check out
3 |     http://www.debian.org/doc/maint-guide/
4 | 
5 | Most of the files that are in this directory are boilerplate.
6 | However, you may need to change the list of binary-arch dependencies
7 | in 'rules'.
8 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/compat:
--------------------------------------------------------------------------------
1 | 4
2 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/control:
--------------------------------------------------------------------------------
 1 | Source: google-glog
 2 | Priority: optional
 3 | Maintainer: Google Inc. <opensource@google.com>
 4 | Build-Depends: debhelper (>= 4.0.0), binutils
 5 | Standards-Version: 3.6.1
 6 | 
 7 | Package: libgoogle-glog-dev
 8 | Section: libdevel
 9 | Architecture: any
10 | Depends: libgoogle-glog0 (= ${Source-Version})
11 | Description:  a library that implements application-level logging.
12 |  This library provides logging APIs based on C++-style streams and
13 |  various helper macros.  The devel package contains static and debug
14 |  libraries and header files for developing applications that use the
15 |  google-glog package.
16 | 
17 | Package: libgoogle-glog0
18 | Section: libs
19 | Architecture: any
20 | Depends: ${shlibs:Depends}
21 | Description:  a library that implements application-level logging.
22 |  This library provides logging APIs based on C++-style streams and
23 |  various helper macros.
24 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/docs:
--------------------------------------------------------------------------------
1 | AUTHORS
2 | COPYING
3 | ChangeLog
4 | INSTALL
5 | NEWS
6 | README
7 | doc/designstyle.css
8 | doc/glog.html
9 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/libgoogle-glog-dev.dirs:
--------------------------------------------------------------------------------
1 | usr/lib
2 | usr/lib/pkgconfig
3 | usr/include
4 | usr/include/glog
5 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/libgoogle-glog-dev.install:
--------------------------------------------------------------------------------
 1 | usr/include/glog/*
 2 | usr/lib/lib*.so
 3 | usr/lib/lib*.a
 4 | usr/lib/*.la
 5 | usr/lib/pkgconfig/*
 6 | debian/tmp/usr/include/glog/*
 7 | debian/tmp/usr/lib/lib*.so
 8 | debian/tmp/usr/lib/lib*.a
 9 | debian/tmp/usr/lib/*.la
10 | debian/tmp/usr/lib/pkgconfig/*
11 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/libgoogle-glog0.dirs:
--------------------------------------------------------------------------------
1 | usr/lib
2 | 


--------------------------------------------------------------------------------
/third-party/google-glog/packages/deb/libgoogle-glog0.install:
--------------------------------------------------------------------------------
1 | usr/lib/lib*.so.*
2 | debian/tmp/usr/lib/lib*.so.*
3 | 


--------------------------------------------------------------------------------
/third-party/graph500-generator/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | set(GENERATOR_SOURCES
 3 |   graph_generator.h
 4 |   graph_generator.c
 5 |   make_graph.h
 6 |   make_graph.c
 7 |   splittable_mrg.h
 8 |   splittable_mrg.c
 9 |   utils.h
10 |   utils.c
11 |   user_settings.h
12 |   mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h
13 | )
14 | 
15 | add_library(graph500-generator STATIC ${GENERATOR_SOURCES})
16 | set_target_properties(graph500-generator PROPERTIES
17 |   COMPILE_FLAGS "-Drestrict=__restrict__ -DGRAPH_GENERATOR_SEQ -ffast-math ${STATIC_FLAGS}"
18 |   FOLDER "Third Party"
19 | )
20 | 
21 | install(TARGETS graph500-generator DESTINATION "lib")
22 | 


--------------------------------------------------------------------------------
/third-party/graph500-generator/Makefile.mpi:
--------------------------------------------------------------------------------
 1 | CC = mpicc
 2 | CFLAGS = -std=c99 -O3 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg
 3 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g
 4 | LDFLAGS = -O3
 5 | # LDFLAGS = -g
 6 | MPICC = mpicc
 7 | 
 8 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 9 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
10 | 
11 | all: generator_test_mpi
12 | 
13 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
14 | 	$(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm
15 | 
16 | clean:
17 | 	-rm -f generator_test_mpi
18 | 


--------------------------------------------------------------------------------
/third-party/graph500-generator/Makefile.omp:
--------------------------------------------------------------------------------
 1 | CC = gcc -fopenmp
 2 | CFLAGS = -std=c99 -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_OMP # -g -pg
 3 | LDFLAGS = -O3
 4 | 
 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 7 | 
 8 | all: generator_test_omp
 9 | 
10 | generator_test_omp: generator_test_omp.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
11 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_omp generator_test_omp.c $(GENERATOR_SOURCES) -lm
12 | 
13 | clean:
14 | 	-rm -f generator_test_omp
15 | 


--------------------------------------------------------------------------------
/third-party/graph500-generator/Makefile.seq:
--------------------------------------------------------------------------------
 1 | CC = cc
 2 | CFLAGS = -g -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_SEQ # -g -pg
 3 | # CFLAGS = -g -Wall -Drestrict=__restrict__
 4 | LDFLAGS = -g # -g -pg
 5 | 
 6 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 7 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 8 | 
 9 | all: generator_test_seq
10 | 
11 | generator_test_seq: generator_test_seq.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
12 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_seq generator_test_seq.c $(GENERATOR_SOURCES) -lm
13 | 
14 | clean:
15 | 	-rm -f generator_test_seq
16 | 


--------------------------------------------------------------------------------
/third-party/graph500-generator/Makefile.xmt:
--------------------------------------------------------------------------------
 1 | CC = cc
 2 | CFLAGS = -DNDEBUG 
 3 | LDFLAGS = $(CFLAGS) # -g -pg
 4 | 
 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c
 7 | 
 8 | all: generator_test_xmt
 9 | 
10 | generator_test_xmt: generator_test_xmt.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
11 | 	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_xmt generator_test_xmt.c $(GENERATOR_SOURCES) -lm
12 | 
13 | clean:
14 | 	-rm -f generator_test_xmt
15 | 


--------------------------------------------------------------------------------
/third-party/vampirtrace.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | # Download, build and install VampirTrace 5.14.4.
 3 | #
 4 | # Usage: vampirtrace.rb [--prefix=path]    (default prefix: /opt/vampir)
 5 | require 'fileutils'; include FileUtils
 6 | require 'optparse'; require 'ostruct'
 7 | require 'shellwords'
 8 | 
 9 | # Redefine backticks so each command runs through `system` (output goes straight
10 | # to the terminal). When a command fails, warn and open a pry session.
11 | def `(cmd)
12 |   system cmd
13 |   if not $?.success?
14 |     warn "error! debugging..."
15 |     require 'pry'; binding.pry
16 |   end
17 | end
18 | 
19 | opt = OpenStruct.new
20 | opt.prefix = '/opt/vampir'
21 | 
22 | OptionParser.new {|parser|
23 |   parser.on('--prefix=path'){|path| opt.prefix = path }
24 | }.parse!
25 | 
26 | `wget http://sampa.cs.washington.edu/grappa/VampirTrace-5.14.4.tar.gz`
27 | `tar xzf VampirTrace-5.14.4.tar.gz`
28 | 
29 | cd("VampirTrace-5.14.4") do
30 |   # Escape the prefix: it is user input that ends up on a shell command line.
31 |   `./configure --prefix=#{Shellwords.escape(opt.prefix)}`
32 |   `make -j4`
33 |   `make install`
34 | end
35 | 
36 | # The unpacked source tree is not empty, so rmdir cannot remove it; delete it recursively.
37 | rm_rf "VampirTrace-5.14.4"
38 | rm "VampirTrace-5.14.4.tar.gz"
39 | 


--------------------------------------------------------------------------------
/util/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # make everything in this directory available in the build dir by symlinking each entry (relative link, created with ln -sf while CMake configures)
 2 | # file(COPY . DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 3 | file(GLOB scripts "*")
 4 | foreach(file ${scripts})
 5 |   get_filename_component(name ${file} NAME)
 6 |   file(RELATIVE_PATH relative_file ${CMAKE_CURRENT_BINARY_DIR} ${file})
 7 |   execute_process(COMMAND
 8 |     ln -sf ${relative_file} ${CMAKE_CURRENT_BINARY_DIR}/${name}
 9 |   )
10 | endforeach()
11 | 
12 | 
13 | #
14 | # installation
15 | #
16 | 
17 | install(PROGRAMS env.sh DESTINATION "bin")
18 | 


--------------------------------------------------------------------------------
/util/common.sh:
--------------------------------------------------------------------------------
1 | #####################################################################
2 | # Common BASH helpers, including a mini flag-parsing library.
3 | #####################################################################
4 | 
5 | has_srun() {
6 |   type srun &> /dev/null  # status 0 only when bash can resolve `srun`; all output is discarded
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/util/env.sh:
--------------------------------------------------------------------------------
 1 | ## set up Google logging defaults
 2 | export GLOG_logtostderr="1"
 3 | export GLOG_v="1"
 4 | 
 5 | ## set Google profiler sample rate
 6 | export CPUPROFILE_FREQUENCY="50"
 7 | 
 8 | ## set VampirTrace options
 9 | #export VT_VERBOSE="10"
10 | export VT_MAX_FLUSHES="0"
11 | export VT_PFORM_GDIR="."
12 | export VT_PFORM_LDIR="/scratch"
13 | export VT_FILE_UNIQUE="yes"
14 | export VT_MPITRACE="no"
15 | export VT_UNIFY="no"
16 | 
17 | ## set MVAPICH2 options to avoid keeping around malloced memory
18 | ## (and some performance tweaks which may be irrelevant)
19 | export MV2_USE_LAZY_MEM_UNREGISTER="0"
20 | export MV2_HOMOGENEOUS_CLUSTER="1"
21 | 
22 | export MV2_USE_RDMA_FAST_PATH="0"
23 | 
24 | export MV2_SRQ_MAX_SIZE="8192"
25 | #export MV2_USE_XRC="1" # doesn't seem to work with 1.9b on pal
26 | 
27 | #export MV2_USE_MCAST="1" # doesn't always work on pal
28 | 
29 | ## set Open MPI options (OMPI_MCA_*), likewise to avoid keeping around malloced memory
30 | export OMPI_MCA_mpi_leave_pinned="0"
31 | export OMPI_MCA_mpi_yield_when_idle="0"
32 | 
33 | # in case $USER isn't set
34 | USER=${USER-$(whoami)}
35 | 


--------------------------------------------------------------------------------
/util/histogram.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'sequel'
 3 | 
 4 | dbpath = ARGV[0]
 5 | user_glob = ARGV[1]
 6 | 
 7 | puts "database: #{dbpath}"
 8 | puts "user_glob: #{user_glob}"
 9 | 
10 | db = Sequel.sqlite(dbpath)
11 | table = :histograms
12 | histable = db[table]
13 | 
14 | db.create_table?(table){
15 |   primary_key :id
16 |   Integer     :jobid
17 |   Integer     :core
18 |   String      :stat
19 |   Integer     :value
20 |   index :jobid
21 |   index :stat
22 | }
23 | 
24 | Dir.glob(user_glob).each do |f|
25 |   m = f.match(/histogram\.(?<jobid>\d+)\/(?<stat>[\w_]+)\.(?<core>\d+)\.out/)
26 |   r = {jobid:m[:jobid].to_i,core:m[:core].to_i,stat:m[:stat],value:0}
27 |   puts "#{f} -- #{r}"
28 |   data = []
29 |   File.open(f,"r") do |f|
30 |     while b = f.read(8) do
31 |       v = b.unpack("q")[0]
32 |       data << r.merge({value:v})
33 |     end
34 |   end
35 |   histable.multi_insert(data)
36 | end
37 | 


--------------------------------------------------------------------------------