├── .gitattributes ├── .gitignore ├── .travis.yml ├── AUTHORS ├── BUILD.md ├── CMakeLists.txt ├── COPYING ├── NOTICE ├── README.md ├── applications ├── CMakeLists.txt ├── NPB │ ├── GRAPPA │ │ ├── IS │ │ │ ├── CMakeLists.txt │ │ │ ├── igor_grappa_intsort.rb │ │ │ ├── intsort.cpp │ │ │ ├── npb_intsort.hpp │ │ │ ├── npbparams.h │ │ │ ├── randlc.cpp │ │ │ └── randlc.hpp │ │ ├── common │ │ │ ├── c_print_results.c │ │ │ ├── c_timers.c │ │ │ ├── print_results.f │ │ │ ├── randdp.c │ │ │ ├── randdp.f │ │ │ ├── randdpvec.f │ │ │ ├── randi8.f │ │ │ ├── randi8_safe.f │ │ │ └── timers.f │ │ ├── config │ │ │ ├── NAS.samples │ │ │ │ ├── README │ │ │ │ ├── make.def.dec_alpha │ │ │ │ ├── make.def.ibm_aix64 │ │ │ │ ├── make.def.irix6.2 │ │ │ │ ├── make.def.origin │ │ │ │ ├── make.def.pgi_mpich │ │ │ │ ├── make.def.sgi_altix │ │ │ │ ├── make.def.sgi_powerchallenge │ │ │ │ ├── make.def.sp2_babbage │ │ │ │ ├── make.def.sun_ultra_sparc │ │ │ │ ├── make.def.t3d_cosmos │ │ │ │ ├── make.def_sun_mpich │ │ │ │ ├── suite.def.bt │ │ │ │ ├── suite.def.cg │ │ │ │ ├── suite.def.ep │ │ │ │ ├── suite.def.ft │ │ │ │ ├── suite.def.is │ │ │ │ ├── suite.def.lu │ │ │ │ ├── suite.def.mg │ │ │ │ ├── suite.def.small │ │ │ │ └── suite.def.sp │ │ │ ├── make.def │ │ │ ├── make.def.template │ │ │ ├── make.dummy │ │ │ ├── suite.def │ │ │ └── suite.def.template │ │ └── sys │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── make.common │ │ │ ├── print_header │ │ │ ├── print_instructions │ │ │ ├── setparams.c │ │ │ └── suite.awk │ ├── MPI │ │ ├── .gitignore │ │ ├── BT │ │ │ ├── Makefile │ │ │ ├── add.f │ │ │ ├── adi.f │ │ │ ├── bt.f │ │ │ ├── btio.f │ │ │ ├── btio_common.f │ │ │ ├── copy_faces.f │ │ │ ├── define.f │ │ │ ├── epio.f │ │ │ ├── error.f │ │ │ ├── exact_rhs.f │ │ │ ├── exact_solution.f │ │ │ ├── fortran_io.f │ │ │ ├── full_mpiio.f │ │ │ ├── header.h │ │ │ ├── initialize.f │ │ │ ├── inputbt.data.sample │ │ │ ├── make_set.f │ │ │ ├── mpinpb.h │ │ │ ├── rhs.f │ │ │ ├── set_constants.f │ │ │ 
├── setup_mpi.f │ │ │ ├── simple_mpiio.f │ │ │ ├── solve_subs.f │ │ │ ├── verify.f │ │ │ ├── work_lhs.h │ │ │ ├── work_lhs_vec.h │ │ │ ├── x_solve.f │ │ │ ├── x_solve_vec.f │ │ │ ├── y_solve.f │ │ │ ├── y_solve_vec.f │ │ │ ├── z_solve.f │ │ │ └── z_solve_vec.f │ │ ├── CG │ │ │ ├── Makefile │ │ │ ├── cg.f │ │ │ ├── mpinpb.h │ │ │ ├── runexps.rb │ │ │ └── timing.h │ │ ├── DT │ │ │ ├── DGraph.c │ │ │ ├── DGraph.h │ │ │ ├── Makefile │ │ │ ├── README │ │ │ └── dt.c │ │ ├── EP │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── ep.f │ │ │ └── mpinpb.h │ │ ├── FT │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── ft.f │ │ │ ├── global.h │ │ │ ├── inputft.data.sample │ │ │ └── mpinpb.h │ │ ├── IS │ │ │ ├── Makefile │ │ │ ├── is.c │ │ │ └── npbparams.h │ │ ├── LU │ │ │ ├── Makefile │ │ │ ├── applu.incl │ │ │ ├── bcast_inputs.f │ │ │ ├── blts.f │ │ │ ├── blts_vec.f │ │ │ ├── buts.f │ │ │ ├── buts_vec.f │ │ │ ├── erhs.f │ │ │ ├── error.f │ │ │ ├── exact.f │ │ │ ├── exchange_1.f │ │ │ ├── exchange_3.f │ │ │ ├── exchange_4.f │ │ │ ├── exchange_5.f │ │ │ ├── exchange_6.f │ │ │ ├── init_comm.f │ │ │ ├── inputlu.data.sample │ │ │ ├── jacld.f │ │ │ ├── jacu.f │ │ │ ├── l2norm.f │ │ │ ├── lu.f │ │ │ ├── mpinpb.h │ │ │ ├── neighbors.f │ │ │ ├── nodedim.f │ │ │ ├── pintgr.f │ │ │ ├── proc_grid.f │ │ │ ├── read_input.f │ │ │ ├── rhs.f │ │ │ ├── setbv.f │ │ │ ├── setcoeff.f │ │ │ ├── sethyper.f │ │ │ ├── setiv.f │ │ │ ├── ssor.f │ │ │ ├── subdomain.f │ │ │ ├── timing.h │ │ │ └── verify.f │ │ ├── MG │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── globals.h │ │ │ ├── mg.f │ │ │ ├── mg.input.sample │ │ │ └── mpinpb.h │ │ ├── MPI_dummy │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── mpi.h │ │ │ ├── mpi_dummy.c │ │ │ ├── mpi_dummy.f │ │ │ ├── mpif.h │ │ │ ├── test.f │ │ │ ├── wtime.c │ │ │ ├── wtime.f │ │ │ ├── wtime.h │ │ │ └── wtime_sgi64.c │ │ ├── Makefile │ │ ├── README │ │ ├── README.install │ │ ├── SP │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── add.f │ │ │ ├── adi.f │ │ │ ├── copy_faces.f │ │ │ ├── 
define.f │ │ │ ├── error.f │ │ │ ├── exact_rhs.f │ │ │ ├── exact_solution.f │ │ │ ├── header.h │ │ │ ├── initialize.f │ │ │ ├── inputsp.data.sample │ │ │ ├── lhsx.f │ │ │ ├── lhsy.f │ │ │ ├── lhsz.f │ │ │ ├── make_set.f │ │ │ ├── mpinpb.h │ │ │ ├── ninvr.f │ │ │ ├── pinvr.f │ │ │ ├── rhs.f │ │ │ ├── set_constants.f │ │ │ ├── setup_mpi.f │ │ │ ├── sp.f │ │ │ ├── txinvr.f │ │ │ ├── tzetar.f │ │ │ ├── verify.f │ │ │ ├── x_solve.f │ │ │ ├── y_solve.f │ │ │ └── z_solve.f │ │ ├── common │ │ │ ├── c_print_results.c │ │ │ ├── c_timers.c │ │ │ ├── print_results.f │ │ │ ├── randdp.c │ │ │ ├── randdp.f │ │ │ ├── randdpvec.f │ │ │ ├── randi8.f │ │ │ ├── randi8_safe.f │ │ │ └── timers.f │ │ ├── config │ │ │ ├── NAS.samples │ │ │ │ ├── README │ │ │ │ ├── make.def.dec_alpha │ │ │ │ ├── make.def.ibm_aix64 │ │ │ │ ├── make.def.irix6.2 │ │ │ │ ├── make.def.origin │ │ │ │ ├── make.def.pgi_mpich │ │ │ │ ├── make.def.sgi_altix │ │ │ │ ├── make.def.sgi_powerchallenge │ │ │ │ ├── make.def.sp2_babbage │ │ │ │ ├── make.def.sun_ultra_sparc │ │ │ │ ├── make.def.t3d_cosmos │ │ │ │ ├── make.def_sun_mpich │ │ │ │ ├── suite.def.bt │ │ │ │ ├── suite.def.cg │ │ │ │ ├── suite.def.ep │ │ │ │ ├── suite.def.ft │ │ │ │ ├── suite.def.is │ │ │ │ ├── suite.def.lu │ │ │ │ ├── suite.def.mg │ │ │ │ ├── suite.def.small │ │ │ │ └── suite.def.sp │ │ │ ├── make.def │ │ │ ├── make.def.template │ │ │ ├── make.dummy │ │ │ ├── suite.def │ │ │ └── suite.def.template │ │ ├── igor_mpi_intsort.rb │ │ ├── sort.rb │ │ └── sys │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── make.common │ │ │ ├── print_header │ │ │ ├── print_instructions │ │ │ ├── setparams.c │ │ │ └── suite.awk │ ├── NPB3.3-HPF.README │ ├── NPB3.3-JAV.README │ ├── OMP │ │ ├── BT │ │ │ ├── Makefile │ │ │ ├── add.f │ │ │ ├── adi.f │ │ │ ├── bt.f │ │ │ ├── error.f │ │ │ ├── exact_rhs.f │ │ │ ├── exact_solution.f │ │ │ ├── header.h │ │ │ ├── initialize.f │ │ │ ├── inputbt.data.sample │ │ │ ├── rhs.f │ │ │ ├── set_constants.f │ │ │ ├── 
solve_subs.f │ │ │ ├── verify.f │ │ │ ├── work_lhs.h │ │ │ ├── work_lhs_vec.h │ │ │ ├── x_solve.f │ │ │ ├── x_solve_vec.f │ │ │ ├── y_solve.f │ │ │ ├── y_solve_vec.f │ │ │ ├── z_solve.f │ │ │ └── z_solve_vec.f │ │ ├── CG │ │ │ ├── Makefile │ │ │ ├── README.carefully │ │ │ ├── cg.f │ │ │ ├── globals.h │ │ │ └── runexps.rb │ │ ├── DC │ │ │ ├── ADC.par │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── adc.c │ │ │ ├── adc.h │ │ │ ├── adcc.h │ │ │ ├── dc.c │ │ │ ├── extbuild.c │ │ │ ├── jobcntl.c │ │ │ ├── macrodef.h │ │ │ ├── protots.h │ │ │ ├── rbt.c │ │ │ └── rbt.h │ │ ├── EP │ │ │ ├── Makefile │ │ │ ├── README │ │ │ └── ep.f │ │ ├── FT │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── ft.f │ │ │ ├── global.h │ │ │ └── inputft.data.sample │ │ ├── IS │ │ │ ├── Makefile │ │ │ ├── README.carefully │ │ │ └── is.c │ │ ├── LU │ │ │ ├── Makefile │ │ │ ├── applu.incl │ │ │ ├── blts.f │ │ │ ├── blts_vec.f │ │ │ ├── buts.f │ │ │ ├── buts_vec.f │ │ │ ├── domain.f │ │ │ ├── erhs.f │ │ │ ├── error.f │ │ │ ├── exact.f │ │ │ ├── inputlu.data.sample │ │ │ ├── jacld.f │ │ │ ├── jacu.f │ │ │ ├── l2norm.f │ │ │ ├── lu.f │ │ │ ├── pintgr.f │ │ │ ├── read_input.f │ │ │ ├── rhs.f │ │ │ ├── rhs_vec.f │ │ │ ├── setbv.f │ │ │ ├── setcoeff.f │ │ │ ├── setiv.f │ │ │ ├── ssor.f │ │ │ ├── ssor_vec.f │ │ │ ├── syncs.f │ │ │ └── verify.f │ │ ├── MG │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── globals.h │ │ │ ├── mg.f │ │ │ └── mg.input.sample │ │ ├── Makefile │ │ ├── README │ │ ├── README.install │ │ ├── SP │ │ │ ├── Makefile │ │ │ ├── add.f │ │ │ ├── adi.f │ │ │ ├── error.f │ │ │ ├── exact_rhs.f │ │ │ ├── exact_solution.f │ │ │ ├── header.h │ │ │ ├── initialize.f │ │ │ ├── inputsp.data.sample │ │ │ ├── ninvr.f │ │ │ ├── pinvr.f │ │ │ ├── rhs.f │ │ │ ├── set_constants.f │ │ │ ├── sp.f │ │ │ ├── txinvr.f │ │ │ ├── tzetar.f │ │ │ ├── verify.f │ │ │ ├── x_solve.f │ │ │ ├── y_solve.f │ │ │ └── z_solve.f │ │ ├── UA │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── adapt.f │ │ │ ├── convect.f │ │ │ ├── diffuse.f │ 
│ │ ├── header.h │ │ │ ├── mason.f │ │ │ ├── move.f │ │ │ ├── precond.f │ │ │ ├── setup.f │ │ │ ├── transfer.f │ │ │ ├── transfer_au.f │ │ │ ├── ua.f │ │ │ ├── utils.f │ │ │ └── verify.f │ │ ├── common │ │ │ ├── c_print_results.c │ │ │ ├── c_timers.c │ │ │ ├── print_results.f │ │ │ ├── randdp.f │ │ │ ├── randdpvec.f │ │ │ ├── randi8.f │ │ │ ├── randi8_safe.f │ │ │ ├── timers.f │ │ │ ├── wtime.c │ │ │ ├── wtime.h │ │ │ └── wtime_sgi64.c │ │ ├── config │ │ │ ├── NAS.samples │ │ │ │ ├── README │ │ │ │ ├── make.def.gcc_x86 │ │ │ │ ├── make.def_ia64 │ │ │ │ ├── make.def_ibm │ │ │ │ ├── make.def_ibm64 │ │ │ │ ├── make.def_intel │ │ │ │ ├── make.def_omni │ │ │ │ ├── make.def_pgi │ │ │ │ ├── make.def_sgi │ │ │ │ ├── make.def_sgi64 │ │ │ │ ├── make.def_sun │ │ │ │ ├── make.def_sun64 │ │ │ │ ├── suite.def.bt │ │ │ │ ├── suite.def.cg │ │ │ │ ├── suite.def.ep │ │ │ │ ├── suite.def.ft │ │ │ │ ├── suite.def.is │ │ │ │ ├── suite.def.lu │ │ │ │ ├── suite.def.mg │ │ │ │ └── suite.def.sp │ │ │ ├── make.def.template │ │ │ └── suite.def.template │ │ └── sys │ │ │ ├── Makefile │ │ │ ├── README │ │ │ ├── make.common │ │ │ ├── print_header │ │ │ ├── print_instructions │ │ │ ├── setparams.c │ │ │ └── suite.awk │ ├── README │ └── SERIAL │ │ ├── BT │ │ ├── Makefile │ │ ├── add.f │ │ ├── adi.f │ │ ├── bt.f │ │ ├── error.f │ │ ├── exact_rhs.f │ │ ├── exact_solution.f │ │ ├── header.h │ │ ├── initialize.f │ │ ├── inputbt.data.sample │ │ ├── rhs.f │ │ ├── set_constants.f │ │ ├── solve_subs.f │ │ ├── verify.f │ │ ├── work_lhs.h │ │ ├── work_lhs_vec.h │ │ ├── x_solve.f │ │ ├── x_solve_vec.f │ │ ├── y_solve.f │ │ ├── y_solve_vec.f │ │ ├── z_solve.f │ │ └── z_solve_vec.f │ │ ├── CG │ │ ├── Makefile │ │ ├── README.carefully │ │ ├── cg.f │ │ └── globals.h │ │ ├── DC │ │ ├── ADC.par │ │ ├── Makefile │ │ ├── README │ │ ├── adc.c │ │ ├── adc.h │ │ ├── adcc.h │ │ ├── dc.c │ │ ├── extbuild.c │ │ ├── jobcntl.c │ │ ├── macrodef.h │ │ ├── protots.h │ │ ├── rbt.c │ │ └── rbt.h │ │ ├── EP │ │ ├── Makefile │ │ 
├── README │ │ └── ep.f │ │ ├── FT │ │ ├── Makefile │ │ ├── appft.f │ │ ├── auxfnct.f │ │ ├── fft3d.f │ │ ├── global.h │ │ ├── mainft.f │ │ └── verify.f │ │ ├── IS │ │ ├── Makefile │ │ ├── README.carefully │ │ └── is.c │ │ ├── LU │ │ ├── Makefile │ │ ├── applu.incl │ │ ├── blts.f │ │ ├── blts_vec.f │ │ ├── buts.f │ │ ├── buts_vec.f │ │ ├── domain.f │ │ ├── erhs.f │ │ ├── error.f │ │ ├── exact.f │ │ ├── inputlu.data.sample │ │ ├── jacld.f │ │ ├── jacu.f │ │ ├── l2norm.f │ │ ├── lu.f │ │ ├── pintgr.f │ │ ├── read_input.f │ │ ├── rhs.f │ │ ├── rhs_vec.f │ │ ├── setbv.f │ │ ├── setcoeff.f │ │ ├── setiv.f │ │ ├── ssor.f │ │ ├── ssor_vec.f │ │ └── verify.f │ │ ├── MG │ │ ├── Makefile │ │ ├── README │ │ ├── globals.h │ │ ├── mg.f │ │ └── mg.input.sample │ │ ├── Makefile │ │ ├── README │ │ ├── README.install │ │ ├── SP │ │ ├── Makefile │ │ ├── add.f │ │ ├── adi.f │ │ ├── error.f │ │ ├── exact_rhs.f │ │ ├── exact_solution.f │ │ ├── header.h │ │ ├── initialize.f │ │ ├── inputsp.data.sample │ │ ├── ninvr.f │ │ ├── pinvr.f │ │ ├── rhs.f │ │ ├── set_constants.f │ │ ├── sp.f │ │ ├── txinvr.f │ │ ├── tzetar.f │ │ ├── verify.f │ │ ├── x_solve.f │ │ ├── y_solve.f │ │ └── z_solve.f │ │ ├── UA │ │ ├── Makefile │ │ ├── README │ │ ├── adapt.f │ │ ├── convect.f │ │ ├── diffuse.f │ │ ├── header.h │ │ ├── mason.f │ │ ├── move.f │ │ ├── precond.f │ │ ├── setup.f │ │ ├── transfer.f │ │ ├── ua.f │ │ ├── utils.f │ │ └── verify.f │ │ ├── common │ │ ├── c_print_results.c │ │ ├── c_timers.c │ │ ├── print_results.f │ │ ├── randdp.f │ │ ├── randdpvec.f │ │ ├── randi8.f │ │ ├── randi8_safe.f │ │ ├── timers.f │ │ ├── wtime.c │ │ ├── wtime.h │ │ └── wtime_sgi64.c │ │ ├── config │ │ ├── NAS.samples │ │ │ ├── README │ │ │ ├── make.def_crayx1 │ │ │ ├── make.def_gcc_x86 │ │ │ ├── make.def_ia64 │ │ │ ├── make.def_ibm │ │ │ ├── make.def_ibm64 │ │ │ ├── make.def_intel │ │ │ ├── make.def_pgi │ │ │ ├── make.def_sgi │ │ │ ├── make.def_sgi64 │ │ │ ├── make.def_sun │ │ │ ├── make.def_sun64 │ │ │ ├── suite.def.bt 
│ │ │ ├── suite.def.cg │ │ │ ├── suite.def.ep │ │ │ ├── suite.def.ft │ │ │ ├── suite.def.is │ │ │ ├── suite.def.lu │ │ │ ├── suite.def.mg │ │ │ └── suite.def.sp │ │ ├── make.def.template │ │ └── suite.def.template │ │ └── sys │ │ ├── Makefile │ │ ├── README │ │ ├── make.common │ │ ├── print_header │ │ ├── print_instructions │ │ ├── setparams.c │ │ └── suite.awk ├── demos │ ├── CMakeLists.txt │ ├── gups │ │ ├── gups-lcg.cpp │ │ ├── gups.cpp │ │ ├── gups1.cpp │ │ ├── gups2.cpp │ │ ├── gups3.cpp │ │ └── gups4.cpp │ ├── hello_world │ │ └── hello_world.cpp │ ├── nqueens │ │ └── nqueens.cpp │ ├── standalone │ │ ├── Makefile │ │ └── standalone.cpp │ └── tree_search │ │ └── tree_search.cpp ├── graph500 │ ├── .gitignore │ ├── CMakeLists.txt │ ├── COPYING │ ├── Graph500.html │ ├── Graph500.org │ ├── Makefile │ ├── README │ ├── README-Grappa.md │ ├── compat.h │ ├── compatio.h │ ├── generator │ │ ├── CMakeLists.txt │ │ ├── LICENSE_1_0.txt │ │ ├── Makefile.grappa │ │ ├── Makefile.mpi │ │ ├── Makefile.omp │ │ ├── Makefile.seq │ │ ├── Makefile.xmt │ │ ├── README │ │ ├── generator_test_mpi.c │ │ ├── generator_test_omp.c │ │ ├── generator_test_seq.c │ │ ├── generator_test_xmt.c │ │ ├── graph_generator.c │ │ ├── graph_generator.h │ │ ├── graph_generator.xmt.c │ │ ├── make_graph.c │ │ ├── make_graph.h │ │ ├── make_graph.xmt.c │ │ ├── mod_arith.h │ │ ├── mod_arith_32bit.h │ │ ├── mod_arith_64bit.h │ │ ├── mod_arith_xmt.h │ │ ├── mrg_transitions.c │ │ ├── splittable_mrg.c │ │ ├── splittable_mrg.h │ │ ├── user_settings.h │ │ ├── utils.c │ │ └── utils.h │ ├── graph500.c │ ├── graph500.h │ ├── grappa │ │ ├── .gitignore │ │ ├── README │ │ ├── asciize.rb │ │ ├── beamer.rb │ │ ├── beamer2.rb │ │ ├── beamer_alg.md │ │ ├── bfs.rb │ │ ├── common.h │ │ ├── graph.cpp │ │ ├── graph.hpp │ │ ├── oned_csr.cpp │ │ ├── oned_csr.h │ │ ├── options.cpp │ │ ├── options.h │ │ ├── test.rb │ │ ├── timer.h │ │ ├── trace.rb │ │ └── vampir.rb │ ├── kronecker.c │ ├── kronecker.h │ ├── make-edgelist.c │ ├── 
make-incs │ │ ├── make.inc-gcc │ │ ├── make.inc-osx │ │ └── make.inc-xmt │ ├── mpi │ │ ├── Makefile │ │ ├── README │ │ ├── bfs_custom.c │ │ ├── bfs_one_sided.c │ │ ├── bfs_replicated.c │ │ ├── bfs_replicated_csc.c │ │ ├── bfs_simple.c │ │ ├── common.h │ │ ├── igor_mpi_bfs.rb │ │ ├── main.c │ │ ├── mpi_workarounds.h │ │ ├── oned_csc.c │ │ ├── oned_csc.h │ │ ├── oned_csr.c │ │ ├── oned_csr.h │ │ ├── onesided.c │ │ ├── onesided.h │ │ ├── onesided_emul.c │ │ ├── redistribute.h │ │ ├── utils.c │ │ └── validate.c │ ├── octave │ │ ├── Graph500.m │ │ ├── kernel_1.m │ │ ├── kernel_2.m │ │ ├── kronecker_generator.m │ │ ├── output.m │ │ └── validate.m │ ├── omp-csr │ │ └── omp-csr.c │ ├── options.c │ ├── options.h │ ├── prng.c │ ├── prng.h │ ├── rmat.c │ ├── rmat.h │ ├── run_exps.rb │ ├── seq-csr │ │ └── seq-csr.c │ ├── seq-list │ │ └── seq-list.c │ ├── timer.c │ ├── timer.h │ ├── verify.c │ ├── verify.h │ ├── xalloc.c │ ├── xalloc.h │ ├── xmt-csr-local │ │ └── xmt-csr-local.c │ └── xmt-csr │ │ ├── xmt-csr.c │ │ └── xmt-csr.xmt.c ├── graphlab │ ├── CMakeLists.txt │ ├── README.md │ ├── bfs.cpp │ ├── cc.cpp │ ├── graphlab.cpp │ ├── graphlab.hpp │ ├── graphlab_borrowed.hpp │ ├── graphlab_naive.hpp │ ├── graphlab_splitv.hpp │ ├── igor_graphlab_bfs.rb │ ├── igor_graphlab_pagerank.rb │ ├── igor_graphlab_sssp.rb │ ├── pagerank.cpp │ ├── pagerank_new.cpp │ ├── sssp.cpp │ └── test.cpp ├── isopath │ ├── CMakeLists.txt │ ├── Makefile │ ├── compat.h │ ├── compatio.h │ ├── generator │ │ ├── LICENSE_1_0.txt │ │ ├── Makefile.grappa │ │ ├── Makefile.mpi │ │ ├── Makefile.omp │ │ ├── Makefile.seq │ │ ├── Makefile.xmt │ │ ├── README │ │ ├── generator_test_mpi.c │ │ ├── generator_test_omp.c │ │ ├── generator_test_seq.c │ │ ├── generator_test_xmt.c │ │ ├── graph_generator.c │ │ ├── graph_generator.h │ │ ├── graph_generator.xmt.c │ │ ├── make_graph.c │ │ ├── make_graph.h │ │ ├── make_graph.xmt.c │ │ ├── mod_arith.h │ │ ├── mod_arith_32bit.h │ │ ├── mod_arith_64bit.h │ │ ├── mod_arith_xmt.h │ │ ├── 
mrg_transitions.c │ │ ├── splittable_mrg.c │ │ ├── splittable_mrg.h │ │ ├── user_settings.h │ │ ├── utils.c │ │ └── utils.h │ ├── grappa │ │ ├── .gitignore │ │ ├── CMakeLists.txt │ │ ├── Makefile │ │ ├── README │ │ ├── common.h │ │ ├── graph.cpp │ │ ├── graph.hpp │ │ ├── isopath.cpp │ │ ├── oned_csr.cpp │ │ ├── oned_csr.h │ │ ├── options.cpp │ │ ├── options.h │ │ ├── simple_graphs.cpp │ │ ├── simple_graphs.hpp │ │ └── timer.h │ ├── options.c │ ├── options.h │ ├── prng.c │ ├── prng.h │ ├── timer.c │ ├── timer.h │ ├── verify.c │ ├── verify.h │ ├── xalloc.c │ └── xalloc.h ├── join │ ├── .gitignore │ ├── Aggregates.hpp │ ├── CMakeLists.txt │ ├── DHT.hpp │ ├── DHT_old.hpp │ ├── DHT_symmetric.hpp │ ├── DoubleDHT.hpp │ ├── DoubleDHT_test.cpp │ ├── HashJoin.cpp │ ├── HashJoin.hpp │ ├── HashJoin_tests.cpp │ ├── HashSet.hpp │ ├── Hypercube.cpp │ ├── Hypercube.hpp │ ├── Hypercube_tests.cpp │ ├── KMeansMR.cpp │ ├── Local_graph_tests.cpp │ ├── MapReduce.cpp │ ├── MapReduce.hpp │ ├── MapReduce_tests.cpp │ ├── MatchesDHT.cpp │ ├── MatchesDHT.hpp │ ├── Query.cpp │ ├── Query.hpp │ ├── Relation_io_tests.cpp │ ├── Tuple.cpp │ ├── Tuple.hpp │ ├── convert2bin.cpp │ ├── double.txt │ ├── extract_timestamps.sh │ ├── hex_tri.soln.txt │ ├── hex_tri.txt │ ├── igor_grappa_baseline.rb │ ├── igor_grappa_sp2bench.rb │ ├── igor_grappa_squares_partition.rb │ ├── igor_grappa_triangles.rb │ ├── igor_grappa_twohop.rb │ ├── igor_interact_kmeans.rb │ ├── igor_interact_sp2bench.rb │ ├── igor_kmeans.rb │ ├── join.cpp │ ├── local_graph.cpp │ ├── local_graph.hpp │ ├── overlapping.txt │ ├── relation.hpp │ ├── relation_io.cpp │ ├── relation_io.hpp │ ├── scripts │ │ ├── activenodes.sh │ │ ├── forall.sh │ │ ├── getcolumn.sh │ │ ├── nodes_nested2names.sh │ │ └── pidlist.sh │ ├── single.txt │ ├── small_tri.soln.txt │ ├── small_tri.txt │ ├── sp2b.100mb.sh │ ├── sp2b.100t.sh │ ├── sp2b.1gb.sh │ ├── squares.cpp │ ├── squares.hpp │ ├── squares_bushy.cpp │ ├── squares_bushy.hpp │ ├── squares_partition.cpp │ ├── 
squares_partition.hpp │ ├── squares_partition_bushy.cpp │ ├── squares_partition_bushy.hpp │ ├── stats.cpp │ ├── stats.h │ ├── triangles.OldDHT.cpp │ ├── triangles.cpp │ ├── triangles.sql │ ├── triangles_partition.cpp │ ├── twohop.cpp │ ├── utility.cpp │ └── utility.hpp ├── nativegraph │ ├── CMakeLists.txt │ ├── README.md │ ├── bfs │ │ ├── CMakeLists.txt │ │ ├── bfs_beamer.cpp │ │ ├── bfs_queues.cpp │ │ ├── bfs_spmd.cpp │ │ ├── common.hpp │ │ ├── igor_bfs.rb │ │ └── main.cpp │ ├── cc │ │ ├── CMakeLists.txt │ │ ├── cc_kahan.hpp │ │ ├── igor_cc.rb │ │ └── main.cpp │ ├── sssp │ │ ├── CMakeLists.txt │ │ ├── sssp.cpp │ │ └── sssp.hpp │ └── verifier.hpp ├── pagerank │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── igor_grappa_pagerank.rb │ ├── mult_main.cpp │ ├── pagerank.R │ ├── pagerank.cpp │ ├── runexps.rb │ ├── runpr.rb │ ├── single.rb │ ├── spmv_mult.cpp │ └── spmv_mult.hpp ├── sort │ ├── CMakeLists.txt │ └── grappa │ │ ├── CMakeLists.txt │ │ ├── main.cpp │ │ ├── npb_intsort.h │ │ ├── sort.hpp │ │ ├── sort.rb │ │ ├── sort_test.cpp │ │ └── test.rb ├── util │ ├── CMakeLists.txt │ ├── convert │ │ └── convert.cpp │ └── otf2sqlite.cpp └── uts │ ├── .gitignore │ ├── AUTHORS │ ├── Changelog │ ├── LICENSE │ ├── Makefile.uts │ ├── README │ ├── README-Grappa.md │ ├── check_ctrk.pl │ ├── config │ ├── cray-mta │ ├── cray-x1 │ ├── cray-xmt │ ├── cray-xt │ ├── linux-c99 │ ├── linux-cluster │ ├── linux-workstation │ └── sgi-altix │ ├── configure.sh │ ├── ctrk.h │ ├── dequeue.c │ ├── dequeue.h │ ├── dlist.c │ ├── dlist.h │ ├── igor_grappa_uts.rb │ ├── mpi_worksharing.c │ ├── mpi_workstealing.c │ ├── rng │ ├── alfg.c │ ├── alfg.h │ ├── brg_endian.h │ ├── brg_sha1.c │ ├── brg_sha1.h │ ├── brg_types.h │ └── rng.h │ ├── runuts.rb │ ├── sample_trees.csh │ ├── sample_trees.sh │ ├── shared_dequeue.c │ ├── shared_dequeue.h │ ├── shared_dlist.c │ ├── shared_dlist.h │ ├── stats.c │ ├── time_poll.c │ ├── time_rng.c │ ├── upc_worksharing.c │ ├── uts.c │ ├── uts.h │ ├── uts_dfs.c │ 
├── uts_dfs_review.c │ ├── uts_dm.c │ ├── uts_dm.h │ ├── uts_grappa.cpp │ ├── uts_shm.c │ └── uts_upc_enhanced.c ├── bin ├── CMakeLists.txt ├── distcc_make ├── distcc_ninja ├── grappa_run ├── grappa_srun ├── launch_distcc.sh ├── settings.sh ├── srun_epilog.sh └── srun_prolog.rb ├── configure ├── doc ├── CMakeLists.txt ├── debugging.md ├── running.md ├── testing.md └── tutorial │ ├── CMakeLists.txt │ ├── addressing_linear.cpp │ ├── addressing_symmetric.cpp │ ├── delegates.cpp │ ├── hello_world_1.cpp │ ├── hello_world_2.cpp │ ├── search1.cpp │ ├── search2.cpp │ ├── tree.hpp │ └── tutorial.md ├── scratch └── CMakeLists.txt ├── system ├── Addressing.hpp ├── Addressing_tests.cpp ├── Aggregator.cpp ├── Aggregator.hpp ├── Aggregator_tests.cpp ├── Allocator.cpp ├── Allocator.hpp ├── Allocator_tests.cpp ├── Array.hpp ├── Array_tests.cpp ├── AsyncDelegate.cpp ├── AsyncDelegate.hpp ├── Barrier.cpp ├── Barrier.hpp ├── BufferVector.hpp ├── BufferVector_tests.cpp ├── CMakeLists.txt ├── Cache.cpp ├── Cache.hpp ├── Cache_tests.cpp ├── CallbackMetric.cpp ├── CallbackMetric.hpp ├── CallbackMetricImpl.hpp ├── ChunkAllocator.cpp ├── ChunkAllocator.hpp ├── Collective.cpp ├── Collective.hpp ├── Collective_tests.cpp ├── Communicator.cpp ├── Communicator.hpp ├── CommunicatorImpl.hpp ├── Communicator_tests.cpp ├── CompletionEvent.hpp ├── CompletionEvent_tests.cpp ├── ConditionVariable.hpp ├── ConditionVariableLocal.hpp ├── ContextSwitchLatency_tests.cpp ├── ContextSwitchRate_bench.cpp ├── CountingSemaphoreLocal.hpp ├── Delegate.cpp ├── Delegate.hpp ├── DelegateBase.hpp ├── Delegate_tests.cpp ├── Doxyfile.in ├── ExternalCountPayloadMessage.hpp ├── FileIO.cpp ├── FileIO.hpp ├── FileIO_tests.cpp ├── FlatCombiner.cpp ├── FlatCombiner.hpp ├── FlatCombiner_tests.cpp ├── FullEmpty.hpp ├── FullEmptyLocal.hpp ├── FullEmpty_tests.cpp ├── GlobalAllocator.cpp ├── GlobalAllocator.hpp ├── GlobalAllocator_tests.cpp ├── GlobalBag.hpp ├── GlobalCompletionEvent.cpp ├── GlobalCompletionEvent.hpp ├── 
GlobalCounter.hpp ├── GlobalHashMap.cpp ├── GlobalHashMap.hpp ├── GlobalHashSet.cpp ├── GlobalHashSet.hpp ├── GlobalHash_tests.cpp ├── GlobalMemory.cpp ├── GlobalMemory.hpp ├── GlobalMemoryChunk.cpp ├── GlobalMemoryChunk.hpp ├── GlobalMemoryChunk_tests.cpp ├── GlobalMemory_tests.cpp ├── GlobalVector.cpp ├── GlobalVector.hpp ├── GlobalVector_tests.cpp ├── Grappa.cpp ├── Grappa.hpp ├── Grappa.md ├── Gups_tests.cpp ├── HistogramMetric.cpp ├── HistogramMetric.hpp ├── IncoherentAcquirer.cpp ├── IncoherentAcquirer.hpp ├── IncoherentReleaser.cpp ├── IncoherentReleaser.hpp ├── LocaleSharedMemory.cpp ├── LocaleSharedMemory.hpp ├── LocaleSharedMemory_tests.cpp ├── Makefile.tau ├── Malloc_tests.cpp ├── MaxMetric.cpp ├── MaxMetric.hpp ├── MaxMetricImpl.hpp ├── Message.hpp ├── MessageBase.cpp ├── MessageBase.hpp ├── MessageBaseImpl.hpp ├── MessagePool.cpp ├── MessagePool.hpp ├── Message_tests.cpp ├── MetricBase.hpp ├── Metrics.cpp ├── Metrics.hpp ├── MetricsTools.hpp ├── Metrics_tests.cpp ├── Mutex.hpp ├── Mutex_tests.cpp ├── NTBuffer.cpp ├── NTBuffer.hpp ├── NTBuffer_tests.cpp ├── NTMessage.cpp ├── NTMessage.hpp ├── NTMessage_aggregator_tests.cpp ├── NTMessage_tests.cpp ├── New_delegate_tests.cpp ├── New_loop_tests.cpp ├── ParallelLoop.cpp ├── ParallelLoop.hpp ├── PerformanceTools.cpp ├── PerformanceTools.hpp ├── PoolAllocator.hpp ├── PoolAllocator_tests.cpp ├── Public_tasks_tests.cpp ├── PushBuffer.hpp ├── RDMAAggregator.cpp ├── RDMAAggregator.hpp ├── RDMAAggregator_tests.cpp ├── RDMABuffer.hpp ├── RateMeasure_tests.cpp ├── Reducer.hpp ├── Reducer_tests.cpp ├── ReuseList.hpp ├── ReuseMessage.hpp ├── ReuseMessageList.hpp ├── ReusePool.hpp ├── Scheduler_benchmarking_tests.cpp ├── Semaphore.hpp ├── Semaphore_tests.cpp ├── SharedMessagePool.cpp ├── SharedMessagePool.hpp ├── SimpleMetric.cpp ├── SimpleMetric.hpp ├── SimpleMetricImpl.hpp ├── SmallLocalSet.hpp ├── StateTimer.cpp ├── StateTimer.hpp ├── Stealing_tests.cpp ├── StringMetric.cpp ├── StringMetric.hpp ├── 
StringMetricImpl.hpp ├── SummarizingMetric.cpp ├── SummarizingMetric.hpp ├── SummarizingMetricImpl.hpp ├── SuspendedDelegate.hpp ├── Synchronization.hpp ├── Tasking.hpp ├── Tasking_tests.cpp ├── ThreadQueue.cpp ├── ThreadQueue.hpp ├── ThreadQueue_tests.cpp ├── Timestamp.cpp ├── Timestamp.hpp ├── Worker.cpp ├── Worker.hpp ├── boost_helpers.hpp ├── cluster_tau.sh ├── common.hpp ├── doxygen_footer.html ├── function_traits.hpp ├── graph │ ├── Graph.cpp │ ├── Graph.hpp │ ├── Graph_tests.cpp │ ├── KroneckerGenerator.cpp │ ├── TupleGraph.cpp │ └── TupleGraph.hpp ├── grappa-valgrind.supp ├── grappa_gdb.macros ├── runcontextswitch.rb ├── runlatencyswitch.rb ├── stack.S ├── stack.h ├── tasks │ ├── BasicScheduler.cpp │ ├── BasicScheduler.hpp │ ├── DictOut.hpp │ ├── GlobalQueue.cpp │ ├── GlobalQueue.hpp │ ├── Scheduler.hpp │ ├── StealQueue.cpp │ ├── StealQueue.hpp │ ├── Task.cpp │ ├── Task.hpp │ ├── TaskingScheduler.cpp │ └── TaskingScheduler.hpp ├── tests │ ├── igor_context_switch.rb │ ├── igor_datastructs.rb │ ├── igor_hashmap.rb │ ├── igor_hashset.rb │ ├── igor_queue.rb │ ├── igor_stack.rb │ └── igor_tests.rb └── utils │ ├── README │ ├── obj_grep.rb │ └── uniq.rb ├── third-party ├── CMakeLists.txt ├── bashflags │ ├── README.md │ ├── flags.bash │ └── test │ │ ├── bool.bash │ │ ├── echo.bash │ │ └── test.bash ├── downloads │ └── README.md ├── google-glog │ ├── AUTHORS │ ├── COPYING │ ├── ChangeLog │ ├── INSTALL │ ├── Makefile.am │ ├── Makefile.in │ ├── NEWS │ ├── README │ ├── README.windows │ ├── aclocal.m4 │ ├── compile │ ├── config.guess │ ├── config.sub │ ├── configure │ ├── configure.ac │ ├── depcomp │ ├── doc │ │ ├── designstyle.css │ │ └── glog.html │ ├── google-glog.sln │ ├── install-sh │ ├── libglog.pc.in │ ├── ltmain.sh │ ├── m4 │ │ ├── ac_have_attribute.m4 │ │ ├── ac_have_builtin_expect.m4 │ │ ├── ac_have_sync_val_compare_and_swap.m4 │ │ ├── ac_rwlock.m4 │ │ ├── acx_pthread.m4 │ │ ├── google_namespace.m4 │ │ ├── libtool.m4 │ │ ├── ltoptions.m4 │ │ ├── ltsugar.m4 │ │ 
├── ltversion.m4 │ │ ├── lt~obsolete.m4 │ │ ├── namespaces.m4 │ │ ├── pc_from_ucontext.m4 │ │ ├── stl_namespace.m4 │ │ └── using_operator.m4 │ ├── missing │ ├── mkinstalldirs │ ├── packages │ │ ├── deb.sh │ │ ├── deb │ │ │ ├── README │ │ │ ├── changelog │ │ │ ├── compat │ │ │ ├── control │ │ │ ├── copyright │ │ │ ├── docs │ │ │ ├── libgoogle-glog-dev.dirs │ │ │ ├── libgoogle-glog-dev.install │ │ │ ├── libgoogle-glog0.dirs │ │ │ ├── libgoogle-glog0.install │ │ │ └── rules │ │ ├── rpm.sh │ │ └── rpm │ │ │ └── rpm.spec │ ├── src │ │ ├── base │ │ │ ├── commandlineflags.h │ │ │ ├── googleinit.h │ │ │ └── mutex.h │ │ ├── config.h.in │ │ ├── config_for_unittests.h │ │ ├── demangle.cc │ │ ├── demangle.h │ │ ├── demangle_unittest.cc │ │ ├── demangle_unittest.sh │ │ ├── demangle_unittest.txt │ │ ├── glog │ │ │ ├── log_severity.h │ │ │ ├── logging.h.in │ │ │ ├── raw_logging.h.in │ │ │ ├── stl_logging.h.in │ │ │ └── vlog_is_on.h.in │ │ ├── googletest.h │ │ ├── logging.cc │ │ ├── logging_striplog_test.sh │ │ ├── logging_striptest10.cc │ │ ├── logging_striptest2.cc │ │ ├── logging_striptest_main.cc │ │ ├── logging_unittest.cc │ │ ├── logging_unittest.err │ │ ├── mock-log.h │ │ ├── mock-log_test.cc │ │ ├── raw_logging.cc │ │ ├── signalhandler.cc │ │ ├── signalhandler_unittest.cc │ │ ├── signalhandler_unittest.sh │ │ ├── stacktrace.h │ │ ├── stacktrace_generic-inl.h │ │ ├── stacktrace_libunwind-inl.h │ │ ├── stacktrace_powerpc-inl.h │ │ ├── stacktrace_unittest.cc │ │ ├── stacktrace_x86-inl.h │ │ ├── stacktrace_x86_64-inl.h │ │ ├── stl_logging_unittest.cc │ │ ├── symbolize.cc │ │ ├── symbolize.h │ │ ├── symbolize_unittest.cc │ │ ├── utilities.cc │ │ ├── utilities.h │ │ ├── utilities_unittest.cc │ │ ├── vlog_is_on.cc │ │ └── windows │ │ │ ├── config.h │ │ │ ├── glog │ │ │ ├── log_severity.h │ │ │ ├── logging.h │ │ │ ├── raw_logging.h │ │ │ ├── stl_logging.h │ │ │ └── vlog_is_on.h │ │ │ ├── port.cc │ │ │ ├── port.h │ │ │ └── preprocess.sh │ └── vsprojects │ │ ├── libglog │ │ └── 
libglog.vcproj │ │ ├── libglog_static │ │ └── libglog_static.vcproj │ │ ├── logging_unittest │ │ └── logging_unittest.vcproj │ │ └── logging_unittest_static │ │ └── logging_unittest_static.vcproj ├── graph500-generator │ ├── CMakeLists.txt │ ├── LICENSE_1_0.txt │ ├── Makefile.mpi │ ├── Makefile.omp │ ├── Makefile.seq │ ├── Makefile.xmt │ ├── README │ ├── generator_test_mpi.c │ ├── generator_test_omp.c │ ├── generator_test_seq.c │ ├── generator_test_xmt.c │ ├── graph_generator.c │ ├── graph_generator.h │ ├── make_graph.c │ ├── make_graph.h │ ├── mod_arith.h │ ├── mod_arith_32bit.h │ ├── mod_arith_64bit.h │ ├── mod_arith_xmt.h │ ├── mrg_transitions.c │ ├── splittable_mrg.c │ ├── splittable_mrg.h │ ├── user_settings.h │ ├── utils.c │ └── utils.h └── vampirtrace.rb └── util ├── CMakeLists.txt ├── common.sh ├── env.sh ├── experiment_utils.rb ├── grappa.cmake ├── grappa.mk ├── histogram.rb └── igor_common.rb /.gitattributes: -------------------------------------------------------------------------------- 1 | third-party/* linguist-vendored 2 | applications/NPB/* linguist-vendored 3 | applications/graph500/* linguist-vendored 4 | 5 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | These people have contributed to Grappa: 2 | * Jacob Nelson 3 | * Brandon Holt 4 | * Brandon Myers 5 | * Vincent Lee 6 | * Andrew Hunter 7 | * Simon Kahan 8 | * Luis Ceze 9 | * Mark Oskin 10 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | This distribution contains Grappa, a system for scaling irregular 2 | applications on commodity clusters. 3 | 4 | This software was created with Government support under DE 5 | AC05-76RL01830 awarded by the United States Department of Energy. The 6 | Government has certain rights in the software. 
7 | 8 | Most Grappa files are licensed under the New BSD License as described 9 | in the file COPYING. See individual files for details. 10 | 11 | As an exception, the files under the third-party/ and applications/ 12 | directories are licensed under their own separate licenses. Please see 13 | those files for their licensing terms. 14 | 15 | -------------------------------------------------------------------------------- /applications/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(nativegraph) 2 | add_subdirectory(graph500) 3 | add_subdirectory(sort) 4 | add_subdirectory(pagerank) 5 | add_subdirectory(demos) 6 | add_subdirectory(NPB/GRAPPA/IS) 7 | add_subdirectory(join) 8 | add_subdirectory(isopath) 9 | add_subdirectory(graphlab) 10 | add_subdirectory(util) 11 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/IS/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_grappa_application(intsort.exe 2 | intsort.cpp 3 | randlc.cpp 4 | randlc.hpp 5 | npb_intsort.hpp 6 | ) 7 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/IS/npb_intsort.hpp: -------------------------------------------------------------------------------- 1 | 2 | enum NPBClass { S, W, A, B, C, D, E }; 3 | static const int NKEY_LOG2[] = { 16, 20, 23, 25, 27, 29, 31 }; 4 | static const int MAX_KEY_LOG2[] = { 11, 16, 19, 21, 23, 27, 27 }; 5 | static const int NBUCKET_LOG2[] = { 10, 10, 10, 10, 10, 10, 10 }; 6 | 7 | inline NPBClass get_npb_class(char c) { 8 | switch (c) { 9 | case 'S': return NPBClass::S; 10 | case 'W': return NPBClass::W; 11 | case 'A': return NPBClass::A; 12 | case 'B': return NPBClass::B; 13 | case 'C': return NPBClass::C; 14 | case 'D': return NPBClass::D; 15 | case 'E': return NPBClass::E; 16 | default: return NPBClass::S; 17 | } 18 | } 19 | inline 
char npb_class_char(NPBClass c) { 20 | switch (c) { 21 | case NPBClass::S: return 'S'; 22 | case NPBClass::W: return 'W'; 23 | case NPBClass::A: return 'A'; 24 | case NPBClass::B: return 'B'; 25 | case NPBClass::C: return 'C'; 26 | case NPBClass::D: return 'D'; 27 | case NPBClass::E: return 'E'; 28 | default: return 'S'; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/IS/npbparams.h: -------------------------------------------------------------------------------- 1 | #define NUM_PROCS 32 2 | /* 3 | This file is generated automatically by the setparams utility. 4 | It sets the number of processors and the class of the NPB 5 | in this directory. Do not modify it by hand. */ 6 | 7 | #define COMPILETIME "11 Sep 2012" 8 | #define NPBVERSION "3.3" 9 | #define MPICC "cc" 10 | #define CFLAGS "-O3 -g" 11 | #define CLINK "$(MPICC)" 12 | #define CLINKFLAGS "-O3" 13 | #define CMPI_LIB "-L/usr/lib64/openmpi/lib -lmpi" 14 | #define CMPI_INC "-I/usr/include/openmpi-x86_64" 15 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/IS/randlc.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | double randlc( double *X, const double *A ); 4 | double find_my_seed( int kn, /* my processor rank, 0<=kn<=num procs */ 5 | int np, /* np = num procs */ 6 | long nn, /* total num of ran numbers, all procs */ 7 | double s, /* Ran num seed, for ex.: 314159265.00 */ 8 | double a ); /* Ran num gen mult, try 1220703125.00 */ 9 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/README: -------------------------------------------------------------------------------- 1 | This directory contains examples of make.def files that were used 2 | by the NPB team in testing the benchmarks on different platforms. 
3 | They can be used as starting points for make.def files for your 4 | own platform, but you may need to tailor them for best performance 5 | on your installation. A clean template can be found in directory 6 | `config'. 7 | Some examples of suite.def files are also provided. -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/make.def.dec_alpha: -------------------------------------------------------------------------------- 1 | #This is for a DEC Alpha 8400. The code will execute on a 2 | #single processor 3 | #Warning: parallel make does not work properly in general 4 | MPIF77 = f77 5 | FLINK = f77 6 | #Optimization -O5 breaks SP; works fine for all other codes 7 | FFLAGS = -O4 8 | 9 | MPICC = cc 10 | CLINK = cc 11 | CFLAGS = -O5 12 | 13 | include ../config/make.dummy 14 | 15 | CC = cc -g 16 | BINDIR = ../bin 17 | 18 | RAND = randi8 19 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/make.def.irix6.2: -------------------------------------------------------------------------------- 1 | #This is for a generic single-processor SGI workstation 2 | MPIF77 = f77 3 | FLINK = f77 4 | FFLAGS = -O3 5 | 6 | MPICC = cc 7 | CLINK = cc 8 | CFLAGS = -O3 9 | 10 | include ../config/make.dummy 11 | 12 | CC = cc -g 13 | BINDIR = ../bin 14 | 15 | RAND = randi8 16 | 17 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/make.def.origin: -------------------------------------------------------------------------------- 1 | # This is for an SGI Origin 2000 or 3000 with vendor MPI. The Fortran 2 | # record length is specified, so it can be used for the I/O benchmark. 
3 | # as well 4 | MPIF77 = f77 5 | FMPI_LIB = -lmpi 6 | FLINK = f77 -64 7 | FFLAGS = -O3 -64 8 | 9 | MPICC = cc 10 | CMPI_LIB = -lmpi 11 | CLINK = cc 12 | CFLAGS = -O3 13 | 14 | CC = cc -g 15 | BINDIR = ../bin 16 | 17 | RAND = randi8 18 | 19 | CONVERTFLAG = -DFORTRAN_REC_SIZE=4 20 | 21 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/make.def.sgi_powerchallenge: -------------------------------------------------------------------------------- 1 | # This is for the SGI PowerChallenge Array at NASA Ames. mrf77 and 2 | # mrcc are local scripts that invoke the proper MPI library. 3 | MPIF77 = mrf77 4 | FLINK = mrf77 5 | FFLAGS = -O3 -OPT:fold_arith_limit=1204 6 | 7 | MPICC = mrcc 8 | CLINK = mrcc 9 | CFLAGS = -O3 -OPT:fold_arith_limit=1204 10 | 11 | CC = cc -g 12 | BINDIR = ../bin 13 | 14 | RAND = randi8 15 | 16 | 17 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/make.def.sp2_babbage: -------------------------------------------------------------------------------- 1 | #This is for the IBM SP2 at Ames; mrf77 and mrcc are local scripts 2 | MPIF77 = mrf77 3 | FLINK = mrf77 4 | FFLAGS = -O3 5 | FLINKFLAGS = -bmaxdata:0x60000000 6 | 7 | MPICC = mrcc 8 | CLINK = mrcc 9 | CFLAGS = -O3 10 | CLINKFLAGS = -bmaxdata:0x60000000 11 | 12 | CC = cc -g 13 | 14 | BINDIR = ../bin 15 | 16 | RAND = randi8 17 | 18 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/make.def.t3d_cosmos: -------------------------------------------------------------------------------- 1 | #This is for the Cray T3D at the Jet Propulsion Laboratory 2 | MPIF77 = cf77 3 | FLINK = cf77 4 | FMPI_LIB = -L/usr/local/mpp/lib -lmpi 5 | FMPI_INC = -I/usr/local/mpp/lib/include/mpp 6 | FFLAGS = -dp -Wf-onoieeedivide -C cray-t3d 7 | #The following flags provide more effective optimization, but 
may 8 | #cause the random number generator randi8(_safe) to break in EP 9 | #FFLAGS = -dp -Wf-oaggress -Wf-onoieeedivide -C cray-t3d 10 | FLINKFLAGS = -Wl-Drdahead=on -C cray-t3d 11 | 12 | MPICC = cc 13 | CLINK = cc 14 | CMPI_LIB = -L/usr/local/mpp/lib -lmpi 15 | CMPI_INC = -I/usr/local/mpp/lib/include/mpp 16 | CFLAGS = -O3 -Tcray-t3d 17 | CLINKFLAGS = -Tcray-t3d 18 | 19 | CC = cc -g -Tcray-ymp 20 | BINDIR = ../bin 21 | 22 | CONVERTFLAG= -DCONVERTDOUBLE 23 | 24 | RAND = randi8 25 | 26 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.bt: -------------------------------------------------------------------------------- 1 | bt S 1 2 | bt S 4 3 | bt S 9 4 | bt S 16 5 | bt A 1 6 | bt A 4 7 | bt A 9 8 | bt A 16 9 | bt A 25 10 | bt A 36 11 | bt A 49 12 | bt A 64 13 | bt A 81 14 | bt A 100 15 | bt A 121 16 | bt B 1 17 | bt B 4 18 | bt B 9 19 | bt B 16 20 | bt B 25 21 | bt B 36 22 | bt B 49 23 | bt B 64 24 | bt B 81 25 | bt B 100 26 | bt B 121 27 | bt C 1 28 | bt C 4 29 | bt C 9 30 | bt C 16 31 | bt C 25 32 | bt C 36 33 | bt C 49 34 | bt C 64 35 | bt C 81 36 | bt C 100 37 | bt C 121 38 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.cg: -------------------------------------------------------------------------------- 1 | cg S 1 2 | cg S 2 3 | cg S 4 4 | cg S 8 5 | cg S 16 6 | cg A 1 7 | cg A 2 8 | cg A 4 9 | cg A 8 10 | cg A 16 11 | cg A 32 12 | cg A 64 13 | cg A 128 14 | cg B 1 15 | cg B 2 16 | cg B 4 17 | cg B 8 18 | cg B 16 19 | cg B 32 20 | cg B 64 21 | cg B 128 22 | cg C 1 23 | cg C 2 24 | cg C 4 25 | cg C 8 26 | cg C 16 27 | cg C 32 28 | cg C 64 29 | cg C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.ep: -------------------------------------------------------------------------------- 1 | ep S 1 2 | 
ep S 2 3 | ep S 4 4 | ep S 8 5 | ep S 16 6 | ep A 1 7 | ep A 2 8 | ep A 4 9 | ep A 8 10 | ep A 16 11 | ep A 32 12 | ep A 64 13 | ep A 128 14 | ep B 1 15 | ep B 2 16 | ep B 4 17 | ep B 8 18 | ep B 16 19 | ep B 32 20 | ep B 64 21 | ep B 128 22 | ep C 1 23 | ep C 2 24 | ep C 4 25 | ep C 8 26 | ep C 16 27 | ep C 32 28 | ep C 64 29 | ep C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.ft: -------------------------------------------------------------------------------- 1 | ft S 1 2 | ft S 2 3 | ft S 4 4 | ft S 8 5 | ft S 16 6 | ft A 1 7 | ft A 2 8 | ft A 4 9 | ft A 8 10 | ft A 16 11 | ft A 32 12 | ft A 64 13 | ft A 128 14 | ft B 1 15 | ft B 2 16 | ft B 4 17 | ft B 8 18 | ft B 16 19 | ft B 32 20 | ft B 64 21 | ft B 128 22 | ft C 1 23 | ft C 2 24 | ft C 4 25 | ft C 8 26 | ft C 16 27 | ft C 32 28 | ft C 64 29 | ft C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.is: -------------------------------------------------------------------------------- 1 | is S 1 2 | is S 2 3 | is S 4 4 | is S 8 5 | is S 16 6 | is A 1 7 | is A 2 8 | is A 4 9 | is A 8 10 | is A 16 11 | is A 32 12 | is A 64 13 | is A 128 14 | is B 1 15 | is B 2 16 | is B 4 17 | is B 8 18 | is B 16 19 | is B 32 20 | is B 64 21 | is B 128 22 | is C 1 23 | is C 2 24 | is C 4 25 | is C 8 26 | is C 16 27 | is C 32 28 | is C 64 29 | is C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.lu: -------------------------------------------------------------------------------- 1 | lu S 1 2 | lu S 2 3 | lu S 4 4 | lu S 8 5 | lu S 16 6 | lu A 1 7 | lu A 2 8 | lu A 4 9 | lu A 8 10 | lu A 16 11 | lu A 32 12 | lu A 64 13 | lu A 128 14 | lu B 1 15 | lu B 2 16 | lu B 4 17 | lu B 8 18 | lu B 16 19 | lu B 32 20 | lu B 64 21 | lu B 128 22 | lu C 1 23 | lu 
C 2 24 | lu C 4 25 | lu C 8 26 | lu C 16 27 | lu C 32 28 | lu C 64 29 | lu C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.mg: -------------------------------------------------------------------------------- 1 | mg S 1 2 | mg S 2 3 | mg S 4 4 | mg S 8 5 | mg S 16 6 | mg A 1 7 | mg A 2 8 | mg A 4 9 | mg A 8 10 | mg A 16 11 | mg A 32 12 | mg A 64 13 | mg A 128 14 | mg B 1 15 | mg B 2 16 | mg B 4 17 | mg B 8 18 | mg B 16 19 | mg B 32 20 | mg B 64 21 | mg B 128 22 | mg C 1 23 | mg C 2 24 | mg C 4 25 | mg C 8 26 | mg C 16 27 | mg C 32 28 | mg C 64 29 | mg C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.small: -------------------------------------------------------------------------------- 1 | bt S 1 2 | cg S 1 3 | ep S 1 4 | ft S 1 5 | is S 1 6 | lu S 1 7 | mg S 1 8 | sp S 1 9 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/NAS.samples/suite.def.sp: -------------------------------------------------------------------------------- 1 | sp S 1 2 | sp S 4 3 | sp S 9 4 | sp S 16 5 | sp A 1 6 | sp A 4 7 | sp A 9 8 | sp A 16 9 | sp A 25 10 | sp A 36 11 | sp A 49 12 | sp A 64 13 | sp A 81 14 | sp A 100 15 | sp A 121 16 | sp B 1 17 | sp B 4 18 | sp B 9 19 | sp B 16 20 | sp B 25 21 | sp B 36 22 | sp B 49 23 | sp B 64 24 | sp B 81 25 | sp B 100 26 | sp B 121 27 | sp C 1 28 | sp C 4 29 | sp C 9 30 | sp C 16 31 | sp C 25 32 | sp C 36 33 | sp C 49 34 | sp C 64 35 | sp C 81 36 | sp C 100 37 | sp C 121 38 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/make.dummy: -------------------------------------------------------------------------------- 1 | FMPI_LIB = -L../MPI_dummy -lmpi 2 | FMPI_INC = -I../MPI_dummy 3 | CMPI_LIB = -L../MPI_dummy -lmpi 4 | CMPI_INC = 
-I../MPI_dummy 5 | default:: ${PROGRAM} libmpi.a 6 | libmpi.a: 7 | cd ../MPI_dummy; $(MAKE) F77=$(MPIF77) CC=$(MPICC) 8 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/suite.def: -------------------------------------------------------------------------------- 1 | # config/suite.def 2 | # This file is used to build several benchmarks with a single command. 3 | # Typing "make suite" in the main directory will build all the benchmarks 4 | # specified in this file. 5 | # Each line of this file contains a benchmark name, class, and number 6 | # of nodes. The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 7 | # "lu", and "dt". 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E" 9 | # (except that no classes C, D and E for DT, and no class E for IS). 10 | # The number of nodes must be a legal number for a particular 11 | # benchmark. The utility which parses this file is primitive, so 12 | # formatting is inflexible. Separate name/class/number by tabs. 13 | # Comments start with "#" as the first character on a line. 14 | # No blank lines. 15 | # The following example builds 1 processor sample sizes of all benchmarks. 16 | is S 2 17 | 18 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/config/suite.def.template: -------------------------------------------------------------------------------- 1 | # config/suite.def 2 | # This file is used to build several benchmarks with a single command. 3 | # Typing "make suite" in the main directory will build all the benchmarks 4 | # specified in this file. 5 | # Each line of this file contains a benchmark name, class, and number 6 | # of nodes. The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 7 | # "lu", and "dt". 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E" 9 | # (except that no classes C, D and E for DT, and no class E for IS). 
10 | # The number of nodes must be a legal number for a particular 11 | # benchmark. The utility which parses this file is primitive, so 12 | # formatting is inflexible. Separate name/class/number by tabs. 13 | # Comments start with "#" as the first character on a line. 14 | # No blank lines. 15 | # The following example builds 1 processor sample sizes of all benchmarks. 16 | ft S 1 17 | mg S 1 18 | sp S 1 19 | lu S 1 20 | bt S 1 21 | is S 1 22 | ep S 1 23 | cg S 1 24 | dt S 1 25 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/sys/.gitignore: -------------------------------------------------------------------------------- 1 | setparams 2 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/sys/Makefile: -------------------------------------------------------------------------------- 1 | include ../config/make.def 2 | 3 | # Note that COMPILE is also defined in make.common and should 4 | # be the same. We can't include make.common because it has a lot 5 | # of other garbage. LINK is not defined in make.common because 6 | # ${MPI_LIB} needs to go at the end of the line. 7 | FCOMPILE = $(MPIF77) -c $(FMPI_INC) $(FFLAGS) 8 | 9 | all: setparams 10 | 11 | # setparams creates an npbparam.h file for each benchmark 12 | # configuration. 
npbparams.h also contains info about how a benchmark 13 | # was compiled and linked 14 | 15 | setparams: setparams.c ../config/make.def 16 | $(CC) ${CONVERTFLAG} -o setparams setparams.c 17 | 18 | 19 | clean: 20 | -rm -f setparams setparams.h npbparams.h 21 | -rm -f *~ *.o 22 | 23 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/sys/print_header: -------------------------------------------------------------------------------- 1 | echo ' =========================================' 2 | echo ' = NAS Parallel Benchmarks 3.3 =' 3 | echo ' = MPI/F77/C =' 4 | echo ' =========================================' 5 | echo '' 6 | -------------------------------------------------------------------------------- /applications/NPB/GRAPPA/sys/suite.awk: -------------------------------------------------------------------------------- 1 | BEGIN { SMAKE = "make" } { 2 | if ($1 !~ /^#/ && NF > 2) { 3 | printf "cd `echo %s|tr '[a-z]' '[A-Z]'`; %s clean;", $1, SMAKE; 4 | printf "%s CLASS=%s NPROCS=%s", SMAKE, $2, $3; 5 | if ( NF > 3 ) { 6 | if ( $4 ~ /^vec/ || $4 ~ /^VEC/ ) { 7 | printf " VERSION=%s", $4; 8 | if ( NF > 4 ) { 9 | printf " SUBTYPE=%s", $5; 10 | } 11 | } else { 12 | printf " SUBTYPE=%s", $4; 13 | if ( NF > 4 ) { 14 | printf " VERSION=%s", $5; 15 | } 16 | } 17 | } 18 | printf "; cd ..\n"; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /applications/NPB/MPI/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/add.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine add 5 | 6 | 
c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | c--------------------------------------------------------------------- 10 | c addition of update to the vector u 11 | c--------------------------------------------------------------------- 12 | 13 | include 'header.h' 14 | 15 | integer c, i, j, k, m 16 | 17 | do c = 1, ncells 18 | do k = start(3,c), cell_size(3,c)-end(3,c)-1 19 | do j = start(2,c), cell_size(2,c)-end(2,c)-1 20 | do i = start(1,c), cell_size(1,c)-end(1,c)-1 21 | do m = 1, 5 22 | u(m,i,j,k,c) = u(m,i,j,k,c) + rhs(m,i,j,k,c) 23 | enddo 24 | enddo 25 | enddo 26 | enddo 27 | enddo 28 | 29 | return 30 | end 31 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/adi.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine adi 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | call copy_faces 10 | 11 | call x_solve 12 | 13 | call y_solve 14 | 15 | call z_solve 16 | 17 | call add 18 | 19 | return 20 | end 21 | 22 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/btio_common.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine clear_timestep 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | include 'header.h' 10 | include 'mpinpb.h' 11 | 
12 | integer cio, kio, jio, ix 13 | 14 | do cio=1,ncells 15 | do kio=0, cell_size(3,cio)-1 16 | do jio=0, cell_size(2,cio)-1 17 | do ix=0,cell_size(1,cio)-1 18 | u(1,ix, jio,kio,cio) = 0 19 | u(2,ix, jio,kio,cio) = 0 20 | u(3,ix, jio,kio,cio) = 0 21 | u(4,ix, jio,kio,cio) = 0 22 | u(5,ix, jio,kio,cio) = 0 23 | enddo 24 | enddo 25 | enddo 26 | enddo 27 | 28 | return 29 | end 30 | 31 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/exact_solution.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine exact_solution(xi,eta,zeta,dtemp) 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | c--------------------------------------------------------------------- 10 | c this function returns the exact solution at point xi, eta, zeta 11 | c--------------------------------------------------------------------- 12 | 13 | include 'header.h' 14 | 15 | double precision xi, eta, zeta, dtemp(5) 16 | integer m 17 | 18 | do m = 1, 5 19 | dtemp(m) = ce(m,1) + 20 | > xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) + 21 | > eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+ 22 | > zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 23 | > zeta*ce(m,13)))) 24 | enddo 25 | 26 | return 27 | end 28 | 29 | 30 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/inputbt.data.sample: -------------------------------------------------------------------------------- 1 | 200 number of time steps 2 | 0.0008d0 dt for class A = 0.0008d0. 
class B = 0.0003d0 class C = 0.0001d0 3 | 64 64 64 4 | 5 0 write interval (optional read interval) for BTIO 5 | 0 1000000 number of nodes in collective buffering and buffer size for BTIO 6 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/mpinpb.h: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | include 'mpif.h' 6 | 7 | integer node, no_nodes, total_nodes, root, comm_setup, 8 | > comm_solve, comm_rhs, dp_type 9 | logical active 10 | common /mpistuff/ node, no_nodes, total_nodes, root, comm_setup, 11 | > comm_solve, comm_rhs, dp_type, active 12 | 13 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/work_lhs.h: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | c 4 | c work_lhs.h 5 | c 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | double precision fjac(5, 5, -2:MAX_CELL_DIM+1), 10 | > njac(5, 5, -2:MAX_CELL_DIM+1), 11 | > lhsa(5, 5, -1:MAX_CELL_DIM), 12 | > lhsb(5, 5, -1:MAX_CELL_DIM), 13 | > tmp1, tmp2, tmp3 14 | common /work_lhs/ fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3 15 | -------------------------------------------------------------------------------- /applications/NPB/MPI/BT/work_lhs_vec.h: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | c 4 | c work_lhs_vec.h 5 | c 6 | 
c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | double precision fjac(5, 5, -2:MAX_CELL_DIM+1, -2:MAX_CELL_DIM+1), 10 | > njac(5, 5, -2:MAX_CELL_DIM+1, -2:MAX_CELL_DIM+1), 11 | > lhsa(5, 5, -1:MAX_CELL_DIM, -1:MAX_CELL_DIM), 12 | > lhsb(5, 5, -1:MAX_CELL_DIM, -1:MAX_CELL_DIM), 13 | > tmp1, tmp2, tmp3 14 | common /work_lhs/ fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3 15 | -------------------------------------------------------------------------------- /applications/NPB/MPI/CG/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=cg 3 | BENCHMARKU=CG 4 | 5 | include ../config/make.def 6 | 7 | OBJS = cg.o ${COMMON}/print_results.o \ 8 | ${COMMON}/${RAND}.o ${COMMON}/timers.o 9 | 10 | include ../sys/make.common 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB} 14 | 15 | cg.o: cg.f mpinpb.h npbparams.h timing.h 16 | ${FCOMPILE} cg.f 17 | 18 | 19 | MPITYPE?=SRUN 20 | NNODE?=2 21 | PPN?=1 22 | SRUN_PARTITION?=grappa 23 | SRUN_HOST?=--partition $(SRUN_PARTITION) 24 | SRUN_NPROC=--nodes=$(NNODE) --ntasks-per-node=$(PPN) 25 | SRUN_HOST=--partition $(SRUN_PARTITION) 26 | SRUN_RUN=srun --resv-ports --cpu_bind=verbose,rank --exclusive --label --kill-on-bad-exit $(SRUN_FLAGS) $(SRUN_HOST) $(SRUN_NPROC) 27 | 28 | run: $(TARGET) 29 | $(SRUN_RUN) ./$< 30 | 31 | clean: 32 | - rm -f *.o *~ 33 | - rm -f npbparams.h core 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /applications/NPB/MPI/CG/mpinpb.h: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | include 'mpif.h' 6 | 7 | integer me, nprocs, root, dp_type 8 | common 
/mpistuff/ me, nprocs, root, dp_type 9 | 10 | -------------------------------------------------------------------------------- /applications/NPB/MPI/CG/runexps.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require '../../../../experiment_utils' 3 | 4 | 5 | db = "cg.db" 6 | table = :mpi 7 | 8 | 9 | cmd = "make run TARGET=../bin/cg.%{problem}.%{nproc} PPN=%{ppn} NNODE=%{nnode}" 10 | 11 | params = { 12 | trial: [1,2,3], 13 | problem: ['D','B','C','A'], 14 | nproc: [64], 15 | nnode: [8], 16 | ppn: expr('nproc / nnode') 17 | } 18 | 19 | parser = lambda{ |cmdout| 20 | records = {} 21 | 22 | cgreg = /(?<key>[a-zA-Z\s\/]+)\s+=\s+(?<value>.+)/ 23 | cmdout.scan(cgreg).each { |k,v| 24 | k = k.gsub(/\s+/,"_").gsub(/\//,"_per_") 25 | if v.match(/\d+\.\d+/) then 26 | v = v.to_f 27 | elsif v.match(/\d+/) then 28 | v = v.to_i 29 | end 30 | records[k.to_sym] = v 31 | } 32 | if records.length == 0 then 33 | raise "no records found" 34 | end 35 | 36 | records 37 | } 38 | 39 | run_experiments(cmd, params, db, table, &parser) 40 | -------------------------------------------------------------------------------- /applications/NPB/MPI/CG/timing.h: -------------------------------------------------------------------------------- 1 | integer t_total, t_conjg, t_rcomm, t_ncomm, t_last 2 | parameter (t_total=1, t_conjg=2, t_rcomm=3, t_ncomm=4, t_last=4) 3 | 4 | logical timeron 5 | common /timers/ timeron 6 | -------------------------------------------------------------------------------- /applications/NPB/MPI/DT/DGraph.h: -------------------------------------------------------------------------------- 1 | #ifndef _DGRAPH 2 | #define _DGRAPH 3 | 4 | #define BLOCK_SIZE 128 5 | #define SMALL_BLOCK_SIZE 32 6 | 7 | typedef struct{ 8 | int id; 9 | void *tail,*head; 10 | int length,width,attribute,maxWidth; 11 | }DGArc; 12 | 13 | typedef struct{ 14 | int maxInDegree,maxOutDegree; 15 | int inDegree,outDegree; 16 | int id; 17 | char *name; 18 | 
DGArc **inArc,**outArc; 19 | int depth,height,width; 20 | int color,attribute,address,verified; 21 | void *feat; 22 | }DGNode; 23 | 24 | typedef struct{ 25 | int maxNodes,maxArcs; 26 | int id; 27 | char *name; 28 | int numNodes,numArcs; 29 | DGNode **node; 30 | DGArc **arc; 31 | } DGraph; 32 | 33 | DGArc *newArc(DGNode *tl,DGNode *hd); 34 | void arcShow(DGArc *ar); 35 | DGNode *newNode(char *nm); 36 | void nodeShow(DGNode* nd); 37 | 38 | DGraph* newDGraph(char *nm); 39 | int AttachNode(DGraph *dg,DGNode *nd); 40 | int AttachArc(DGraph *dg,DGArc* nar); 41 | void graphShow(DGraph *dg,int DetailsLevel); 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /applications/NPB/MPI/DT/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=dt 3 | BENCHMARKU=DT 4 | 5 | include ../config/make.def 6 | 7 | include ../sys/make.common 8 | #Override PROGRAM 9 | DTPROGRAM = $(BINDIR)/$(BENCHMARK).$(CLASS).x 10 | 11 | OBJS = dt.o DGraph.o \ 12 | ${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o 13 | 14 | 15 | ${PROGRAM}: config ${OBJS} 16 | ${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB} 17 | 18 | .c.o: 19 | ${CCOMPILE} $< 20 | 21 | dt.o: dt.c npbparams.h 22 | DGraph.o: DGraph.c DGraph.h 23 | 24 | clean: 25 | - rm -f *.o *~ mputil* 26 | - rm -f dt npbparams.h core 27 | -------------------------------------------------------------------------------- /applications/NPB/MPI/DT/README: -------------------------------------------------------------------------------- 1 | Data Traffic benchmark DT is new in the NPB suite 2 | (released as part of NPB3.x-MPI package). 3 | ---------------------------------------------------- 4 | 5 | DT is written in C and same executable can run on any number of processors, 6 | provided this number is not less than the number of nodes in the communication 7 | graph. DT benchmark takes one argument: BH, WH, or SH. 
This argument 8 | specifies the communication graph Black Hole, White Hole, or SHuffle 9 | respectively. The current release contains verification numbers for 10 | CLASSES S, W, A, and B only. Classes C and D are defined, but verification 11 | numbers are not provided in this release. 12 | 13 | The following table summarizes the number of nodes in the communication 14 | graph based on CLASS and graph TYPE. 15 | 16 | CLASS N_Source N_Nodes(BH,WH) N_Nodes(SH) 17 | S 4 5 12 18 | W 8 11 32 19 | A 16 21 80 20 | B 32 43 192 21 | C 64 85 448 22 | D 128 171 1024 23 | -------------------------------------------------------------------------------- /applications/NPB/MPI/EP/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=ep 3 | BENCHMARKU=EP 4 | 5 | include ../config/make.def 6 | 7 | OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o ${COMMON}/timers.o 8 | 9 | include ../sys/make.common 10 | 11 | ${PROGRAM}: config ${OBJS} 12 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB} 13 | 14 | 15 | ep.o: ep.f mpinpb.h npbparams.h 16 | ${FCOMPILE} ep.f 17 | 18 | clean: 19 | - rm -f *.o *~ 20 | - rm -f npbparams.h core 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /applications/NPB/MPI/EP/README: -------------------------------------------------------------------------------- 1 | This code implements the random-number generator described in the 2 | NAS Parallel Benchmark document RNR Technical Report RNR-94-007. 3 | The code is "embarrassingly" parallel in that no communication is 4 | required for the generation of the random numbers itself. There is 5 | no special requirement on the number of processors used for running 6 | the benchmark. 
7 | -------------------------------------------------------------------------------- /applications/NPB/MPI/EP/mpinpb.h: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | include 'mpif.h' 6 | 7 | integer me, nprocs, root, dp_type 8 | common /mpistuff/ me, nprocs, root, dp_type 9 | 10 | -------------------------------------------------------------------------------- /applications/NPB/MPI/FT/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=ft 3 | BENCHMARKU=FT 4 | 5 | include ../config/make.def 6 | 7 | include ../sys/make.common 8 | 9 | OBJS = ft.o ${COMMON}/${RAND}.o ${COMMON}/print_results.o ${COMMON}/timers.o 10 | 11 | ${PROGRAM}: config ${OBJS} 12 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB} 13 | 14 | 15 | 16 | .f.o: 17 | ${FCOMPILE} $< 18 | 19 | ft.o: ft.f global.h mpinpb.h npbparams.h 20 | 21 | clean: 22 | - rm -f *.o *~ mputil* 23 | - rm -f ft npbparams.h core 24 | -------------------------------------------------------------------------------- /applications/NPB/MPI/FT/README: -------------------------------------------------------------------------------- 1 | This code implements the time integration of a three-dimensional 2 | partial differential equation using the Fast Fourier Transform. 3 | Some of the dimension statements are not F77 conforming and will 4 | not work using the g77 compiler. All dimension statements, 5 | however, are legal F90. -------------------------------------------------------------------------------- /applications/NPB/MPI/FT/inputft.data.sample: -------------------------------------------------------------------------------- 1 | 6 ! number of iterations 2 | 2 ! layout type. 0 = 0d, 1 = 1d, 2 = 2d 3 | 2 4 ! processor layout. 
0d must be "1 1"; 1d must be "1 N" 4 | -------------------------------------------------------------------------------- /applications/NPB/MPI/FT/mpinpb.h: -------------------------------------------------------------------------------- 1 | include 'mpif.h' 2 | c mpi data types 3 | integer dc_type 4 | common /mpistuff/ dc_type 5 | -------------------------------------------------------------------------------- /applications/NPB/MPI/IS/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=is 3 | BENCHMARKU=IS 4 | 5 | include ../config/make.def 6 | 7 | include ../sys/make.common 8 | 9 | OBJS = is.o ${COMMON}/c_print_results.o ${COMMON}/c_timers.o 10 | 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${CMPI_LIB} 14 | 15 | .c.o: 16 | ${CCOMPILE} $< 17 | 18 | is.o: is.c npbparams.h 19 | 20 | 21 | clean: 22 | - rm -f *.o *~ mputil* 23 | - rm -f is npbparams.h core 24 | -------------------------------------------------------------------------------- /applications/NPB/MPI/IS/npbparams.h: -------------------------------------------------------------------------------- 1 | #define CLASS 'A' 2 | #define NUM_PROCS 32 3 | /* 4 | This file is generated automatically by the setparams utility. 5 | It sets the number of processors and the class of the NPB 6 | in this directory. Do not modify it by hand. 
*/ 7 | 8 | #define COMPILETIME "11 Sep 2012" 9 | #define NPBVERSION "3.3" 10 | #define MPICC "cc" 11 | #define CFLAGS "-O3 -g" 12 | #define CLINK "$(MPICC)" 13 | #define CLINKFLAGS "-O3" 14 | #define CMPI_LIB "-L/usr/lib64/openmpi/lib -lmpi" 15 | #define CMPI_INC "-I/usr/include/openmpi-x86_64" 16 | -------------------------------------------------------------------------------- /applications/NPB/MPI/LU/inputlu.data.sample: -------------------------------------------------------------------------------- 1 | c 2 | c***controls printing of the progress of iterations: ipr inorm 3 | 1 250 4 | c 5 | c***the maximum no. of pseudo-time steps to be performed: nitmax 6 | 250 7 | c 8 | c***magnitude of the time step: dt 9 | 2.0e+00 10 | c 11 | c***relaxation factor for SSOR iterations: omega 12 | 1.2 13 | c 14 | c***tolerance levels for steady-state residuals: tolnwt(m),m=1,5 15 | 1.0e-08 1.0e-08 1.0e-08 1.0e-08 1.0e-08 16 | c 17 | c***number of grid points in xi and eta and zeta directions: nx ny nz 18 | 64 64 64 19 | c 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /applications/NPB/MPI/LU/mpinpb.h: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | include 'mpif.h' 6 | 7 | integer node, no_nodes, root, comm_setup, 8 | > comm_solve, comm_rhs, dp_type 9 | common /mpistuff/ node, no_nodes, root, comm_setup, 10 | > comm_solve, comm_rhs, dp_type 11 | 12 | -------------------------------------------------------------------------------- /applications/NPB/MPI/LU/timing.h: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | 3 | integer t_total, t_rhs, t_blts, t_buts, t_jacld, t_jacu, 4 | > t_exch, t_lcomm, t_ucomm, t_rcomm, 
t_last 5 | parameter (t_total=1, t_rhs=2, t_blts=3, t_buts=4, t_jacld=5, 6 | > t_jacu=6, t_exch=7, t_lcomm=8, t_ucomm=9, t_rcomm=10, 7 | > t_last=10) 8 | 9 | double precision maxtime 10 | logical timeron 11 | common/timer/maxtime, timeron 12 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MG/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=mg 3 | BENCHMARKU=MG 4 | 5 | include ../config/make.def 6 | 7 | OBJS = mg.o ${COMMON}/print_results.o \ 8 | ${COMMON}/${RAND}.o ${COMMON}/timers.o 9 | 10 | include ../sys/make.common 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB} 14 | 15 | mg.o: mg.f globals.h mpinpb.h npbparams.h 16 | ${FCOMPILE} mg.f 17 | 18 | clean: 19 | - rm -f *.o *~ 20 | - rm -f npbparams.h core 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MG/mg.input.sample: -------------------------------------------------------------------------------- 1 | 8 = top level 2 | 256 256 256 = nx ny nz 3 | 20 = nit 4 | 0 0 0 0 0 0 0 0 = debug_vec 5 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MG/mpinpb.h: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | include 'mpif.h' 6 | 7 | integer me, nprocs, root, dp_type 8 | common /mpistuff/ me, nprocs, root, dp_type 9 | 10 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MPI_dummy/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for MPI dummy library. 2 | # Must be edited for a specific machine. 
Does NOT read in 3 | # the make.def file of NPB 2.3 4 | F77 = f77 5 | CC = cc 6 | AR = ar 7 | 8 | # Enable if either Cray or IBM: (no such flag for most machines: see wtime.h) 9 | # MACHINE = -DCRAY 10 | # MACHINE = -DIBM 11 | 12 | libmpi.a: mpi_dummy.o mpi_dummy_c.o wtime.o 13 | $(AR) r libmpi.a mpi_dummy.o mpi_dummy_c.o wtime.o 14 | 15 | mpi_dummy.o: mpi_dummy.f mpif.h 16 | $(F77) -c mpi_dummy.f 17 | # For a Cray C90, try: 18 | # cf77 -dp -c mpi_dummy.f 19 | # For an IBM 590, try: 20 | # xlf -c mpi_dummy.f 21 | 22 | mpi_dummy_c.o: mpi_dummy.c mpi.h 23 | $(CC) -c ${MACHINE} -o mpi_dummy_c.o mpi_dummy.c 24 | 25 | wtime.o: wtime.c 26 | # For most machines or CRAY or IBM 27 | $(CC) -c ${MACHINE} wtime.c 28 | # For a precise timer on an SGI Power Challenge, try: 29 | # $(CC) -o wtime.o -c wtime_sgi64.c 30 | 31 | test: test.f 32 | $(F77) -o test -I. test.f -L. -lmpi 33 | 34 | 35 | 36 | clean: 37 | - rm -f *~ *.o 38 | - rm -f test libmpi.a 39 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MPI_dummy/mpif.h: -------------------------------------------------------------------------------- 1 | integer mpi_comm_world 2 | parameter (mpi_comm_world = 0) 3 | 4 | integer mpi_max, mpi_min, mpi_sum 5 | parameter (mpi_max = 1, mpi_sum = 2, mpi_min = 3) 6 | 7 | integer mpi_byte, mpi_integer, mpi_real, mpi_logical, 8 | > mpi_double_precision, mpi_complex, 9 | > mpi_double_complex 10 | parameter (mpi_double_precision = 1, 11 | $ mpi_integer = 2, 12 | $ mpi_byte = 3, 13 | $ mpi_real= 4, 14 | $ mpi_logical = 5, 15 | $ mpi_complex = 6, 16 | $ mpi_double_complex = 7) 17 | 18 | integer mpi_any_source 19 | parameter (mpi_any_source = -1) 20 | 21 | integer mpi_err_other 22 | parameter (mpi_err_other = -1) 23 | 24 | double precision mpi_wtime 25 | external mpi_wtime 26 | 27 | integer mpi_status_size 28 | parameter (mpi_status_size=3) 29 | -------------------------------------------------------------------------------- 
/applications/NPB/MPI/MPI_dummy/test.f: -------------------------------------------------------------------------------- 1 | program 2 | implicit none 3 | double precision t, mpi_wtime 4 | external mpi_wtime 5 | t = 0.0 6 | t = mpi_wtime() 7 | print *, t 8 | t = mpi_wtime() 9 | print *, t 10 | end 11 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MPI_dummy/wtime.c: -------------------------------------------------------------------------------- 1 | #include "wtime.h" 2 | #include 3 | 4 | void wtime(double *t) 5 | { 6 | static int sec = -1; 7 | struct timeval tv; 8 | gettimeofday(&tv, (void *)0); 9 | if (sec < 0) sec = tv.tv_sec; 10 | *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec; 11 | } 12 | 13 | 14 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MPI_dummy/wtime.f: -------------------------------------------------------------------------------- 1 | subroutine wtime(tim) 2 | real*8 tim 3 | dimension tarray(2) 4 | call etime(tarray) 5 | tim = tarray(1) 6 | return 7 | end 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /applications/NPB/MPI/MPI_dummy/wtime.h: -------------------------------------------------------------------------------- 1 | /* C/Fortran interface is different on different machines. 2 | * You may need to tweak this. 3 | */ 4 | 5 | 6 | #if defined(IBM) 7 | #define wtime wtime 8 | #elif defined(CRAY) 9 | #define wtime WTIME 10 | #else 11 | #define wtime wtime_ 12 | #endif 13 | -------------------------------------------------------------------------------- /applications/NPB/MPI/SP/README: -------------------------------------------------------------------------------- 1 | 2 | This code implements a 3D Multi-partition algorithm for the solution 3 | of the uncoupled systems of linear equations resulting from 4 | Beam-Warming approximate factorization. 
Consequently, the program 5 | must be run on a square number of processors. The included file 6 | "npbparams.h" contains a parameter statement which sets "maxcells" 7 | and "problem_size". The parameter maxcells must be set to the 8 | square root of the number of processors. For example, if running 9 | on 25 processors, then set maxcells=5. The standard problem sizes 10 | are problem_size=64 for class A, 102 for class B, and 162 for class C. 11 | 12 | The number of time steps and the time step size dt are set in the 13 | npbparams.h but may be overridden in the input deck "inputsp.data". 14 | The number of time steps is 400 for all three 15 | standard problems, and the appropriate time step sizes "dt" are 16 | 0.0015d0 for class A, 0.001d0 for class B, and 0.00067 for class C. 17 | 18 | -------------------------------------------------------------------------------- /applications/NPB/MPI/SP/add.f: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | subroutine add 6 | 7 | c--------------------------------------------------------------------- 8 | c--------------------------------------------------------------------- 9 | 10 | c--------------------------------------------------------------------- 11 | c addition of update to the vector u 12 | c--------------------------------------------------------------------- 13 | 14 | include 'header.h' 15 | 16 | integer c, i, j, k, m 17 | 18 | do c = 1, ncells 19 | do m = 1, 5 20 | do k = start(3,c), cell_size(3,c)-end(3,c)-1 21 | do j = start(2,c), cell_size(2,c)-end(2,c)-1 22 | do i = start(1,c), cell_size(1,c)-end(1,c)-1 23 | u(i,j,k,m,c) = u(i,j,k,m,c) + rhs(i,j,k,m,c) 24 | end do 25 | end do 26 | end do 27 | end do 28 | end do 29 | 30 | return 31 | end 32 | -------------------------------------------------------------------------------- 
/applications/NPB/MPI/SP/adi.f: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | subroutine adi 6 | 7 | c--------------------------------------------------------------------- 8 | c--------------------------------------------------------------------- 9 | 10 | call copy_faces 11 | 12 | call txinvr 13 | 14 | call x_solve 15 | 16 | call y_solve 17 | 18 | call z_solve 19 | 20 | call add 21 | 22 | return 23 | end 24 | 25 | -------------------------------------------------------------------------------- /applications/NPB/MPI/SP/exact_solution.f: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | subroutine exact_solution(xi,eta,zeta,dtemp) 6 | 7 | c--------------------------------------------------------------------- 8 | c--------------------------------------------------------------------- 9 | 10 | c--------------------------------------------------------------------- 11 | c this function returns the exact solution at point xi, eta, zeta 12 | c--------------------------------------------------------------------- 13 | 14 | include 'header.h' 15 | 16 | double precision xi, eta, zeta, dtemp(5) 17 | integer m 18 | 19 | do m = 1, 5 20 | dtemp(m) = ce(m,1) + 21 | > xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) + 22 | > eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+ 23 | > zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 24 | > zeta*ce(m,13)))) 25 | end do 26 | 27 | return 28 | end 29 | 30 | 31 | -------------------------------------------------------------------------------- /applications/NPB/MPI/SP/inputsp.data.sample: 
-------------------------------------------------------------------------------- 1 | 400 number of time steps 2 | 0.0015d0 dt for class A = 0.0015d0. class B = 0.001d0 class C = 0.00067d0 3 | 64 64 64 4 | -------------------------------------------------------------------------------- /applications/NPB/MPI/SP/mpinpb.h: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | include 'mpif.h' 6 | 7 | integer node, no_nodes, total_nodes, root, comm_setup, 8 | > comm_solve, comm_rhs, dp_type 9 | logical active 10 | common /mpistuff/ node, no_nodes, total_nodes, root, comm_setup, 11 | > comm_solve, comm_rhs, dp_type, active 12 | integer DEFAULT_TAG 13 | parameter (DEFAULT_TAG = 0) 14 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/README: -------------------------------------------------------------------------------- 1 | This directory contains examples of make.def files that were used 2 | by the NPB team in testing the benchmarks on different platforms. 3 | They can be used as starting points for make.def files for your 4 | own platform, but you may need to tailor them for best performance 5 | on your installation. A clean template can be found in directory 6 | `config'. 7 | Some examples of suite.def files are also provided. -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/make.def.dec_alpha: -------------------------------------------------------------------------------- 1 | #This is for a DEC Alpha 8400. 
The code will execute on a 2 | #single processor 3 | #Warning: parallel make does not work properly in general 4 | MPIF77 = f77 5 | FLINK = f77 6 | #Optimization -O5 breaks SP; works fine for all other codes 7 | FFLAGS = -O4 8 | 9 | MPICC = cc 10 | CLINK = cc 11 | CFLAGS = -O5 12 | 13 | include ../config/make.dummy 14 | 15 | CC = cc -g 16 | BINDIR = ../bin 17 | 18 | RAND = randi8 19 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/make.def.irix6.2: -------------------------------------------------------------------------------- 1 | #This is for a generic single-processor SGI workstation 2 | MPIF77 = f77 3 | FLINK = f77 4 | FFLAGS = -O3 5 | 6 | MPICC = cc 7 | CLINK = cc 8 | CFLAGS = -O3 9 | 10 | include ../config/make.dummy 11 | 12 | CC = cc -g 13 | BINDIR = ../bin 14 | 15 | RAND = randi8 16 | 17 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/make.def.origin: -------------------------------------------------------------------------------- 1 | # This is for an SGI Origin 2000 or 3000 with vendor MPI. The Fortran 2 | # record length is specified, so it can be used for the I/O benchmark 3 | # as well. 4 | MPIF77 = f77 5 | FMPI_LIB = -lmpi 6 | FLINK = f77 -64 7 | FFLAGS = -O3 -64 8 | 9 | MPICC = cc 10 | CMPI_LIB = -lmpi 11 | CLINK = cc 12 | CFLAGS = -O3 13 | 14 | CC = cc -g 15 | BINDIR = ../bin 16 | 17 | RAND = randi8 18 | 19 | CONVERTFLAG = -DFORTRAN_REC_SIZE=4 20 | 21 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/make.def.sgi_powerchallenge: -------------------------------------------------------------------------------- 1 | # This is for the SGI PowerChallenge Array at NASA Ames. mrf77 and 2 | # mrcc are local scripts that invoke the proper MPI library. 
3 | MPIF77 = mrf77 4 | FLINK = mrf77 5 | FFLAGS = -O3 -OPT:fold_arith_limit=1204 6 | 7 | MPICC = mrcc 8 | CLINK = mrcc 9 | CFLAGS = -O3 -OPT:fold_arith_limit=1204 10 | 11 | CC = cc -g 12 | BINDIR = ../bin 13 | 14 | RAND = randi8 15 | 16 | 17 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/make.def.sp2_babbage: -------------------------------------------------------------------------------- 1 | #This is for the IBM SP2 at Ames; mrf77 and mrcc are local scripts 2 | MPIF77 = mrf77 3 | FLINK = mrf77 4 | FFLAGS = -O3 5 | FLINKFLAGS = -bmaxdata:0x60000000 6 | 7 | MPICC = mrcc 8 | CLINK = mrcc 9 | CFLAGS = -O3 10 | CLINKFLAGS = -bmaxdata:0x60000000 11 | 12 | CC = cc -g 13 | 14 | BINDIR = ../bin 15 | 16 | RAND = randi8 17 | 18 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/make.def.t3d_cosmos: -------------------------------------------------------------------------------- 1 | #This is for the Cray T3D at the Jet Propulsion Laboratory 2 | MPIF77 = cf77 3 | FLINK = cf77 4 | FMPI_LIB = -L/usr/local/mpp/lib -lmpi 5 | FMPI_INC = -I/usr/local/mpp/lib/include/mpp 6 | FFLAGS = -dp -Wf-onoieeedivide -C cray-t3d 7 | #The following flags provide more effective optimization, but may 8 | #cause the random number generator randi8(_safe) to break in EP 9 | #FFLAGS = -dp -Wf-oaggress -Wf-onoieeedivide -C cray-t3d 10 | FLINKFLAGS = -Wl-Drdahead=on -C cray-t3d 11 | 12 | MPICC = cc 13 | CLINK = cc 14 | CMPI_LIB = -L/usr/local/mpp/lib -lmpi 15 | CMPI_INC = -I/usr/local/mpp/lib/include/mpp 16 | CFLAGS = -O3 -Tcray-t3d 17 | CLINKFLAGS = -Tcray-t3d 18 | 19 | CC = cc -g -Tcray-ymp 20 | BINDIR = ../bin 21 | 22 | CONVERTFLAG= -DCONVERTDOUBLE 23 | 24 | RAND = randi8 25 | 26 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.bt: 
-------------------------------------------------------------------------------- 1 | bt S 1 2 | bt S 4 3 | bt S 9 4 | bt S 16 5 | bt A 1 6 | bt A 4 7 | bt A 9 8 | bt A 16 9 | bt A 25 10 | bt A 36 11 | bt A 49 12 | bt A 64 13 | bt A 81 14 | bt A 100 15 | bt A 121 16 | bt B 1 17 | bt B 4 18 | bt B 9 19 | bt B 16 20 | bt B 25 21 | bt B 36 22 | bt B 49 23 | bt B 64 24 | bt B 81 25 | bt B 100 26 | bt B 121 27 | bt C 1 28 | bt C 4 29 | bt C 9 30 | bt C 16 31 | bt C 25 32 | bt C 36 33 | bt C 49 34 | bt C 64 35 | bt C 81 36 | bt C 100 37 | bt C 121 38 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.cg: -------------------------------------------------------------------------------- 1 | cg S 1 2 | cg S 2 3 | cg S 4 4 | cg S 8 5 | cg S 16 6 | cg A 1 7 | cg A 2 8 | cg A 4 9 | cg A 8 10 | cg A 16 11 | cg A 32 12 | cg A 64 13 | cg A 128 14 | cg B 1 15 | cg B 2 16 | cg B 4 17 | cg B 8 18 | cg B 16 19 | cg B 32 20 | cg B 64 21 | cg B 128 22 | cg C 1 23 | cg C 2 24 | cg C 4 25 | cg C 8 26 | cg C 16 27 | cg C 32 28 | cg C 64 29 | cg C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.ep: -------------------------------------------------------------------------------- 1 | ep S 1 2 | ep S 2 3 | ep S 4 4 | ep S 8 5 | ep S 16 6 | ep A 1 7 | ep A 2 8 | ep A 4 9 | ep A 8 10 | ep A 16 11 | ep A 32 12 | ep A 64 13 | ep A 128 14 | ep B 1 15 | ep B 2 16 | ep B 4 17 | ep B 8 18 | ep B 16 19 | ep B 32 20 | ep B 64 21 | ep B 128 22 | ep C 1 23 | ep C 2 24 | ep C 4 25 | ep C 8 26 | ep C 16 27 | ep C 32 28 | ep C 64 29 | ep C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.ft: -------------------------------------------------------------------------------- 1 | ft S 1 2 | ft S 2 3 | ft S 4 4 | ft S 8 5 | ft S 16 6 | ft A 1 7 | ft A 
2 8 | ft A 4 9 | ft A 8 10 | ft A 16 11 | ft A 32 12 | ft A 64 13 | ft A 128 14 | ft B 1 15 | ft B 2 16 | ft B 4 17 | ft B 8 18 | ft B 16 19 | ft B 32 20 | ft B 64 21 | ft B 128 22 | ft C 1 23 | ft C 2 24 | ft C 4 25 | ft C 8 26 | ft C 16 27 | ft C 32 28 | ft C 64 29 | ft C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.is: -------------------------------------------------------------------------------- 1 | is S 1 2 | is S 2 3 | is S 4 4 | is S 8 5 | is S 16 6 | is A 1 7 | is A 2 8 | is A 4 9 | is A 8 10 | is A 16 11 | is A 32 12 | is A 64 13 | is A 128 14 | is B 1 15 | is B 2 16 | is B 4 17 | is B 8 18 | is B 16 19 | is B 32 20 | is B 64 21 | is B 128 22 | is C 1 23 | is C 2 24 | is C 4 25 | is C 8 26 | is C 16 27 | is C 32 28 | is C 64 29 | is C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.lu: -------------------------------------------------------------------------------- 1 | lu S 1 2 | lu S 2 3 | lu S 4 4 | lu S 8 5 | lu S 16 6 | lu A 1 7 | lu A 2 8 | lu A 4 9 | lu A 8 10 | lu A 16 11 | lu A 32 12 | lu A 64 13 | lu A 128 14 | lu B 1 15 | lu B 2 16 | lu B 4 17 | lu B 8 18 | lu B 16 19 | lu B 32 20 | lu B 64 21 | lu B 128 22 | lu C 1 23 | lu C 2 24 | lu C 4 25 | lu C 8 26 | lu C 16 27 | lu C 32 28 | lu C 64 29 | lu C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.mg: -------------------------------------------------------------------------------- 1 | mg S 1 2 | mg S 2 3 | mg S 4 4 | mg S 8 5 | mg S 16 6 | mg A 1 7 | mg A 2 8 | mg A 4 9 | mg A 8 10 | mg A 16 11 | mg A 32 12 | mg A 64 13 | mg A 128 14 | mg B 1 15 | mg B 2 16 | mg B 4 17 | mg B 8 18 | mg B 16 19 | mg B 32 20 | mg B 64 21 | mg B 128 22 | mg C 1 23 | mg C 2 24 | mg C 4 25 | mg C 8 26 | mg C 16 27 | mg C 32 28 | mg C 64 29 
| mg C 128 30 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.small: -------------------------------------------------------------------------------- 1 | bt S 1 2 | cg S 1 3 | ep S 1 4 | ft S 1 5 | is S 1 6 | lu S 1 7 | mg S 1 8 | sp S 1 9 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/NAS.samples/suite.def.sp: -------------------------------------------------------------------------------- 1 | sp S 1 2 | sp S 4 3 | sp S 9 4 | sp S 16 5 | sp A 1 6 | sp A 4 7 | sp A 9 8 | sp A 16 9 | sp A 25 10 | sp A 36 11 | sp A 49 12 | sp A 64 13 | sp A 81 14 | sp A 100 15 | sp A 121 16 | sp B 1 17 | sp B 4 18 | sp B 9 19 | sp B 16 20 | sp B 25 21 | sp B 36 22 | sp B 49 23 | sp B 64 24 | sp B 81 25 | sp B 100 26 | sp B 121 27 | sp C 1 28 | sp C 4 29 | sp C 9 30 | sp C 16 31 | sp C 25 32 | sp C 36 33 | sp C 49 34 | sp C 64 35 | sp C 81 36 | sp C 100 37 | sp C 121 38 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/make.dummy: -------------------------------------------------------------------------------- 1 | FMPI_LIB = -L../MPI_dummy -lmpi 2 | FMPI_INC = -I../MPI_dummy 3 | CMPI_LIB = -L../MPI_dummy -lmpi 4 | CMPI_INC = -I../MPI_dummy 5 | default:: ${PROGRAM} libmpi.a 6 | libmpi.a: 7 | cd ../MPI_dummy; $(MAKE) F77=$(MPIF77) CC=$(MPICC) 8 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/suite.def: -------------------------------------------------------------------------------- 1 | # config/suite.def 2 | # This file is used to build several benchmarks with a single command. 3 | # Typing "make suite" in the main directory will build all the benchmarks 4 | # specified in this file. 5 | # Each line of this file contains a benchmark name, class, and number 6 | # of nodes. 
The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 7 | # "lu", and "dt". 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E" 9 | # (except that no classes C, D and E for DT, and no class E for IS). 10 | # The number of nodes must be a legal number for a particular 11 | # benchmark. The utility which parses this file is primitive, so 12 | # formatting is inflexible. Separate name/class/number by tabs. 13 | # Comments start with "#" as the first character on a line. 14 | # No blank lines. 15 | # The following example builds 1 processor sample sizes of all benchmarks. 16 | is S 1 17 | is S 2 18 | is S 4 19 | is S 8 20 | is S 16 21 | is S 32 22 | is S 48 23 | is S 32 24 | is S 32 25 | is S 32 26 | 27 | -------------------------------------------------------------------------------- /applications/NPB/MPI/config/suite.def.template: -------------------------------------------------------------------------------- 1 | # config/suite.def 2 | # This file is used to build several benchmarks with a single command. 3 | # Typing "make suite" in the main directory will build all the benchmarks 4 | # specified in this file. 5 | # Each line of this file contains a benchmark name, class, and number 6 | # of nodes. The name is one of "cg", "is", "ep", "mg", "ft", "sp", "bt", 7 | # "lu", and "dt". 8 | # The class is one of "S", "W", "A", "B", "C", "D", and "E" 9 | # (except that no classes C, D and E for DT, and no class E for IS). 10 | # The number of nodes must be a legal number for a particular 11 | # benchmark. The utility which parses this file is primitive, so 12 | # formatting is inflexible. Separate name/class/number by tabs. 13 | # Comments start with "#" as the first character on a line. 14 | # No blank lines. 15 | # The following example builds 1 processor sample sizes of all benchmarks. 
16 | ft S 1 17 | mg S 1 18 | sp S 1 19 | lu S 1 20 | bt S 1 21 | is S 1 22 | ep S 1 23 | cg S 1 24 | dt S 1 25 | -------------------------------------------------------------------------------- /applications/NPB/MPI/sys/.gitignore: -------------------------------------------------------------------------------- 1 | setparams 2 | -------------------------------------------------------------------------------- /applications/NPB/MPI/sys/Makefile: -------------------------------------------------------------------------------- 1 | include ../config/make.def 2 | 3 | # Note that COMPILE is also defined in make.common and should 4 | # be the same. We can't include make.common because it has a lot 5 | # of other garbage. LINK is not defined in make.common because 6 | # ${MPI_LIB} needs to go at the end of the line. 7 | FCOMPILE = $(MPIF77) -c $(FMPI_INC) $(FFLAGS) 8 | 9 | all: setparams 10 | 11 | # setparams creates an npbparam.h file for each benchmark 12 | # configuration. npbparams.h also contains info about how a benchmark 13 | # was compiled and linked 14 | 15 | setparams: setparams.c ../config/make.def 16 | $(CC) ${CONVERTFLAG} -o setparams setparams.c 17 | 18 | 19 | clean: 20 | -rm -f setparams setparams.h npbparams.h 21 | -rm -f *~ *.o 22 | 23 | -------------------------------------------------------------------------------- /applications/NPB/MPI/sys/print_header: -------------------------------------------------------------------------------- 1 | echo ' =========================================' 2 | echo ' = NAS Parallel Benchmarks 3.3 =' 3 | echo ' = MPI/F77/C =' 4 | echo ' =========================================' 5 | echo '' 6 | -------------------------------------------------------------------------------- /applications/NPB/MPI/sys/suite.awk: -------------------------------------------------------------------------------- 1 | BEGIN { SMAKE = "make" } { 2 | if ($1 !~ /^#/ && NF > 2) { 3 | printf "cd `echo %s|tr '[a-z]' '[A-Z]'`; %s clean;", $1, SMAKE; 4 | 
printf "%s CLASS=%s NPROCS=%s", SMAKE, $2, $3; 5 | if ( NF > 3 ) { 6 | if ( $4 ~ /^vec/ || $4 ~ /^VEC/ ) { 7 | printf " VERSION=%s", $4; 8 | if ( NF > 4 ) { 9 | printf " SUBTYPE=%s", $5; 10 | } 11 | } else { 12 | printf " SUBTYPE=%s", $4; 13 | if ( NF > 4 ) { 14 | printf " VERSION=%s", $5; 15 | } 16 | } 17 | } 18 | printf "; cd ..\n"; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /applications/NPB/NPB3.3-HPF.README: -------------------------------------------------------------------------------- 1 | The HPF version of NPB is not included in this distribution. 2 | Please download it from NPB3.0 instead. 3 | 4 | http://www.nas.nasa.gov/Software/NPB 5 | -------------------------------------------------------------------------------- /applications/NPB/NPB3.3-JAV.README: -------------------------------------------------------------------------------- 1 | The Java version of NPB is not included in this distribution. 2 | Please download it from NPB3.0 instead. 
3 | 4 | http://www.nas.nasa.gov/Software/NPB 5 | -------------------------------------------------------------------------------- /applications/NPB/OMP/BT/add.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine add 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | c--------------------------------------------------------------------- 10 | c addition of update to the vector u 11 | c--------------------------------------------------------------------- 12 | 13 | include 'header.h' 14 | 15 | integer i, j, k, m 16 | 17 | if (timeron) call timer_start(t_add) 18 | !$omp parallel do default(shared) private(i,j,k,m) 19 | do k = 1, grid_points(3)-2 20 | do j = 1, grid_points(2)-2 21 | do i = 1, grid_points(1)-2 22 | do m = 1, 5 23 | u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k) 24 | enddo 25 | enddo 26 | enddo 27 | enddo 28 | if (timeron) call timer_stop(t_add) 29 | 30 | return 31 | end 32 | -------------------------------------------------------------------------------- /applications/NPB/OMP/BT/adi.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine adi 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | call compute_rhs 10 | 11 | call x_solve 12 | 13 | call y_solve 14 | 15 | call z_solve 16 | 17 | call add 18 | 19 | return 20 | end 21 | 22 | -------------------------------------------------------------------------------- /applications/NPB/OMP/BT/exact_solution.f: 
-------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine exact_solution(xi,eta,zeta,dtemp) 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | c--------------------------------------------------------------------- 10 | c this function returns the exact solution at point xi, eta, zeta 11 | c--------------------------------------------------------------------- 12 | 13 | include 'header.h' 14 | 15 | double precision xi, eta, zeta, dtemp(5) 16 | integer m 17 | 18 | do m = 1, 5 19 | dtemp(m) = ce(m,1) + 20 | > xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) + 21 | > eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+ 22 | > zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 23 | > zeta*ce(m,13)))) 24 | enddo 25 | 26 | return 27 | end 28 | 29 | 30 | -------------------------------------------------------------------------------- /applications/NPB/OMP/BT/inputbt.data.sample: -------------------------------------------------------------------------------- 1 | 60 number of time steps 2 | 0.01d0 dt for class A = 0.0008d0. 
class B = 0.0003d0 class C = 0.0001d0 3 | 12 12 12 4 | -------------------------------------------------------------------------------- /applications/NPB/OMP/BT/work_lhs.h: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | c 4 | c work_lhs.h 5 | c 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | c 9 | double precision fjac(5, 5, 0:problem_size), 10 | > njac(5, 5, 0:problem_size), 11 | > lhs (5, 5, 3, 0:problem_size), 12 | > tmp1, tmp2, tmp3 13 | common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3 14 | !$omp threadprivate (/work_lhs/) 15 | -------------------------------------------------------------------------------- /applications/NPB/OMP/BT/work_lhs_vec.h: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | c 4 | c work_lhs_vec.h 5 | c 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | c 9 | double precision fjac(5, 5, 0:problem_size, 0:problem_size), 10 | > njac(5, 5, 0:problem_size, 0:problem_size), 11 | > lhs (5, 5, 3, 0:problem_size, 0:problem_size), 12 | > tmp1, tmp2, tmp3 13 | common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3 14 | !$omp threadprivate (/work_lhs/) 15 | -------------------------------------------------------------------------------- /applications/NPB/OMP/CG/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=cg 3 | BENCHMARKU=CG 4 | 5 | include ../config/make.def 6 | 7 | OBJS = cg.o 
${COMMON}/print_results.o \ 8 | ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o 9 | 10 | include ../sys/make.common 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB} 14 | 15 | cg.o: cg.f globals.h npbparams.h 16 | ${FCOMPILE} cg.f 17 | 18 | run: 19 | OMP_NUM_THREADS=$(OMP_NUM_THREADS) ../bin/cg.${PROBLEM}.x 20 | 21 | clean: 22 | - rm -f *.o *~ 23 | - rm -f npbparams.h core 24 | - if [ -d rii_files ]; then rm -r rii_files; fi 25 | -------------------------------------------------------------------------------- /applications/NPB/OMP/CG/README.carefully: -------------------------------------------------------------------------------- 1 | Note: please observe that in the routine conj_grad three 2 | implementations of the sparse matrix-vector multiply have 3 | been supplied. The default matrix-vector multiply is not 4 | loop unrolled. The alternate implementations are unrolled 5 | to a depth of 2 and unrolled to a depth of 8. Please 6 | experiment with these to find the fastest for your particular 7 | architecture. If reporting timing results, any of these three may 8 | be used without penalty. 9 | 10 | Performance examples: 11 | The non-unrolled version of the multiply is actually (slightly: 12 | maybe 5%) faster on the sp2-66MHz-WN on 16 nodes than is the 13 | unrolled-by-2 version below. On the Cray t3d, the reverse is true, 14 | i.e., the unrolled-by-two version is some 10% faster. 15 | The unrolled-by-8 version below is significantly faster 16 | on the Cray t3d - overall speed of code is 1.5 times faster.
17 | -------------------------------------------------------------------------------- /applications/NPB/OMP/CG/runexps.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require '../../../../experiment_utils' 3 | 4 | 5 | db = "cg.db" 6 | table = :omp_n02 7 | 8 | 9 | cmd = "make run PROBLEM=%{problem} OMP_NUM_THREADS=%{nthreads}" 10 | 11 | params = { 12 | trial: [1,2,3], 13 | problem: ['A','B','C','D'], 14 | nthreads: [32],#[16,24,32,48,64], 15 | } 16 | 17 | parser = lambda{ |cmdout| 18 | records = {} 19 | 20 | cgreg = /(?<key>[a-zA-Z\s\/]+)\s+=\s+(?<value>.+)/ 21 | cmdout.scan(cgreg).each { |k,v| 22 | k = k.gsub(/\s+/,"_").gsub(/\//,"_per_") 23 | if v.match(/\d+\.\d+/) then 24 | v = v.to_f 25 | elsif v.match(/\d+/) then 26 | v = v.to_i 27 | end 28 | records[k.to_sym] = v 29 | } 30 | 31 | records 32 | } 33 | 34 | run_experiments(cmd, params, db, table, &parser) 35 | -------------------------------------------------------------------------------- /applications/NPB/OMP/DC/ADC.par: -------------------------------------------------------------------------------- 1 | attrNum=12 2 | measuresNum=1 3 | tuplesNum=100 4 | INVERSE_ENDIAN=0 5 | fileName=ADC 6 | -------------------------------------------------------------------------------- /applications/NPB/OMP/DC/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=dc 3 | BENCHMARKU=DC 4 | 5 | include ../config/make.def 6 | 7 | include ../sys/make.common 8 | 9 | OBJS = adc.o dc.o extbuild.o rbt.o jobcntl.o \ 10 | ${COMMON}/c_print_results.o \ 11 | ${COMMON}/c_timers.o ${COMMON}/c_wtime.o 12 | 13 | 14 | # npbparams.h is provided for backward compatibility with NPB compilation 15 | # header.h: npbparams.h 16 | 17 | ${PROGRAM}: config ${OBJS} 18 | ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${C_LIB} 19 | 20 | .c.o: 21 | ${CCOMPILE} $< 22 | 23 | adc.o: adc.c npbparams.h 24 | dc.o: dc.c adcc.h adc.h macrodef.h
npbparams.h 25 | extbuild.o: extbuild.c adcc.h adc.h macrodef.h npbparams.h 26 | rbt.o: rbt.c adcc.h adc.h rbt.h macrodef.h npbparams.h 27 | jobcntl.o: jobcntl.c adcc.h adc.h macrodef.h npbparams.h 28 | 29 | clean: 30 | - rm -f *.o 31 | - rm -f npbparams.h core 32 | - rm -f {../,}ADC.{logf,view,dat,viewsz,groupby,chunks}.* 33 | 34 | -------------------------------------------------------------------------------- /applications/NPB/OMP/DC/macrodef.h: -------------------------------------------------------------------------------- 1 | #define PutErrMsg(msg) {fprintf(stderr," %s, errno = %d\n", msg, errno);} 2 | 3 | #define WriteToFile(ptr,size,nitems,stream,logf) if( fwrite(ptr,size,nitems,stream) != nitems )\ 4 | {\ 5 | fprintf(stderr,"\n Write error from WriteToFile()\n"); return ADC_WRITE_FAILED; \ 6 | } 7 | 8 | #ifdef WINNT 9 | #define FSEEK(stream,offset,whence) fseek(stream, (long)offset,whence); 10 | #else 11 | #define FSEEK(stream,offset,whence) fseek(stream,offset,whence); 12 | #endif 13 | 14 | #define GetRecSize(nd,nm) (DIM_FSZ*nd+MSR_FSZ*nm) 15 | -------------------------------------------------------------------------------- /applications/NPB/OMP/DC/rbt.h: -------------------------------------------------------------------------------- 1 | #ifndef _ADC_PARVIEW_TREE_DEF_H_ 2 | #define _ADC_PARVIEW_TREE_DEF_H_ 3 | 4 | #define MAX_TREE_HEIGHT 64 5 | enum{BLACK,RED}; 6 | 7 | typedef struct treeNode{ 8 | struct treeNode *left; 9 | struct treeNode *right; 10 | uint32 clr; 11 | int64 nodeMemPool[1]; 12 | } treeNode; 13 | 14 | typedef struct RBTree{ 15 | treeNode root; 16 | treeNode * mp; 17 | uint32 count; 18 | uint32 treeNodeSize; 19 | uint32 nodeDataSize; 20 | uint32 memoryLimit; 21 | uint32 memaddr; 22 | uint32 memoryIsFull; 23 | uint32 freeNodeCounter; 24 | uint32 nNodesLimit; 25 | uint32 nd; 26 | uint32 nm; 27 | uint32 *drcts; 28 | treeNode **nodes; 29 | unsigned char * memPool; 30 | } RBTree; 31 | 32 | #define 
NEW_TREE_NODE(node_ptr,memPool,memaddr,treeNodeSize, \ 33 | freeNodeCounter,memoryIsFull) \ 34 | node_ptr=(struct treeNode*)(memPool+memaddr); \ 35 | memaddr+=treeNodeSize; \ 36 | (freeNodeCounter)--; \ 37 | if( freeNodeCounter == 0 ) { \ 38 | memoryIsFull = 1; \ 39 | } 40 | 41 | int32 TreeInsert(RBTree *tree, uint32 *attrs); 42 | 43 | #endif /* _ADC_PARVIEW_TREE_DEF_H_ */ 44 | -------------------------------------------------------------------------------- /applications/NPB/OMP/EP/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=ep 3 | BENCHMARKU=EP 4 | 5 | include ../config/make.def 6 | 7 | OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o \ 8 | ${COMMON}/timers.o ${COMMON}/wtime.o 9 | 10 | include ../sys/make.common 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB} 14 | 15 | 16 | ep.o: ep.f npbparams.h 17 | ${FCOMPILE} ep.f 18 | 19 | clean: 20 | - rm -f *.o *~ 21 | - rm -f npbparams.h core 22 | - if [ -d rii_files ]; then rm -r rii_files; fi 23 | -------------------------------------------------------------------------------- /applications/NPB/OMP/EP/README: -------------------------------------------------------------------------------- 1 | This code implements the random-number generator described in the 2 | NAS Parallel Benchmark document RNR Technical Report RNR-94-007. 3 | The code is "embarrassingly" parallel in that no communication is 4 | required for the generation of the random numbers itself. 
5 | -------------------------------------------------------------------------------- /applications/NPB/OMP/FT/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=ft 3 | BENCHMARKU=FT 4 | 5 | include ../config/make.def 6 | 7 | include ../sys/make.common 8 | 9 | OBJS = ft.o ${COMMON}/${RAND}.o ${COMMON}/print_results.o \ 10 | ${COMMON}/timers.o ${COMMON}/wtime.o 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB} 14 | 15 | 16 | 17 | .f.o: 18 | ${FCOMPILE} $< 19 | 20 | ft.o: ft.f global.h npbparams.h 21 | 22 | clean: 23 | - rm -f *.o *~ mputil* 24 | - rm -f ft npbparams.h core 25 | - if [ -d rii_files ]; then rm -r rii_files; fi 26 | -------------------------------------------------------------------------------- /applications/NPB/OMP/FT/README: -------------------------------------------------------------------------------- 1 | This code implements the time integration of a three-dimensional 2 | partial differential equation using the Fast Fourier Transform. 3 | Some of the dimension statements are not F77 conforming and will 4 | not work using the g77 compiler. All dimension statements, 5 | however, are legal F90. -------------------------------------------------------------------------------- /applications/NPB/OMP/FT/inputft.data.sample: -------------------------------------------------------------------------------- 1 | 6 ! number of iterations 2 | 2 ! layout type. 0 = 0d, 1 = 1d, 2 = 2d 3 | 2 4 ! processor layout. 
0d must be "1 1"; 1d must be "1 N" 4 | -------------------------------------------------------------------------------- /applications/NPB/OMP/IS/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=is 3 | BENCHMARKU=IS 4 | 5 | include ../config/make.def 6 | 7 | include ../sys/make.common 8 | 9 | OBJS = is.o \ 10 | ${COMMON}/c_print_results.o \ 11 | ${COMMON}/c_timers.o \ 12 | ${COMMON}/c_wtime.o 13 | 14 | 15 | ${PROGRAM}: config ${OBJS} 16 | ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${C_LIB} 17 | 18 | .c.o: 19 | ${CCOMPILE} $< 20 | 21 | is.o: is.c npbparams.h 22 | 23 | 24 | clean: 25 | - rm -f *.o *~ mputil* 26 | - rm -f npbparams.h core 27 | - if [ -d rii_files ]; then rm -r rii_files; fi 28 | -------------------------------------------------------------------------------- /applications/NPB/OMP/LU/inputlu.data.sample: -------------------------------------------------------------------------------- 1 | c 2 | c***controls printing of the progress of iterations: ipr inorm 3 | 1 250 4 | c 5 | c***the maximum no. 
of pseudo-time steps to be performed: nitmax 6 | 250 7 | c 8 | c***magnitude of the time step: dt 9 | 2.0e+00 10 | c 11 | c***relaxation factor for SSOR iterations: omega 12 | 1.2 13 | c 14 | c***tolerance levels for steady-state residuals: tolnwt(m),m=1,5 15 | 1.0e-08 1.0e-08 1.0e-08 1.0e-08 1.0e-08 16 | c 17 | c***number of grid points in xi and eta and zeta directions: nx ny nz 18 | 64 64 64 19 | c 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /applications/NPB/OMP/MG/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=mg 3 | BENCHMARKU=MG 4 | 5 | include ../config/make.def 6 | 7 | OBJS = mg.o ${COMMON}/print_results.o \ 8 | ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o 9 | 10 | include ../sys/make.common 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB} 14 | 15 | mg.o: mg.f globals.h npbparams.h 16 | ${FCOMPILE} mg.f 17 | 18 | clean: 19 | - rm -f *.o *~ 20 | - rm -f npbparams.h core 21 | - if [ -d rii_files ]; then rm -r rii_files; fi 22 | -------------------------------------------------------------------------------- /applications/NPB/OMP/MG/mg.input.sample: -------------------------------------------------------------------------------- 1 | 8 = top level 2 | 256 256 256 = nx ny nz 3 | 20 = nit 4 | 0 0 0 0 0 0 0 0 = debug_vec 5 | -------------------------------------------------------------------------------- /applications/NPB/OMP/SP/add.f: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | subroutine add 6 | 7 | c--------------------------------------------------------------------- 8 | c--------------------------------------------------------------------- 9 | 10 | 
c--------------------------------------------------------------------- 11 | c addition of update to the vector u 12 | c--------------------------------------------------------------------- 13 | 14 | include 'header.h' 15 | 16 | integer i,j,k,m 17 | 18 | if (timeron) call timer_start(t_add) 19 | !$omp parallel do default(shared) private(i,j,k,m) 20 | do k = 1, nz2 21 | do j = 1, ny2 22 | do i = 1, nx2 23 | do m = 1, 5 24 | u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k) 25 | end do 26 | end do 27 | end do 28 | end do 29 | if (timeron) call timer_stop(t_add) 30 | 31 | return 32 | end 33 | 34 | -------------------------------------------------------------------------------- /applications/NPB/OMP/SP/adi.f: -------------------------------------------------------------------------------- 1 | 2 | c--------------------------------------------------------------------- 3 | c--------------------------------------------------------------------- 4 | 5 | subroutine adi 6 | 7 | c--------------------------------------------------------------------- 8 | c--------------------------------------------------------------------- 9 | 10 | call compute_rhs 11 | 12 | call txinvr 13 | 14 | call x_solve 15 | 16 | call y_solve 17 | 18 | call z_solve 19 | 20 | call add 21 | 22 | return 23 | end 24 | 25 | -------------------------------------------------------------------------------- /applications/NPB/OMP/SP/inputsp.data.sample: -------------------------------------------------------------------------------- 1 | 400 number of time steps 2 | 0.0015d0 dt for class A = 0.0015d0. 
class B = 0.001d0 class C = 0.00067d0 3 | 64 64 64 4 | -------------------------------------------------------------------------------- /applications/NPB/OMP/common/wtime.c: -------------------------------------------------------------------------------- 1 | #include "wtime.h" 2 | #include 3 | #ifndef DOS 4 | #include 5 | #endif 6 | 7 | void wtime(double *t) 8 | { 9 | /* a generic timer */ 10 | static int sec = -1; 11 | struct timeval tv; 12 | gettimeofday(&tv, (void *)0); 13 | if (sec < 0) sec = tv.tv_sec; 14 | *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /applications/NPB/OMP/common/wtime.h: -------------------------------------------------------------------------------- 1 | /* C/Fortran interface is different on different machines. 2 | * You may need to tweak this. 3 | */ 4 | 5 | 6 | #if defined(IBM) 7 | #define wtime wtime 8 | #elif defined(CRAY) 9 | #define wtime WTIME 10 | #else 11 | #define wtime wtime_ 12 | #endif 13 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/README: -------------------------------------------------------------------------------- 1 | This directory contains examples of make.def files that were used 2 | by the NPB team in testing the benchmarks on different platforms. 3 | They can be used as starting points for make.def files for your 4 | own platform, but you may need to tailor them for best performance 5 | on your installation. A clean template can be found in directory 6 | `config'. 7 | Some examples of suite.def files are also provided.
-------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/suite.def.bt: -------------------------------------------------------------------------------- 1 | bt S 2 | bt W 3 | bt A 4 | bt B 5 | bt C 6 | bt D 7 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/suite.def.cg: -------------------------------------------------------------------------------- 1 | cg S 2 | cg W 3 | cg A 4 | cg B 5 | cg C 6 | cg D 7 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/suite.def.ep: -------------------------------------------------------------------------------- 1 | ep S 2 | ep W 3 | ep A 4 | ep B 5 | ep C 6 | ep D 7 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/suite.def.ft: -------------------------------------------------------------------------------- 1 | ft S 2 | ft W 3 | ft A 4 | ft B 5 | ft C 6 | ft D 7 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/suite.def.is: -------------------------------------------------------------------------------- 1 | is S 2 | is W 3 | is A 4 | is B 5 | is C 6 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/suite.def.lu: -------------------------------------------------------------------------------- 1 | lu S 2 | lu W 3 | lu A 4 | lu B 5 | lu C 6 | lu D 7 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/NAS.samples/suite.def.mg: -------------------------------------------------------------------------------- 1 | mg S 2 | mg W 3 | mg A 4 | mg B 5 | mg C 6 | mg D 7 | -------------------------------------------------------------------------------- 
/applications/NPB/OMP/config/NAS.samples/suite.def.sp: -------------------------------------------------------------------------------- 1 | sp S 2 | sp W 3 | sp A 4 | sp B 5 | sp C 6 | sp D 7 | -------------------------------------------------------------------------------- /applications/NPB/OMP/config/suite.def.template: -------------------------------------------------------------------------------- 1 | # config/suite.def 2 | # This file is used to build several benchmarks with a single command. 3 | # Typing "make suite" in the main directory will build all the benchmarks 4 | # specified in this file. 5 | # Each line of this file contains a benchmark name and the class. 6 | # The name is one of "cg", "is", "dc", "ep", "mg", "ft", "sp", 7 | # "bt", "lu", and "ua". 8 | # The class is one of "S", "W", "A" through "E" 9 | # (except that no classes C,D,E for DC and no class E for IS and UA). 10 | # No blank lines. 11 | # The following example builds sample sizes of all benchmarks. 12 | ft S 13 | mg S 14 | sp S 15 | lu S 16 | bt S 17 | is S 18 | ep S 19 | cg S 20 | ua S 21 | dc S 22 | -------------------------------------------------------------------------------- /applications/NPB/OMP/sys/Makefile: -------------------------------------------------------------------------------- 1 | UCC = cc 2 | include ../config/make.def 3 | 4 | # Note that COMPILE is also defined in make.common and should 5 | # be the same. We can't include make.common because it has a lot 6 | # of other garbage. 7 | FCOMPILE = $(F77) -c $(F_INC) $(FFLAGS) 8 | 9 | all: setparams 10 | 11 | # setparams creates an npbparams.h file for each benchmark 12 | # configuration.
npbparams.h also contains info about how a benchmark 13 | # was compiled and linked 14 | 15 | setparams: setparams.c ../config/make.def 16 | $(UCC) ${CONVERTFLAG} -o setparams setparams.c 17 | 18 | 19 | clean: 20 | -rm -f setparams setparams.h npbparams.h 21 | -rm -f *~ *.o 22 | 23 | -------------------------------------------------------------------------------- /applications/NPB/OMP/sys/print_header: -------------------------------------------------------------------------------- 1 | echo ' ============================================' 2 | echo ' = NAS PARALLEL BENCHMARKS 3.3 =' 3 | echo ' = OpenMP Versions =' 4 | echo ' = F77/C =' 5 | echo ' ============================================' 6 | echo '' 7 | -------------------------------------------------------------------------------- /applications/NPB/OMP/sys/print_instructions: -------------------------------------------------------------------------------- 1 | echo '' 2 | echo ' To make a NAS benchmark type ' 3 | echo '' 4 | echo ' make CLASS=' 5 | echo '' 6 | echo ' where is "bt", "cg", "ep", "ft", "is", "lu",' 7 | echo ' "mg", "sp", "ua", or "dc"' 8 | echo ' is "S", "W", "A", "B", "C" or "D"' 9 | echo '' 10 | echo ' To make a set of benchmarks, create the file config/suite.def' 11 | echo ' according to the instructions in config/suite.def.template and type' 12 | echo '' 13 | echo ' make suite' 14 | echo '' 15 | echo ' ***************************************************************' 16 | echo ' * Remember to edit the file config/make.def for site specific *' 17 | echo ' * information as described in the README file *' 18 | echo ' ***************************************************************' 19 | 20 | -------------------------------------------------------------------------------- /applications/NPB/OMP/sys/suite.awk: -------------------------------------------------------------------------------- 1 | BEGIN { SMAKE = "make" } { 2 | if ($1 !~ /^#/ && NF > 1) { 3 | printf "cd `echo %s|tr '[a-z]' '[A-Z]'`; %s 
clean;", $1, SMAKE; 4 | printf "%s CLASS=%s", SMAKE, $2; 5 | if (NF > 2) { 6 | printf " VERSION=%s", $3; 7 | } 8 | printf "; cd ..\n"; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/BT/add.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine add 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | c--------------------------------------------------------------------- 10 | c addition of update to the vector u 11 | c--------------------------------------------------------------------- 12 | 13 | include 'header.h' 14 | 15 | integer i, j, k, m 16 | 17 | if (timeron) call timer_start(t_add) 18 | do k = 1, grid_points(3)-2 19 | do j = 1, grid_points(2)-2 20 | do i = 1, grid_points(1)-2 21 | do m = 1, 5 22 | u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k) 23 | enddo 24 | enddo 25 | enddo 26 | enddo 27 | if (timeron) call timer_stop(t_add) 28 | 29 | return 30 | end 31 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/BT/adi.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine adi 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | call compute_rhs 10 | 11 | call x_solve 12 | 13 | call y_solve 14 | 15 | call z_solve 16 | 17 | call add 18 | 19 | return 20 | end 21 | 22 | 
-------------------------------------------------------------------------------- /applications/NPB/SERIAL/BT/exact_solution.f: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | 4 | subroutine exact_solution(xi,eta,zeta,dtemp) 5 | 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | 9 | c--------------------------------------------------------------------- 10 | c this function returns the exact solution at point xi, eta, zeta 11 | c--------------------------------------------------------------------- 12 | 13 | include 'header.h' 14 | 15 | double precision xi, eta, zeta, dtemp(5) 16 | integer m 17 | 18 | do m = 1, 5 19 | dtemp(m) = ce(m,1) + 20 | > xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) + 21 | > eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+ 22 | > zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 23 | > zeta*ce(m,13)))) 24 | enddo 25 | 26 | return 27 | end 28 | 29 | 30 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/BT/inputbt.data.sample: -------------------------------------------------------------------------------- 1 | 60 number of time steps 2 | 0.01d0 dt for class A = 0.0008d0. 
class B = 0.0003d0 class C = 0.0001d0 3 | 12 12 12 4 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/BT/work_lhs.h: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | c 4 | c work_lhs.h 5 | c 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | c 9 | double precision fjac(5, 5, 0:problem_size), 10 | > njac(5, 5, 0:problem_size), 11 | > lhs (5, 5, 3, 0:problem_size), 12 | > tmp1, tmp2, tmp3 13 | common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3 14 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/BT/work_lhs_vec.h: -------------------------------------------------------------------------------- 1 | c--------------------------------------------------------------------- 2 | c--------------------------------------------------------------------- 3 | c 4 | c work_lhs_vec.h 5 | c 6 | c--------------------------------------------------------------------- 7 | c--------------------------------------------------------------------- 8 | c 9 | double precision fjac(5, 5, 0:problem_size, 0:problem_size), 10 | > njac(5, 5, 0:problem_size, 0:problem_size), 11 | > lhs (5, 5, 3, 0:problem_size, 0:problem_size), 12 | > tmp1, tmp2, tmp3 13 | common /work_lhs/ fjac, njac, lhs, tmp1, tmp2, tmp3 14 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/CG/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=cg 3 | BENCHMARKU=CG 4 | 5 | include ../config/make.def 6 | 7 | OBJS = cg.o ${COMMON}/print_results.o \ 8 | ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o 9 | 10 |
include ../sys/make.common 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB} 14 | 15 | cg.o: cg.f globals.h npbparams.h 16 | ${FCOMPILE} cg.f 17 | 18 | clean: 19 | - rm -f *.o *~ 20 | - rm -f npbparams.h core 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/CG/README.carefully: -------------------------------------------------------------------------------- 1 | Note: please observe that in the routine conj_grad three 2 | implementations of the sparse matrix-vector multiply have 3 | been supplied. The default matrix-vector multiply is not 4 | loop unrolled. The alternate implementations are unrolled 5 | to a depth of 2 and unrolled to a depth of 8. Please 6 | experiment with these to find the fastest for your particular 7 | architecture. If reporting timing results, any of these three may 8 | be used without penalty. 9 | 10 | Performance examples: 11 | The non-unrolled version of the multiply is actually (slightly: 12 | maybe 5%) faster on the sp2-66MHz-WN on 16 nodes than is the 13 | unrolled-by-2 version below. On the Cray t3d, the reverse is true, 14 | i.e., the unrolled-by-two version is some 10% faster. 15 | The unrolled-by-8 version below is significantly faster 16 | on the Cray t3d - overall speed of code is 1.5 times faster.
17 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/DC/ADC.par: -------------------------------------------------------------------------------- 1 | attrNum=12 2 | measuresNum=1 3 | tuplesNum=100 4 | INVERSE_ENDIAN=0 5 | fileName=ADC 6 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/DC/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=dc 3 | BENCHMARKU=DC 4 | 5 | include ../config/make.def 6 | 7 | include ../sys/make.common 8 | 9 | OBJS = adc.o dc.o extbuild.o rbt.o jobcntl.o \ 10 | ${COMMON}/c_print_results.o \ 11 | ${COMMON}/c_timers.o ${COMMON}/c_wtime.o 12 | 13 | 14 | # npbparams.h is provided for backward compatibility with NPB compilation 15 | # header.h: npbparams.h 16 | 17 | ${PROGRAM}: config ${OBJS} 18 | ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${C_LIB} 19 | 20 | .c.o: 21 | $(CCOMPILE) $< 22 | 23 | adc.o: adc.c npbparams.h 24 | dc.o: dc.c adcc.h adc.h macrodef.h npbparams.h 25 | extbuild.o: extbuild.c adcc.h adc.h macrodef.h npbparams.h 26 | rbt.o: rbt.c adcc.h adc.h rbt.h macrodef.h npbparams.h 27 | jobcntl.o: jobcntl.c adcc.h adc.h macrodef.h npbparams.h 28 | 29 | clean: 30 | - rm -f *.o 31 | - rm -f npbparams.h core 32 | - rm -f {../,}ADC.{logf,view,dat,viewsz,groupby,chunks}.* 33 | 34 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/DC/macrodef.h: -------------------------------------------------------------------------------- 1 | #define PutErrMsg(msg) {fprintf(stderr," %s, errno = %d\n", msg, errno);} 2 | 3 | #define WriteToFile(ptr,size,nitems,stream,logf) if( fwrite(ptr,size,nitems,stream) != nitems )\ 4 | {\ 5 | fprintf(stderr,"\n Write error from WriteToFile()\n"); return ADC_WRITE_FAILED; \ 6 | } 7 | 8 | #ifdef WINNT 9 | #define FSEEK(stream,offset,whence) fseek(stream, (long)offset,whence); 10 
| #else 11 | #define FSEEK(stream,offset,whence) fseek(stream,offset,whence); 12 | #endif 13 | 14 | #define GetRecSize(nd,nm) (DIM_FSZ*nd+MSR_FSZ*nm) 15 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/DC/rbt.h: -------------------------------------------------------------------------------- 1 | #ifndef _ADC_PARVIEW_TREE_DEF_H_ 2 | #define _ADC_PARVIEW_TREE_DEF_H_ 3 | 4 | #define MAX_TREE_HEIGHT 64 5 | enum{BLACK,RED}; 6 | 7 | typedef struct treeNode{ 8 | struct treeNode *left; 9 | struct treeNode *right; 10 | uint32 clr; 11 | int64 nodeMemPool[1]; 12 | } treeNode; 13 | 14 | typedef struct RBTree{ 15 | treeNode root; 16 | treeNode * mp; 17 | uint32 count; 18 | uint32 treeNodeSize; 19 | uint32 nodeDataSize; 20 | uint32 memoryLimit; 21 | uint32 memaddr; 22 | uint32 memoryIsFull; 23 | uint32 freeNodeCounter; 24 | uint32 nNodesLimit; 25 | uint32 nd; 26 | uint32 nm; 27 | uint32 *drcts; 28 | treeNode **nodes; 29 | unsigned char * memPool; 30 | } RBTree; 31 | 32 | #define NEW_TREE_NODE(node_ptr,memPool,memaddr,treeNodeSize, \ 33 | freeNodeCounter,memoryIsFull) \ 34 | node_ptr=(struct treeNode*)(memPool+memaddr); \ 35 | memaddr+=treeNodeSize; \ 36 | (freeNodeCounter)--; \ 37 | if( freeNodeCounter == 0 ) { \ 38 | memoryIsFull = 1; \ 39 | } 40 | 41 | int32 TreeInsert(RBTree *tree, uint32 *attrs); 42 | 43 | #endif /* _ADC_PARVIEW_TREE_DEF_H_ */ 44 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/EP/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | BENCHMARK=ep 3 | BENCHMARKU=EP 4 | 5 | include ../config/make.def 6 | 7 | OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o \ 8 | ${COMMON}/timers.o ${COMMON}/wtime.o 9 | 10 | include ../sys/make.common 11 | 12 | ${PROGRAM}: config ${OBJS} 13 | ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB} 14 | 15 | 16 | ep.o: ep.f npbparams.h 17 | 
# Makefile for the serial NPB "FT" benchmark.
# FLINK, FLINKFLAGS, FCOMPILE, F_LIB, COMMON, RAND, PROGRAM and the `config`
# prerequisite are not defined in this file; presumably they come from the two
# included files below.
SHELL=/bin/sh
BENCHMARK=ft
BENCHMARKU=FT

include ../config/make.def

include ../sys/make.common

# Benchmark objects plus the shared helpers (random numbers, result printing,
# timers) taken from the ${COMMON} directory.
OBJS = appft.o auxfnct.o fft3d.o mainft.o verify.o \
       ${COMMON}/${RAND}.o ${COMMON}/print_results.o \
       ${COMMON}/timers.o ${COMMON}/wtime.o

${PROGRAM}: config ${OBJS}
	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}



# Suffix rule: build any .o from the .f of the same name.
.f.o:
	${FCOMPILE} $<

# Every source also depends on the shared headers, so a change to global.h or
# to npbparams.h rebuilds all objects.
appft.o:	appft.f  global.h npbparams.h
auxfnct.o:	auxfnct.f  global.h npbparams.h
fft3d.o:	fft3d.f  global.h npbparams.h
mainft.o:	mainft.f  global.h npbparams.h
verify.o:	verify.f  global.h npbparams.h

# The leading "-" lets make continue even if an rm command fails.
clean:
	- rm -f *.o *~ mputil*
	- rm -f ft npbparams.h core
# Makefile for the serial NPB "MG" benchmark.
# FLINK, FLINKFLAGS, FCOMPILE, F_LIB, COMMON, RAND, PROGRAM and the `config`
# prerequisite are not defined in this file; presumably they come from the two
# included files.
SHELL=/bin/sh
BENCHMARK=mg
BENCHMARKU=MG

include ../config/make.def

# The benchmark object plus the shared helpers (result printing, random
# numbers, timers) taken from the ${COMMON} directory.
OBJS = mg.o ${COMMON}/print_results.o  \
       ${COMMON}/${RAND}.o ${COMMON}/timers.o ${COMMON}/wtime.o

include ../sys/make.common

${PROGRAM}: config ${OBJS}
	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}

# mg.o is rebuilt when the source or either header changes.
mg.o:		mg.f  globals.h npbparams.h
	${FCOMPILE} mg.f

# The leading "-" lets make continue even if an rm command fails.
clean:
	- rm -f *.o *~
	- rm -f npbparams.h core

c---------------------------------------------------------------------
c---------------------------------------------------------------------

      subroutine add

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c addition of update to the vector u
c
c For every grid point (i,j,k) in 1..nx2, 1..ny2, 1..nz2 and each of
c the 5 components m, rhs(m,i,j,k) is added in place to u(m,i,j,k).
c The work is bracketed by timer_start/timer_stop on t_add when the
c logical timeron is true.
c
c u, rhs, nx2, ny2, nz2, timeron and t_add are not declared in this
c routine; presumably they are provided by header.h.
c---------------------------------------------------------------------

      include 'header.h'

      integer i,j,k,m

      if (timeron) call timer_start(t_add)
      do k = 1, nz2
         do j = 1, ny2
            do i = 1, nx2
               do m = 1, 5
                  u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k)
               end do
            end do
         end do
      end do
      if (timeron) call timer_stop(t_add)

      return
      end
c---------------------------------------------------------------------
c---------------------------------------------------------------------

      subroutine exact_solution(xi,eta,zeta,dtemp)

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c this subroutine returns, in dtemp, the exact solution at the point
c xi, eta, zeta
c
c   xi, eta, zeta  (in)   coordinates of the point
c   dtemp(5)       (out)  the five solution components
c
c For each component m the value is ce(m,1) plus one quartic
c polynomial per coordinate, evaluated in nested (Horner) form:
c   xi   uses coefficients ce(m,2), ce(m,5), ce(m,8),  ce(m,11)
c   eta  uses coefficients ce(m,3), ce(m,6), ce(m,9),  ce(m,12)
c   zeta uses coefficients ce(m,4), ce(m,7), ce(m,10), ce(m,13)
c The coefficient table ce is not declared here; presumably it is
c provided by header.h.
c---------------------------------------------------------------------

      include 'header.h'

      double precision  xi, eta, zeta, dtemp(5)
      integer m

      do m = 1, 5
         dtemp(m) =  ce(m,1) +
     >     xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
     >     eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
     >     zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
     >     zeta*ce(m,13))))
      end do

      return
      end
class B = 0.001d0 class C = 0.00067d0 3 | 64 64 64 4 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/common/wtime.c: -------------------------------------------------------------------------------- 1 | #include "wtime.h" 2 | #include 3 | #ifndef DOS 4 | #include 5 | #endif 6 | 7 | void wtime(double *t) 8 | { 9 | static int sec = -1; 10 | struct timeval tv; 11 | gettimeofday(&tv, (void *)0); 12 | if (sec < 0) sec = tv.tv_sec; 13 | *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec; 14 | } 15 | 16 | 17 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/common/wtime.h: -------------------------------------------------------------------------------- 1 | /* C/Fortran interface is different on different machines. 2 | * You may need to tweak this. 3 | */ 4 | 5 | 6 | #if defined(IBM) 7 | #define wtime wtime 8 | #elif defined(CRAY) 9 | #define wtime WTIME 10 | #else 11 | #define wtime wtime_ 12 | #endif 13 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/README: -------------------------------------------------------------------------------- 1 | This directory contains examples of make.def files that were used 2 | by the NPB team in testing the benchmarks on different platforms. 3 | They can be used as starting points for make.def files for your 4 | own platform, but you may need to taylor them for best performance 5 | on your installation. A clean template can be found in directory 6 | `config'. 7 | Some examples of suite.def files are also provided. 
-------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/suite.def.bt: -------------------------------------------------------------------------------- 1 | bt S 2 | bt W 3 | bt A 4 | bt B 5 | bt C 6 | bt D 7 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/suite.def.cg: -------------------------------------------------------------------------------- 1 | cg S 2 | cg W 3 | cg A 4 | cg B 5 | cg C 6 | cg D 7 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/suite.def.ep: -------------------------------------------------------------------------------- 1 | ep S 2 | ep W 3 | ep A 4 | ep B 5 | ep C 6 | ep D 7 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/suite.def.ft: -------------------------------------------------------------------------------- 1 | ft S 2 | ft W 3 | ft A 4 | ft B 5 | ft C 6 | ft D 7 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/suite.def.is: -------------------------------------------------------------------------------- 1 | is S 2 | is W 3 | is A 4 | is B 5 | is C 6 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/suite.def.lu: -------------------------------------------------------------------------------- 1 | lu S 2 | lu W 3 | lu A 4 | lu B 5 | lu C 6 | lu D 7 | -------------------------------------------------------------------------------- /applications/NPB/SERIAL/config/NAS.samples/suite.def.mg: -------------------------------------------------------------------------------- 1 | mg S 2 | mg W 3 | mg A 4 | mg B 5 | mg C 6 | mg D 7 | 
# The name is one of "cg", "is", "dc", "ep", "mg", "ft", "sp",
# "bt", "lu", and "ua".
# Prints the usage text for the serial NPB build.
# The <benchmark-name> and <class> placeholders are restored: without them
# the text read "make CLASS=" and "where is ...", which tells the user
# nothing about what to type.
echo ''
echo '   To make a NAS benchmark type '
echo ''
echo '         make <benchmark-name> CLASS=<class>'
echo ''
echo '   where <benchmark-name> is "bt", "cg", "ep", "ft", "is", "lu",'
echo '                            "lu-hp", "mg", "sp", or "ua"'
echo '         <class>          is "S", "W", "A", "B", "C" or "D"'
echo ''
echo '   To make a set of benchmarks, create the file config/suite.def'
echo '   according to the instructions in config/suite.def.template and type'
echo ''
echo '         make suite'
echo ''
echo ' ***************************************************************'
echo ' * Remember to edit the file config/make.def for site specific *'
echo ' * information as described in the README file                *'
echo ' ***************************************************************'

# Collect every C++ source located exactly one directory below this one.
file(GLOB DEMOS
  "*/*.cpp"
)

# Make a separate build target for each demo source file.
foreach(file ${DEMOS})
  get_filename_component(base ${file} NAME_WE)
  # Name the target once so the property below is applied to the target that
  # was just created.  (The previous code referenced ${name}, which is never
  # set in this file, so the FOLDER property was not applied to the demo.)
  # Assumes the first argument of add_grappa_exe is the target name -- confirm
  # against the macro's definition.
  set(demo_target demo-${base})
  add_grappa_exe(${demo_target} ${base}.exe ${file})
  set_property(TARGET ${demo_target} PROPERTY FOLDER "Applications")
endforeach()
This code no longer works with the current version of Grappa; it is kept here only because some other examples depend on part of it.
7 | 8 | -------------------------------------------------------------------------------- /applications/graph500/generator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(SOURCES 3 | graph_generator.h 4 | graph_generator.c 5 | make_graph.h 6 | make_graph.c 7 | splittable_mrg.h 8 | splittable_mrg.c 9 | utils.h 10 | utils.c 11 | user_settings.h 12 | mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h 13 | ../prng.c 14 | ) 15 | set_source_files_properties( make_graph.c graph_generator.c utils.c PROPERTIES LANGUAGE CXX ) 16 | 17 | add_definitions( 18 | -Drestrict=__restrict__ 19 | -DGRAPH_GENERATOR_GRAPPA 20 | -DGRAPH_GENERATOR_SEQ 21 | -DGRAPHGEN_DISTRIBUTED_MEMORY 22 | ) 23 | 24 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math") 25 | 26 | add_library(generator EXCLUDE_FROM_ALL ${SOURCES}) 27 | set_property(TARGET generator PROPERTY FOLDER "Applications") 28 | add_dependencies(generator all-third-party) 29 | -------------------------------------------------------------------------------- /applications/graph500/generator/Makefile.mpi: -------------------------------------------------------------------------------- 1 | CC = mpicc 2 | CFLAGS = -std=c99 -O3 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg 3 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g 4 | LDFLAGS = -O3 5 | # LDFLAGS = -g 6 | MPICC = mpicc 7 | 8 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 9 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 10 | 11 | all: generator_test_mpi 12 | 13 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 14 | $(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm 15 | 16 | clean: 17 | -rm -f generator_test_mpi 18 | 
# Builds generator_test_seq, the sequential test driver for the graph generator.
CC = cc
CFLAGS = -g -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_SEQ -D_GRAPPA # -g -pg
# CFLAGS = -g -Wall -Drestrict=__restrict__
LDFLAGS = -g # -g -pg

GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
# mrg_transitions.c is listed with the headers: it is a rebuild dependency
# only and is not passed to the compiler as a separate source.
GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c

# `all` and `clean` name actions, not files; declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

all: generator_test_seq

# Compile and link in one step; any change to a source or header rebuilds it.
generator_test_seq: generator_test_seq.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_seq generator_test_seq.c $(GENERATOR_SOURCES) -lm

clean:
	-rm -f generator_test_seq
# Builds generator_test_xmt, the XMT test driver for the graph generator.
CC = cc
CFLAGS = -DNDEBUG
LDFLAGS = $(CFLAGS) # -g -pg

GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c
# mrg_transitions.c is listed with the headers: it is a rebuild dependency
# only and is not passed to the compiler as a separate source.
GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c

# `all` and `clean` name actions, not files; declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

all: generator_test_xmt

# Compile and link in one step; any change to a source or header rebuilds it.
generator_test_xmt: generator_test_xmt.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_xmt generator_test_xmt.c $(GENERATOR_SOURCES) -lm

clean:
	-rm -f generator_test_xmt
#!/usr/bin/env ruby
# frozen_string_literal: true

# Dumps a binary graph checkpoint file as human-readable text.
#
# The file is read as a flat sequence of 8-byte words:
#   nedge, nv, nadj, nbfs     (header)
#   2 * nedge words           ("edges")
#   2 * nv + 2 words          ("xoff")
#   nadj words                ("xadj")
#   nbfs words                ("bfsroots")
#
# Usage: asciize.rb CHECKPOINT_FILE

# Size in bytes of every value in the file.
WORD_BYTES = 8
# Each word is decoded as a native-endian signed 64-bit integer.  The previous
# 'L' directive decoded only 32 bits of each 8-byte word, so values outside the
# unsigned 32-bit range (including negative ones) were printed incorrectly.
# NOTE(review): assumes the writer stores int64_t values -- confirm.
WORD_FORMAT = 'q'

# Reads one word from +io+.
# @raise [EOFError] if fewer than WORD_BYTES bytes remain
def read_word(io)
  bytes = io.read(WORD_BYTES)
  raise EOFError, 'checkpoint file is truncated' unless bytes && bytes.bytesize == WORD_BYTES

  bytes.unpack(WORD_FORMAT).first
end

# Prints a titled section of +count+ words, one "index: value" line per word.
def dump_section(io, out, title, count)
  out.puts "-- #{title} --"
  count.times { |i| out.puts "#{i}: #{read_word(io)}" }
end

# Writes the textual dump of the checkpoint read from +io+ to +out+.
# @param io  [#read] binary stream positioned at the start of the checkpoint
# @param out [#puts] destination for the text (defaults to standard output)
# @raise [EOFError] if the stream ends before all announced words were read
def asciize(io, out = $stdout)
  out.puts '-------'
  nedge, nv, nadj, nbfs = Array.new(4) { read_word(io) }
  out.puts "nedge: #{nedge}, nv: #{nv}, nadj: #{nadj}, nbfs: #{nbfs}"

  dump_section(io, out, 'edges', nedge * 2)
  dump_section(io, out, 'xoff', 2 * nv + 2)
  dump_section(io, out, 'xadj', nadj)
  dump_section(io, out, 'bfsroots', nbfs)
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    warn 'usage: asciize.rb CHECKPOINT_FILE'
  else
    # File.open (not Kernel#open) so an argument starting with "|" is never run
    # as a command; binary mode and the block form guarantee that raw bytes are
    # read and that the file is closed.
    File.open(ARGV.first, 'rb') { |f| asciize(f) }
  end
end
#!/usr/bin/env ruby
# frozen_string_literal: true

# Runs a command under a pseudo-terminal, echoes its output, then files the
# newest *.otf trace (and every file sharing its base name) under
# trace/<base>/ together with a <base>.sh recording the command line.
#
# Usage: trace.rb COMMAND [ARGS...]

require 'pty'
require 'fileutils'

# Runs +cmd+ in a pty and prints each line of its output, stripped, to +out+.
# @param cmd [String] command line passed to PTY.spawn
# @param out [#puts] destination for the echoed lines
def run_and_echo(cmd, out = $stdout)
  PTY.spawn(cmd) do |reader, _writer, _pid|
    begin
      reader.each_line { |line| out.puts line.strip }
    rescue Errno::EIO
      # The read side reports EIO once the child has closed the pty; this is
      # treated as the normal end of output, exactly as before.
    end
  end
rescue PTY::ChildExited
  # The child exited while we were still attached; nothing left to echo.
end

# Moves the most recently modified *.otf file in the current directory, plus
# every other "<base>.*" file, into <trace_root>/<base>/ and writes the
# command line to <base>.sh next to them.
#
# @param cmd [String] command line to record
# @param trace_root [String] directory that collects the archived traces
# @return [String, nil] the destination directory, or nil if no *.otf exists
# @raise [Errno::EEXIST] if the destination already exists (never overwritten)
def archive_latest_trace(cmd, trace_root = 'trace')
  otf = Dir.glob('*.otf').max_by { |f| File.mtime(f) }
  return nil unless otf

  base = File.basename(otf, '.otf')
  File.write("#{base}.sh", "#{cmd}\n")

  dest = File.join(trace_root, base)
  FileUtils.mkdir_p(trace_root) # the collection directory may not exist yet
  FileUtils.mkdir(dest)         # still fails loudly on an existing trace
  # Move via FileUtils instead of a shell `mv`, so unusual file names are not
  # re-interpreted by a shell.
  FileUtils.mv(Dir.glob("#{base}.*"), dest)
  dest
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    warn 'usage: trace.rb COMMAND [ARGS...]'
  else
    command = ARGV.join(' ')
    run_and_echo(command)
    warn 'trace.rb: no *.otf file found in the current directory' unless archive_latest_trace(command)
  end
end
-------------------------------------------------------------------------------- /applications/graph500/make-incs/make.inc-gcc: -------------------------------------------------------------------------------- 1 | # -*- Makefile -*- 2 | # Copyright 2010, Georgia Institute of Technology, USA. 3 | # See COPYING for license. 4 | CFLAGS = -g -std=c99 5 | #CFLAGS = -g -std=c99 -O3 -march=native -fgcse-sm -fgcse-las -fgcse-after-reload -floop-strip-mine -ftree-loop-im -fivopts -funswitch-loops 6 | LDLIBS = -lm -lrt 7 | CPPFLAGS = -DUSE_MMAP_LARGE -DUSE_MMAP_LARGE_EXT 8 | 9 | BUILD_OPENMP = Yes 10 | CFLAGS_OPENMP = -fopenmp 11 | -------------------------------------------------------------------------------- /applications/graph500/make-incs/make.inc-osx: -------------------------------------------------------------------------------- 1 | # -*- Makefile -*- 2 | # Copyright 2010, University of Illinois at Urbana-Champaign 3 | # See COPYING for license. 4 | CFLAGS = -g -std=c99 -Wall 5 | LDLIBS = -lm 6 | # OSX does not support MAP_ANON in POSIX mode and the timers for MacOSX are 7 | # not available for Leopard 8 | # Use HAVE_MACH_ABSOLUTE_TIME to get the Mac OSX Timer 9 | # Use HAVE_ALLOCA_H to include alloca.h 10 | # Use HAVE_MPI_INT64_T to use the MPI 2.2 Datatype for int64_t items 11 | # Use HAVE_UNISTD_H to include unistd.h (for getopt definitions) 12 | CPPFLAGS = -DHAVE_MACH_ABSOLUTE_TIME -DHAVE_ALLOCA_H -DHAVE_MPI_INT64_T -DHAVE_UNISTD_H 13 | BUILD_MPI = No 14 | BUILD_OPENMP = Yes 15 | -------------------------------------------------------------------------------- /applications/graph500/make-incs/make.inc-xmt: -------------------------------------------------------------------------------- 1 | # -*- Makefile -*- 2 | # Copyright 2010, Georgia Institute of Technology, USA. 3 | # See COPYING for license. 4 | # Modules and Emacs+tramp don't cooperate at PNNL. 
#!/usr/bin/env ruby
# Experiment driver for the Graph500 MPI BFS binaries, written in the Igor DSL.
# The DSL keywords used below (database, isolate, command, params, expect,
# results, interact) come from the igor gem and the shared igor_common.rb;
# their exact semantics are not visible in this file, so the notes below are
# hedged where they go beyond what the code shows.
require 'igor'

# inherit parser, sbatch_flags
require_relative '../../../util/igor_common.rb'

Igor do
  include Isolatable

  # presumably the results database file and the table to record into -- confirm
  database '~/exp/sosp.db', :bfs

  # isolate everything needed for the executable so we can sbcast them for local execution
  # (the three binaries graph500_mpi_simple, graph500_mpi_replicated and
  # graph500_mpi_replicated_csc, looked up relative to this script's directory)
  isolate(%w[simple replicated replicated_csc].map{|v| "graph500_mpi_#{v}"},
    File.dirname(__FILE__))

  # Command template: $srun is a global set outside this file (presumably by
  # igor_common.rb); the %{...} fields look like placeholders filled from the
  # params below -- confirm against the igor documentation.
  command "#{$srun} %{tdir}/graph500_mpi_%{mpibfs} %{scale} %{edgefactor}"

  # request a four-hour limit in addition to the inherited sbatch flags
  sbatch_flags << "--time=4:00:00"

  # default parameter values for a run
  params {
    mpibfs 'simple'
    nnode 2
    ppn 1
    scale 20
    edgefactor 16
  }

  # presumably: a run only counts as successful if it reports max_teps -- confirm
  expect :max_teps

  # Global holding a narrowed view of the results (selected columns only),
  # convenient to inspect from the interactive session started below.
  $filtered = results{|t| t.select(:id, :mpibfs, :scale, :nnode, :ppn, :run_at, :min_time, :max_teps) }

  interact # enter interactive mode
end
function parent = kernel_2 (G, root)
%% Breadth-first search of the graph G starting at ROOT.
%%
%%   G       square sparse adjacency matrix; column v is scanned to find
%%           the neighbours of vertex v.
%%   root    zero-based label of the start vertex.
%%   parent  N-by-1 vector of zero-based parent labels: the root is its
%%           own parent and vertices never reached are -1.

  N = size (G, 1);
  %% Adjust from zero labels.
  root = root + 1;
  parent = zeros (N, 1);
  parent (root) = root;

  %% vlist is the visit queue: entries 1..lastk are filled, 0 means empty.
  vlist = zeros (N, 1);
  vlist(1) = root;
  lastk = 1;
  for k = 1:N,
    v = vlist(k);
    %% An empty slot means every reachable vertex has been processed.
    if v == 0, break; end
    %% Only the row indices of the nonzeros in column v are needed.
    I = find (G(:, v));
    %% Keep the neighbours that have no parent yet and claim them for v.
    nxt = I(parent(I) == 0);
    parent(nxt) = v;
    vlist(lastk + (1:length (nxt))) = nxt;
    lastk = lastk + length (nxt);
  end

  %% Adjust to zero labels.
  parent = parent - 1;
*/ 3 | /* See COPYING for license. */ 4 | #if !defined(OPTIONS_HEADER_) 5 | #define OPTIONS_HEADER_ 6 | 7 | #include 8 | 9 | extern int VERBOSE; 10 | extern int use_RMAT; 11 | extern char *dumpname; 12 | extern char *rootname; 13 | 14 | #define A_PARAM 0.57 15 | #define B_PARAM 0.19 16 | #define C_PARAM 0.19 17 | /* Hence D = 0.05. */ 18 | 19 | extern double A, B, C, D; 20 | 21 | #define NBFS_max 8 22 | extern int NBFS; 23 | 24 | #define default_SCALE ((int64_t)14) 25 | #define default_edgefactor ((int64_t)16) 26 | 27 | extern int64_t SCALE; 28 | extern int64_t edgefactor; 29 | 30 | void get_options (int argc, char **argv); 31 | 32 | extern bool load_checkpoint; 33 | extern bool verify; 34 | 35 | #endif /* OPTIONS_HEADER_ */ 36 | -------------------------------------------------------------------------------- /applications/graph500/prng.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(PRNG_HEADER_) 5 | #define PRNG_HEADER_ 6 | 7 | /** Initialize the PRNG, called in a sequential context. 
*/ 8 | void init_random (void); 9 | 10 | extern uint64_t userseed; 11 | extern uint_fast32_t prng_seed[5]; 12 | extern void *prng_state; 13 | 14 | #ifdef __MTA__ 15 | #include 16 | #else 17 | #include 18 | static void prand(int64_t n, double * v) { 19 | int64_t i; 20 | extern int64_t xmtcompat_rand_initialized; 21 | extern void xmtcompat_initialize_rand(void); 22 | if (!xmtcompat_rand_initialized) xmtcompat_initialize_rand(); 23 | for (i = 0; i < n; ++i) { 24 | v[i] = drand48(); 25 | } 26 | } 27 | #endif /* !defined(__MTA__) */ 28 | 29 | #endif /* PRNG_HEADER_ */ 30 | -------------------------------------------------------------------------------- /applications/graph500/rmat.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(RMAT_HEADER_) 5 | #define RMAT_HEADER_ 6 | 7 | #include "generator/graph_generator.h" 8 | 9 | /** Fill IJ with a randomly permuted R-MAT generated edge list. */ 10 | void rmat_edgelist (struct packed_edge *IJ, int64_t nedge, int SCALE, 11 | double A, double B, double C); 12 | void permute_vertex_labels (struct packed_edge * IJ, int64_t nedge, int64_t max_nvtx, 13 | void * st, int64_t * newlabel); 14 | void permute_edgelist (struct packed_edge * IJ, int64_t nedge, void *st); 15 | 16 | #endif /* RMAT_HEADER_ */ 17 | -------------------------------------------------------------------------------- /applications/graph500/timer.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(TIMER_HEADER_) 5 | #define TIMER_HEADER_ 6 | 7 | /** Start timing. */ 8 | void tic (void); 9 | 10 | /** Return seconds since last tic. 
*/ 11 | double toc (void); 12 | 13 | /** return current seconds */ 14 | double timer(void); 15 | 16 | /** Macro to time a block. */ 17 | #define TIME(timevar, what) do { tic (); what; timevar = toc(); } while (0) 18 | 19 | #endif /* TIMER_HEADER_ */ 20 | -------------------------------------------------------------------------------- /applications/graph500/verify.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(VERIFY_HEADER_) 5 | #define VERIFY_HEADER_ 6 | 7 | #include "generator/graph_generator.h" 8 | 9 | /** Verify a BFS tree, return volume or -1 if failed. */ 10 | int64_t verify_bfs_tree (int64_t *bfs_tree, int64_t max_bfsvtx, 11 | int64_t root, 12 | const struct packed_edge *IJ, int64_t nedge); 13 | 14 | #endif /* VERIFY_HEADER_ */ 15 | -------------------------------------------------------------------------------- /applications/graph500/xalloc.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. 
*/ 4 | #if !defined(XALLOC_HEADER_) 5 | #define XALLOC_HEADER_ 6 | 7 | void * xmalloc (size_t); 8 | void * xmalloc_large (size_t); 9 | void xfree_large (void *); 10 | void * xmalloc_large_ext (size_t); 11 | 12 | /* 13 | void mark_large_unused (void *); 14 | void mark_large_willuse (void *); 15 | */ 16 | 17 | #endif /* XALLOC_HEADER_ */ 18 | -------------------------------------------------------------------------------- /applications/graphlab/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(COMMON graphlab.hpp graphlab.cpp) 2 | 3 | foreach(app pagerank sssp test pagerank_new cc bfs) 4 | add_grappa_exe(graphlab-${app} ${app}.exe ${app}.cpp ${COMMON}) 5 | set_property(TARGET ${name} PROPERTY FOLDER "Graphlab") 6 | endforeach() 7 | -------------------------------------------------------------------------------- /applications/graphlab/README.md: -------------------------------------------------------------------------------- 1 | GraphLab API in Grappa 2 | ---------------------- 3 | 4 | This directory contains code to emulate the [GraphLab][] API with a simple layer on top of Grappa. This API is not perfectly compatible with GraphLab code, but the example vertex programs in this directory are mostly faithful to those in GraphLab proper. 5 | 6 | There are currently two implementations: 7 | 8 | - `NaiveGraphlabEngine` (`graphlab_naive.hpp`): implements a restricted GraphLab API using the builtin Grappa Graph structure. Most notably, only `gather:IN_EDGES` and `scatter:OUT_EDGES` are supported. 9 | 10 | - `GraphlabEngine` (`graphlab_splitv.hpp`): built on a custom graph structure mimicking GraphLab's greedy vertex-split representation. This is currently slower, and still does not implement the full range of options. `pagerank_new.cpp` is an example that uses this engine. 
11 | 12 | [GraphLab]: http://graphlab.org -------------------------------------------------------------------------------- /applications/graphlab/graphlab.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////// 2 | /// GraphLab is an API and runtime system for graph-parallel computation. 3 | /// This is a rough prototype implementation of the programming model to 4 | /// demonstrate using Grappa as a platform for other models. 5 | /// More information on the actual GraphLab system can be found at: 6 | /// graphlab.org. 7 | //////////////////////////////////////////////////////////////////////// 8 | 9 | #include "graphlab.hpp" 10 | 11 | GRAPPA_DEFINE_METRIC(SummarizingMetric, iteration_time, 0); 12 | GRAPPA_DEFINE_METRIC(SummarizingMetric, core_set_size, 0); 13 | 14 | DEFINE_int32(max_iterations, 1024, "Stop after this many iterations, no matter what."); 15 | -------------------------------------------------------------------------------- /applications/isopath/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(grappa) 2 | -------------------------------------------------------------------------------- /applications/isopath/generator/Makefile.grappa: -------------------------------------------------------------------------------- 1 | include ../../../system/Makefile 2 | 3 | CFLAGS += -O3 -DGRAPH_GENERATOR_GRAPPA -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg 4 | CFLAGS += -I$(GRAPPA_HOME)/system -I$(GRAPPA_HOME)/system/tasks 5 | CFLAGS += -D_GRAPPA 6 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g 7 | 8 | # force cplusplus on .c 9 | CC= $(CXX) 10 | 11 | GENERATOR_OBJS = graph_generator.o make_graph.o splittable_mrg.o utils.o 12 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h 
user_settings.h mrg_transitions.c 13 | 14 | all: generator_test_mpi 15 | 16 | splittable_mrg.o: splittable_mrg.c 17 | make -f Makefile.seq $@ 18 | 19 | genlib: $(GENERATOR_OBJS) $(GENERATOR_HEADERS) 20 | ar rcs generator.a $(GENERATOR_OBJS) 21 | 22 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 23 | $(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm 24 | 25 | clean: 26 | rm -f generator_test_mpi 27 | rm -f $(GENERATOR_OBJS) generator.a 28 | -------------------------------------------------------------------------------- /applications/isopath/generator/Makefile.mpi: -------------------------------------------------------------------------------- 1 | CC = mpicc 2 | CFLAGS = -std=c99 -O3 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg 3 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g 4 | LDFLAGS = -O3 5 | # LDFLAGS = -g 6 | MPICC = mpicc 7 | 8 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 9 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 10 | 11 | all: generator_test_mpi 12 | 13 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 14 | $(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm 15 | 16 | clean: 17 | -rm -f generator_test_mpi 18 | -------------------------------------------------------------------------------- /applications/isopath/generator/Makefile.omp: -------------------------------------------------------------------------------- 1 | CC = gcc -fopenmp 2 | CFLAGS = -std=c99 -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_OMP # -g -pg 3 | LDFLAGS = -O3 4 | 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 6 | GENERATOR_HEADERS = 
graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 7 | 8 | all: generator_test_omp 9 | 10 | generator_test_omp: generator_test_omp.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_omp generator_test_omp.c $(GENERATOR_SOURCES) -lm 12 | 13 | clean: 14 | -rm -f generator_test_omp 15 | -------------------------------------------------------------------------------- /applications/isopath/generator/Makefile.seq: -------------------------------------------------------------------------------- 1 | CC = cc 2 | CFLAGS = -g -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_SEQ # -g -pg 3 | # CFLAGS = -g -Wall -Drestrict=__restrict__ 4 | LDFLAGS = -g # -g -pg 5 | 6 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 7 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 8 | 9 | all: generator_test_seq 10 | 11 | generator_test_seq: generator_test_seq.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 12 | $(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_seq generator_test_seq.c $(GENERATOR_SOURCES) -lm 13 | 14 | clean: 15 | -rm -f generator_test_seq 16 | -------------------------------------------------------------------------------- /applications/isopath/generator/Makefile.xmt: -------------------------------------------------------------------------------- 1 | CC = cc 2 | CFLAGS = -DNDEBUG 3 | LDFLAGS = $(CFLAGS) # -g -pg 4 | 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 7 | 8 | all: generator_test_xmt 9 | 10 | generator_test_xmt: generator_test_xmt.c 
$(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_xmt generator_test_xmt.c $(GENERATOR_SOURCES) -lm 12 | 13 | clean: 14 | -rm -f generator_test_xmt 15 | -------------------------------------------------------------------------------- /applications/isopath/grappa/.gitignore: -------------------------------------------------------------------------------- 1 | .igor 2 | -------------------------------------------------------------------------------- /applications/isopath/grappa/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(SOURCES 3 | common.h 4 | oned_csr.h 5 | oned_csr.cpp 6 | options.h 7 | options.cpp 8 | timer.h 9 | ../prng.c 10 | simple_graphs.hpp 11 | simple_graphs.cpp 12 | ) 13 | 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") 15 | add_definitions(-Drestrict=__restrict__ -DGRAPH_GENERATOR_GRAPPA) 16 | 17 | add_grappa_application(isopath.exe ${SOURCES} isopath.cpp) 18 | target_link_libraries(isopath.exe generator) 19 | -------------------------------------------------------------------------------- /applications/isopath/grappa/graph.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "graph.hpp" 3 | 4 | -------------------------------------------------------------------------------- /applications/isopath/grappa/options.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #ifndef _OPTIONS_H 5 | #define _OPTIONS_H 6 | 7 | extern int VERBOSE; 8 | extern int use_RMAT; 9 | extern char *dumpname; 10 | extern char *rootname; 11 | 12 | #define A_PARAM 0.57 13 | #define B_PARAM 0.19 14 | #define C_PARAM 0.19 15 | /* Hence D = 0.05. 
*/ 16 | 17 | extern double A, B, C, D; 18 | 19 | #define NBFS_max 64 20 | extern int NBFS; 21 | 22 | #define default_SCALE ((int64_t)14) 23 | #define default_edgefactor ((int64_t)16) 24 | 25 | extern int64_t SCALE; 26 | extern int64_t edgefactor; 27 | 28 | extern bool load_checkpoint; 29 | extern bool write_checkpoint; 30 | 31 | extern bool verify; 32 | 33 | void get_options (int argc, char **argv); 34 | 35 | #endif /* _OPTIONS_H */ 36 | -------------------------------------------------------------------------------- /applications/isopath/grappa/simple_graphs.hpp: -------------------------------------------------------------------------------- 1 | //Generates tuple graph representation for a few simple graphs 2 | 3 | void meshgrid_graph(int64_t * num_edges, GlobalAddress * tuple_edges, int n, int m); 4 | 5 | void balanced_tree_graph(int64_t * num_edges, GlobalAddress * tuple_edges, int lvs, int branches); 6 | 7 | void complete_graph(int64_t * num_edges, GlobalAddress * tuple_edges, int vertices); 8 | -------------------------------------------------------------------------------- /applications/isopath/options.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(OPTIONS_HEADER_) 5 | #define OPTIONS_HEADER_ 6 | 7 | #include 8 | 9 | extern int VERBOSE; 10 | extern int use_RMAT; 11 | extern char *dumpname; 12 | extern char *rootname; 13 | 14 | #define A_PARAM 0.57 15 | #define B_PARAM 0.19 16 | #define C_PARAM 0.19 17 | /* Hence D = 0.05. 
*/ 18 | 19 | extern double A, B, C, D; 20 | 21 | #define NBFS_max 8 22 | extern int NBFS; 23 | 24 | #define default_SCALE ((int64_t)14) 25 | #define default_edgefactor ((int64_t)16) 26 | 27 | extern int64_t SCALE; 28 | extern int64_t edgefactor; 29 | 30 | void get_options (int argc, char **argv); 31 | 32 | extern bool load_checkpoint; 33 | extern bool verify; 34 | 35 | #endif /* OPTIONS_HEADER_ */ 36 | -------------------------------------------------------------------------------- /applications/isopath/prng.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(PRNG_HEADER_) 5 | #define PRNG_HEADER_ 6 | 7 | /** Initialize the PRNG, called in a sequential context. */ 8 | void init_random (void); 9 | 10 | extern uint64_t userseed; 11 | extern uint_fast32_t prng_seed[5]; 12 | extern void *prng_state; 13 | 14 | #ifdef __MTA__ 15 | #include 16 | #else 17 | #include 18 | static void prand(int64_t n, double * v) { 19 | int64_t i; 20 | extern int64_t xmtcompat_rand_initialized; 21 | extern void xmtcompat_initialize_rand(void); 22 | if (!xmtcompat_rand_initialized) xmtcompat_initialize_rand(); 23 | for (i = 0; i < n; ++i) { 24 | v[i] = drand48(); 25 | } 26 | } 27 | #endif /* !defined(__MTA__) */ 28 | 29 | #endif /* PRNG_HEADER_ */ 30 | -------------------------------------------------------------------------------- /applications/isopath/timer.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(TIMER_HEADER_) 5 | #define TIMER_HEADER_ 6 | 7 | /** Start timing. */ 8 | void tic (void); 9 | 10 | /** Return seconds since last tic. 
*/ 11 | double toc (void); 12 | 13 | /** return current seconds */ 14 | double timer(void); 15 | 16 | /** Macro to time a block. */ 17 | #define TIME(timevar, what) do { tic (); what; timevar = toc(); } while (0) 18 | 19 | #endif /* TIMER_HEADER_ */ 20 | -------------------------------------------------------------------------------- /applications/isopath/verify.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. */ 4 | #if !defined(VERIFY_HEADER_) 5 | #define VERIFY_HEADER_ 6 | 7 | #include "generator/graph_generator.h" 8 | 9 | /** Verify a BFS tree, return volume or -1 if failed. */ 10 | int64_t verify_bfs_tree (int64_t *bfs_tree, int64_t max_bfsvtx, 11 | int64_t root, 12 | const struct packed_edge *IJ, int64_t nedge); 13 | 14 | #endif /* VERIFY_HEADER_ */ 15 | -------------------------------------------------------------------------------- /applications/isopath/xalloc.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; mode: folding; fill-column: 70; -*- */ 2 | /* Copyright 2010, Georgia Institute of Technology, USA. */ 3 | /* See COPYING for license. 
*/ 4 | #if !defined(XALLOC_HEADER_) 5 | #define XALLOC_HEADER_ 6 | 7 | void * xmalloc (size_t); 8 | void * xmalloc_large (size_t); 9 | void xfree_large (void *); 10 | void * xmalloc_large_ext (size_t); 11 | 12 | /* 13 | void mark_large_unused (void *); 14 | void mark_large_willuse (void *); 15 | */ 16 | 17 | #endif /* XALLOC_HEADER_ */ 18 | -------------------------------------------------------------------------------- /applications/join/.gitignore: -------------------------------------------------------------------------------- 1 | grappa*.cpp 2 | strings.cc 3 | strings.h 4 | utils.h 5 | utils.cc 6 | *bk 7 | *bkup 8 | -------------------------------------------------------------------------------- /applications/join/Aggregates.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace Aggregates { 4 | template < typename State, typename UV > 5 | State SUM(State sofar, UV nextval) { 6 | return sofar + nextval; 7 | } 8 | 9 | template < typename State, typename UV > 10 | State COUNT(State sofar, UV nextval) { 11 | return sofar + 1; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /applications/join/HashJoin.cpp: -------------------------------------------------------------------------------- 1 | #include "HashJoin.hpp" 2 | Grappa::GlobalCompletionEvent default_join_left_gce; 3 | Grappa::GlobalCompletionEvent default_join_right_gce; 4 | Grappa::GlobalCompletionEvent default_join_reduce_gce; 5 | -------------------------------------------------------------------------------- /applications/join/Local_graph_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "local_graph.hpp" 3 | 4 | 5 | BOOST_AUTO_TEST_SUITE( Local_graph_tests ); 6 | 7 | 8 | 9 | BOOST_AUTO_TEST_CASE( testBasicList ) { 10 | BOOST_MESSAGE("Testing basic adj list"); 11 | 12 | std::vector edges; 13 | edges.push_back({4,5}); 14 | 
edges.push_back({6,7}); 15 | edges.push_back({10,11}); 16 | 17 | LocalAdjListGraph g(edges); 18 | BOOST_CHECK( g.neighbors(4)[0] == 5 ); 19 | BOOST_CHECK( g.neighbors(6)[0] == 7 ); 20 | BOOST_CHECK( g.neighbors(10)[0] == 11 ); 21 | } 22 | 23 | BOOST_AUTO_TEST_SUITE_END(); 24 | -------------------------------------------------------------------------------- /applications/join/MapReduce.cpp: -------------------------------------------------------------------------------- 1 | #include "MapReduce.hpp" 2 | 3 | namespace MapReduce { 4 | Grappa::GlobalCompletionEvent default_mr_gce; 5 | } 6 | 7 | GRAPPA_DEFINE_METRIC(SummarizingMetric, mr_mapping_runtime, 0); 8 | GRAPPA_DEFINE_METRIC(SummarizingMetric, mr_combining_runtime, 0); 9 | GRAPPA_DEFINE_METRIC(SummarizingMetric, mr_reducing_runtime, 0); 10 | GRAPPA_DEFINE_METRIC(SummarizingMetric, mr_reallocation_runtime, 0); 11 | -------------------------------------------------------------------------------- /applications/join/MatchesDHT.cpp: -------------------------------------------------------------------------------- 1 | #include "MatchesDHT.hpp" 2 | 3 | // for all hash tables 4 | //GRAPPA_DEFINE_METRIC(MaxMetric, max_cell_length, 0); 5 | GRAPPA_DEFINE_METRIC(SimpleMetric, hash_tables_size, 0); 6 | GRAPPA_DEFINE_METRIC(SummarizingMetric, hash_tables_lookup_steps, 0); 7 | 8 | GRAPPA_DEFINE_METRIC(SimpleMetric, hash_remote_lookups, 0); 9 | GRAPPA_DEFINE_METRIC(SimpleMetric, hash_remote_inserts, 0); 10 | GRAPPA_DEFINE_METRIC(SimpleMetric, hash_local_lookups, 0); 11 | GRAPPA_DEFINE_METRIC(SimpleMetric, hash_local_inserts, 0); 12 | GRAPPA_DEFINE_METRIC(SimpleMetric, hash_called_lookups, 0); 13 | GRAPPA_DEFINE_METRIC(SimpleMetric, hash_called_inserts, 0); 14 | -------------------------------------------------------------------------------- /applications/join/Tuple.cpp: -------------------------------------------------------------------------------- 1 | #include "Tuple.hpp" 2 | #include 3 | 4 | std::ostream& operator<< 
(std::ostream& o, Tuple& t) { 5 | std::stringstream ss; 6 | ss << "("; 7 | for ( uint64_t i=0; i 5 | #include 6 | 7 | #define TUPLE_LEN 2 8 | struct Tuple { 9 | int64_t columns[TUPLE_LEN]; 10 | }; 11 | 12 | std::ostream& operator<< (std::ostream& o, Tuple& t); 13 | 14 | #endif // TUPLE_HPP 15 | 16 | -------------------------------------------------------------------------------- /applications/join/convert2bin.cpp: -------------------------------------------------------------------------------- 1 | #include "relation_io.hpp" 2 | 3 | int main(int argc, char** argv) { 4 | 5 | if (argc < 5) { 6 | std::cerr << "Usage: " << argv[0] << " FILE TYPE{i,d} SEPS BURNS" << std::endl; 7 | exit(1); 8 | } 9 | 10 | if (strncmp(argv[2], "i", 1) == 0) { 11 | convert2bin( argv[1], &toInt, argv[3], atoi(argv[4]) ); 12 | } else if (strncmp(argv[2], "d", 1) == 0) { 13 | convert2bin( argv[1], &toDouble, argv[3], atoi(argv[4]) ); 14 | } else { 15 | std::cerr << "unrecognized type " << argv[2] << std::endl; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /applications/join/double.txt: -------------------------------------------------------------------------------- 1 | 0 1 2 | 1 2 3 | 2 3 4 | 5 6 5 | 6 7 6 | 7 8 7 | 101 102 8 | 101 103 9 | 101 104 10 | 101 105 11 | 101 106 12 | 101 107 13 | 101 108 14 | 101 109 15 | 101 110 16 | 101 111 17 | -------------------------------------------------------------------------------- /applications/join/extract_timestamps.sh: -------------------------------------------------------------------------------- 1 | input=$1 2 | #schema 3 | #process-id event-type timestamp 4 | echo "stream type time" >$input.trace 5 | grep timestamp $input | awk '{gsub(/ +/, " ");print}' | cut -d ' ' -f 7,8,9 >>$input.trace 6 | -------------------------------------------------------------------------------- /applications/join/hex_tri.soln.txt: -------------------------------------------------------------------------------- 1 | 
1 2 3 2 | 1 3 4 3 | 1 4 5 4 | 1 6 7 5 | 1 2 7 6 | -------------------------------------------------------------------------------- /applications/join/hex_tri.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 | 1 3 3 | 1 4 4 | 1 5 5 | 1 6 6 | 1 7 7 | 2 1 8 | 3 1 9 | 4 1 10 | 5 1 11 | 6 1 12 | 7 1 13 | 2 3 14 | 3 4 15 | 4 5 16 | 5 6 17 | 6 7 18 | 7 2 19 | 7 6 20 | 6 5 21 | 5 4 22 | 4 3 23 | 3 2 24 | 2 7 25 | -------------------------------------------------------------------------------- /applications/join/overlapping.txt: -------------------------------------------------------------------------------- 1 | 0 1 2 | 0 2 3 | 2 3 4 | 3 4 5 | 4 5 6 | 6 7 7 | 101 102 8 | 101 103 9 | 101 104 10 | 101 105 11 | 101 106 12 | 101 107 13 | 101 108 14 | 101 109 15 | 101 110 16 | 101 111 17 | -------------------------------------------------------------------------------- /applications/join/relation.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | template < typename T > 6 | struct Relation { 7 | GlobalAddress data; 8 | size_t numtuples; 9 | }; 10 | -------------------------------------------------------------------------------- /applications/join/relation_io.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | DEFINE_string(relations, ".", "path to relation files"); 4 | DEFINE_bool(bin, true, "input file is binary format"); 5 | 6 | -------------------------------------------------------------------------------- /applications/join/scripts/activenodes.sh: -------------------------------------------------------------------------------- 1 | sueue | grep bdmyers | getcolumn 9 2 | -------------------------------------------------------------------------------- /applications/join/scripts/forall.sh: -------------------------------------------------------------------------------- 1 | cmd=$1 2 | 3 | declare -a arr 
4 | 5 | while read line 6 | do 7 | arr+=($line) 8 | done 9 | 10 | for h in "${arr[@]}" 11 | do 12 | ssh $h $cmd 13 | done 14 | -------------------------------------------------------------------------------- /applications/join/scripts/getcolumn.sh: -------------------------------------------------------------------------------- 1 | awk '{ gsub(/[ \t]+/, " ");print }' | cut -d ' ' -f $1 2 | -------------------------------------------------------------------------------- /applications/join/scripts/nodes_nested2names.sh: -------------------------------------------------------------------------------- 1 | scontrol show hostname $1 2 | -------------------------------------------------------------------------------- /applications/join/scripts/pidlist.sh: -------------------------------------------------------------------------------- 1 | ps aux | grep grappa | grep -v grep | getcolumn.sh 2 2 | -------------------------------------------------------------------------------- /applications/join/single.txt: -------------------------------------------------------------------------------- 1 | 0 1 2 | 0 2 3 | 2 3 4 | 3 4 5 | 1 11 6 | 6 7 7 | 101 102 8 | 101 103 9 | 101 104 10 | 101 105 11 | 101 106 12 | 101 107 13 | 101 108 14 | 101 109 15 | 101 110 16 | 101 111 17 | -------------------------------------------------------------------------------- /applications/join/small_tri.soln.txt: -------------------------------------------------------------------------------- 1 | 1 2 3 2 | 2 3 4 3 | -------------------------------------------------------------------------------- /applications/join/small_tri.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 | 2 3 3 | 3 1 4 | 3 4 5 | 4 2 6 | 4 5 7 | 5 1 8 | -------------------------------------------------------------------------------- /applications/join/sp2b.100mb.sh: -------------------------------------------------------------------------------- 1 | DIR=$SP2B/bin 2 | rm -f sp2bench_1m 
sp2bench_1m.index 3 | ln -s $DIR/sp2b.100mb.i sp2bench_1m 4 | ln -s $DIR/sp2b.100mb.index sp2bench_1m.index 5 | export NTUPLES=`wc -l $DIR/sp2b.100mb.i | cut -d ' ' -f1` 6 | -------------------------------------------------------------------------------- /applications/join/sp2b.100t.sh: -------------------------------------------------------------------------------- 1 | DIR=/sampa/home/bdmyers/escience/datalogcompiler/c_test_environment 2 | rm -f sp2bench_1m sp2bench_1m.index 3 | ln -s $DIR/sp2b.100t.i sp2bench_1m 4 | ln -s $DIR/sp2b.100t.index sp2bench_1m.index 5 | export NTUPLES=`wc -l $DIR/sp2b.100t.i | cut -d ' ' -f1` 6 | -------------------------------------------------------------------------------- /applications/join/sp2b.1gb.sh: -------------------------------------------------------------------------------- 1 | DIR=$SP2B/bin 2 | rm -f sp2bench_1m sp2bench_1m.index 3 | ln -s $DIR/sp2b.1gb.i sp2bench_1m 4 | ln -s $DIR/sp2b.1gb.index sp2bench_1m.index 5 | export NTUPLES=`wc -l $DIR/sp2b.1gb.i | cut -d ' ' -f1` 6 | -------------------------------------------------------------------------------- /applications/join/squares.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Query.hpp" 3 | #include "grappa/graph.hpp" 4 | 5 | class SquareQuery : public Query { 6 | public: 7 | virtual void preprocessing(std::vector relations); 8 | 9 | virtual void execute(std::vector relations); 10 | }; 11 | -------------------------------------------------------------------------------- /applications/join/squares_bushy.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Query.hpp" 3 | #include "grappa/graph.hpp" 4 | 5 | class SquareBushyPlan : public Query { 6 | public: 7 | virtual void preprocessing(std::vector relations); 8 | 9 | virtual void execute(std::vector relations); 10 | }; 11 | 
-------------------------------------------------------------------------------- /applications/join/squares_partition.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Query.hpp" 3 | #include "grappa/graph.hpp" 4 | 5 | class SquarePartition4way: public Query { 6 | private: 7 | GlobalAddress> index; 8 | public: 9 | virtual void preprocessing(std::vector relations); 10 | 11 | virtual void execute(std::vector relations); 12 | }; 13 | -------------------------------------------------------------------------------- /applications/join/squares_partition_bushy.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Query.hpp" 3 | #include "grappa/graph.hpp" 4 | 5 | class SquarePartitionBushy4way: public Query { 6 | private: 7 | GlobalAddress> index; 8 | public: 9 | virtual void preprocessing(std::vector relations); 10 | 11 | virtual void execute(std::vector relations); 12 | }; 13 | -------------------------------------------------------------------------------- /applications/join/stats.cpp: -------------------------------------------------------------------------------- 1 | #include "stats.h" 2 | 3 | GRAPPA_DEFINE_METRIC(SimpleMetric, query_runtime, 0); 4 | GRAPPA_DEFINE_METRIC(SimpleMetric, scan_runtime, 0); 5 | GRAPPA_DEFINE_METRIC(SimpleMetric, in_memory_runtime,0); 6 | GRAPPA_DEFINE_METRIC(SimpleMetric, init_runtime,0); 7 | GRAPPA_DEFINE_METRIC(SimpleMetric, join_coarse_result_count,0); 8 | GRAPPA_DEFINE_METRIC(SimpleMetric, emit_count,0); 9 | 10 | -------------------------------------------------------------------------------- /applications/join/stats.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | GRAPPA_DECLARE_METRIC(SimpleMetric, query_runtime); 6 | GRAPPA_DECLARE_METRIC(SimpleMetric, scan_runtime); 7 | GRAPPA_DECLARE_METRIC(SimpleMetric, in_memory_runtime); 8 | 
GRAPPA_DECLARE_METRIC(SimpleMetric, init_runtime); 9 | GRAPPA_DECLARE_METRIC(SimpleMetric, join_coarse_result_count); 10 | GRAPPA_DECLARE_METRIC(SimpleMetric, emit_count); 11 | -------------------------------------------------------------------------------- /applications/join/triangles.sql: -------------------------------------------------------------------------------- 1 | \timing 2 | 3 | select E1.src,E2.src,E3.src 4 | from followedby E1, followedby E2, followedby E3 5 | where E1.dest=E2.src and E2.dest=E3.src and E3.dest=E1.src -- triangle select 6 | and E1.src < E2.src and E2.src < E3.src; -- no duplicates 7 | -------------------------------------------------------------------------------- /applications/join/utility.cpp: -------------------------------------------------------------------------------- 1 | #include "utility.hpp" 2 | #include 3 | 4 | int64_t fourth_root(int64_t x) { 5 | // index pow 4 6 | std::vector powers = {0, 1, 16, 81, 256, 625, 1296, 2401}; 7 | int64_t ind = powers.size() / 2; 8 | int64_t hi = powers.size()-1; 9 | int64_t lo = 0; 10 | while(true) { 11 | if (x == powers[ind]) { 12 | return ind; 13 | } else if (x > powers[ind]) { 14 | int64_t next = (ind+hi)/2; 15 | if (next - ind == 0) { 16 | return ind; 17 | } 18 | lo = ind; 19 | ind = next; 20 | } else { 21 | int64_t next = (ind+lo)/2; 22 | hi = ind; 23 | ind = next; 24 | } 25 | } 26 | } 27 | 28 | 29 | std::function makeHash( int64_t dim ) { 30 | // identity 31 | return [dim](int64_t x) { return x % dim; }; 32 | } 33 | 34 | -------------------------------------------------------------------------------- /applications/join/utility.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | typedef std::pair pair_t; 10 | namespace std { 11 | template <> struct hash { 12 | size_t operator()(const pair_t& x) const { 13 | static int64_t p = 32416152883; // prime 14 | return p*x.first + 
x.second; 15 | } 16 | }; 17 | } 18 | 19 | 20 | 21 | int64_t fourth_root(int64_t x); 22 | 23 | std::function makeHash( int64_t dim ); 24 | -------------------------------------------------------------------------------- /applications/nativegraph/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(bfs) 2 | add_subdirectory(cc) 3 | add_subdirectory(sssp) 4 | -------------------------------------------------------------------------------- /applications/nativegraph/README.md: -------------------------------------------------------------------------------- 1 | Simple Graph Algorithms 2 | ----------------------- 3 | 4 | This directory contains some graph algorithms implemented directly against Grappa's Graph data structure. These can be contrasted against the implementations in `applications/graphlab`, which are implemented at a higher level using the GraphLab API emulation. 5 | 6 | Be warned, in some cases, for instance `bfs/bfs_beamer`, this "native" version is the fastest implementation, but in many cases, the GraphLab version is better optimized and more efficient, and this `nativegraph` version is more for demonstration purposes.
7 | -------------------------------------------------------------------------------- /applications/nativegraph/bfs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(SOURCES main.cpp common.hpp) 2 | 3 | add_grappa_application(bfs_queues.exe bfs_queues.cpp ${SOURCES}) 4 | add_grappa_application(bfs_spmd.exe bfs_spmd.cpp ${SOURCES}) 5 | add_grappa_application(bfs_beamer.exe bfs_beamer.cpp ${SOURCES}) 6 | -------------------------------------------------------------------------------- /applications/nativegraph/bfs/common.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../verifier.hpp" 5 | 6 | using namespace Grappa; 7 | 8 | // additional data to attach to each vertex in the graph 9 | struct BFSData { 10 | int64_t parent; 11 | int64_t level; 12 | bool seen; 13 | 14 | void init() { 15 | parent = -1; 16 | level = 0; 17 | seen = false; 18 | } 19 | }; 20 | 21 | using G = Graph; 22 | 23 | extern int64_t nedge_traversed; 24 | 25 | void bfs(GlobalAddress g, int nbfs, TupleGraph tg); 26 | 27 | template< typename V, typename E > 28 | inline int64_t choose_root(GlobalAddress> g) { 29 | int64_t root; 30 | do { 31 | root = random() % g->nv; 32 | } while (delegate::call(g->vs+root,[](typename G::Vertex& v){ return v.nadj; }) == 0); 33 | return root; 34 | } 35 | 36 | inline int64_t verify(TupleGraph tg, GlobalAddress g, int64_t root) { 37 | return VerificatorBase::verify(tg, g, root); 38 | } 39 | 40 | -------------------------------------------------------------------------------- /applications/nativegraph/cc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_grappa_application(cc_kahan.exe main.cpp cc_kahan.hpp) 3 | -------------------------------------------------------------------------------- /applications/nativegraph/sssp/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_grappa_application(sssp.exe sssp.cpp sssp.hpp ../verifier.hpp) 2 | -------------------------------------------------------------------------------- /applications/pagerank/.gitignore: -------------------------------------------------------------------------------- 1 | *.d 2 | *.igor 3 | -------------------------------------------------------------------------------- /applications/pagerank/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(SOURCES 2 | spmv_mult.cpp 3 | spmv_mult.hpp 4 | ) 5 | 6 | add_definitions(-Drestrict=__restrict__ -DGRAPH_GENERATOR_GRAPPA -D_GRAPPA) 7 | 8 | add_grappa_application(pagerank.exe ${SOURCES} pagerank.cpp) 9 | 10 | add_grappa_application(mult.exe 11 | ${SOURCES} mult_main.cpp 12 | ) 13 | target_link_libraries(mult.exe generator) 14 | -------------------------------------------------------------------------------- /applications/pagerank/README.md: -------------------------------------------------------------------------------- 1 | This directory contains three Grappa programs. 
2 | * pagerank.exe: A version of Pagerank using our current graph representation 3 | * pagerank_old.exe: A version of Pagerank using our previous graph representation 4 | * mult.exe: A matrix multiply example 5 | -------------------------------------------------------------------------------- /applications/pagerank/spmv_mult.hpp: -------------------------------------------------------------------------------- 1 | // graph500/grappa/ 2 | // XXX shouldn't have to include this first: common.h and oned_csr.h have cyclic dependency 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | using vindex = int; 11 | 12 | struct PagerankData { 13 | double * weights; 14 | double v[2]; 15 | }; 16 | using PagerankVertex = Grappa::Vertex; 17 | 18 | void spmv_mult(GlobalAddress> g, vindex x, vindex y); 19 | -------------------------------------------------------------------------------- /applications/sort/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(grappa) 2 | -------------------------------------------------------------------------------- /applications/sort/grappa/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(SOURCES 3 | main.cpp 4 | npb_intsort.h 5 | ) 6 | 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") 8 | add_definitions(-Drestrict=__restrict__) 9 | 10 | add_grappa_application(sort.exe ${SOURCES}) 11 | -------------------------------------------------------------------------------- /applications/sort/grappa/npb_intsort.h: -------------------------------------------------------------------------------- 1 | 2 | enum npb_class { S, W, A, B, C, D, None = -1 }; 3 | static const int NKEY_LOG2[] = { 16, 20, 23, 25, 27, 29 }; 4 | static const int MAX_KEY_LOG2[] = { 11, 16, 19, 21, 23, 27 }; 5 | static const int NBUCKET_LOG2[] = { 10, 10, 10, 10, 10, 10 }; 6 | 7 | inline npb_class get_npb_class(char c) { 8 | switch (c) { 9 | 
case 'S': return S; 10 | case 'W': return W; 11 | case 'A': return A; 12 | case 'B': return B; 13 | case 'C': return C; 14 | case 'D': return D; 15 | default: return None; 16 | } 17 | } 18 | 19 | -------------------------------------------------------------------------------- /applications/sort/grappa/test.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require "./sort.rb" 3 | 4 | $params = { 5 | scale: [16], 6 | log2buckets: [7], 7 | log2maxkey: [10], 8 | nnode: [12], 9 | ppn: [2], 10 | nworkers: [1024], 11 | flushticks: [2000000], 12 | pollticks: [20000], 13 | chunksize: [64], 14 | threshold: [64], 15 | io_blocks_per_node: [1], 16 | io_blocksize_mb: [512], 17 | nproc: expr('nnode*ppn'), 18 | machine: [$machinename], 19 | } 20 | $opt_force = true 21 | 22 | if __FILE__ == $PROGRAM_NAME 23 | run_experiments($cmd, $params, $dbpath, $table, &$json_plus_fields_parser) 24 | end 25 | -------------------------------------------------------------------------------- /applications/util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB UTILS 3 | "*/*.cpp" 4 | ) 5 | 6 | # make separate build targets for each utility 7 | foreach(file ${UTILS}) 8 | get_filename_component(base ${file} NAME_WE) 9 | add_grappa_exe(util-${base} ${base}.exe ${file}) 10 | set_property(TARGET ${name} PROPERTY FOLDER "Applications") 11 | endforeach() 12 | 13 | 14 | 15 | # TODO: this should really just be enabled with tracing 16 | 17 | macro(add_vampir_exe target exe ) 18 | add_executable(${target} EXCLUDE_FROM_ALL ${ARGN}) 19 | set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_NAME "${exe}") 20 | target_link_libraries(${target} 21 | Grappa 22 | open-trace-format 23 | sqlite3 24 | ) 25 | endmacro(add_vampir_exe) 26 | 27 | macro(add_vampir_application name) 28 | add_vampir_exe(${name} ${name} ${ARGN}) 29 | set_property(TARGET ${name} PROPERTY FOLDER "Applications") # For 
organization in Xcode project 30 | endmacro(add_vampir_application) 31 | 32 | add_vampir_application(otf2sqlite.exe "otf2sqlite.cpp") 33 | -------------------------------------------------------------------------------- /applications/uts/.gitignore: -------------------------------------------------------------------------------- 1 | config.in 2 | out.txt 3 | uts-mem-shm 4 | .igor 5 | -------------------------------------------------------------------------------- /applications/uts/AUTHORS: -------------------------------------------------------------------------------- 1 | The Unbalanced Tree Search (UTS) Project Team: 2 | --------------------------------------------- 3 | 4 | University of Maryland: 5 | Bill Pugh 7 | 8 | The Ohio State University: 9 | James Dinan 12 | 13 | University of North Carolina, Chapel Hill: 14 | Stephen Olivier 18 | 19 | Supercomputing Research Center: 20 | Daniel Pryor 21 | 22 | * - indicates project PI 23 | -------------------------------------------------------------------------------- /applications/uts/Changelog: -------------------------------------------------------------------------------- 1 | 1.0.1 - Released 2/10/2010 2 | 3 | * Added uts_upc_enhanced to the distribution. 4 | 5 | 1.0 - Released 11/11/2009 6 | 7 | * Initial public release of the benchmark. 
8 | -------------------------------------------------------------------------------- /applications/uts/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 See AUTHORS file for copyright holders 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /applications/uts/check_ctrk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # On Cray: 3 | #!/opt/open/open/bin/perl 4 | 5 | 6 | while (<>) { 7 | next unless (/^CTRK/); 8 | 9 | # Capture the work ID into $1 10 | /(0x[0-9A-Fa-f]+$)/; 11 | $id = $1; 12 | 13 | if (/put chunk/) { 14 | $hash{$id}++; 15 | $nreleased++; 16 | } 17 | elsif (/got chunk/) { 18 | $hash{$id}--; 19 | $nacquired++; 20 | } 21 | # elsif (!/TERMINATING/) { 22 | # print "Warning: malformed entry. 
$_"; 23 | # } 24 | } 25 | 26 | print "Total Put = " . $nreleased . ", Total Got = " . $nacquired . "\n"; 27 | 28 | $errors = 0; 29 | 30 | while(($key, $value) = each %hash) { 31 | ($value > 0) and print "Never got: $key ($value)\n" and $errors++; 32 | ($value < 0) and print "Never put: $key ($value)\n" and $errors++; 33 | } 34 | 35 | print "$errors errors\n"; 36 | -------------------------------------------------------------------------------- /applications/uts/configure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONFDIR="config" 4 | CONFFILE="config.in" 5 | MYNAME="configure.sh" 6 | 7 | function usage () { 8 | 9 | echo "UTS - Unbalanced Tree Search Configuration. Selects from available" 10 | echo " configurations in the '$CONFDIR' directory." 11 | echo 12 | echo " Usage: $MYNAME CONFIGURATION_NAME" 13 | echo 14 | echo " Available Configurations:" 15 | 16 | for file in ${CONFDIR}/* 17 | do 18 | [ -r $file ] && [ ! -d $file ] && echo " $(echo $file | cut -d/ -f2)" 19 | done 20 | 21 | } 22 | 23 | if [ ! -d $CONFDIR ] 24 | then 25 | echo "Fatal error: Unable to access the config file directory, '$CONFDIR'!" 26 | exit 1 27 | fi 28 | 29 | if [ $# -lt 1 ] || [ $1 = '-h' ] || [ $1 = '--help' ] 30 | then 31 | usage 32 | exit 0 33 | fi 34 | 35 | if [ -r $CONFDIR/$1 ] 36 | then 37 | ln -sf $CONFDIR/$1 $CONFFILE 38 | 39 | echo 40 | echo "Configuration changed. Please review '$CONFDIR/$1' to ensure the" 41 | echo "new settings are correct." 
42 | echo 43 | else 44 | echo "Could not find configuration file: $1" 45 | exit 1 46 | fi 47 | -------------------------------------------------------------------------------- /applications/uts/dlist.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ---- The Unbalanced Tree Search (UTS) Benchmark ---- 3 | * 4 | * Copyright (c) 2010 See AUTHORS file for copyright holders 5 | * 6 | * This file is part of the unbalanced tree search benchmark. This 7 | * project is licensed under the MIT Open Source license. See the LICENSE 8 | * file for copyright and licensing information. 9 | * 10 | * UTS is a collaborative project between researchers at the University of 11 | * Maryland, the University of North Carolina at Chapel Hill, and the Ohio 12 | * State University. See AUTHORS file for more information. 13 | * 14 | */ 15 | 16 | #ifndef DLIST_H 17 | #define DLIST_H 18 | 19 | typedef struct dcell *dlist; 20 | 21 | struct dcell 22 | { 23 | void *element; 24 | dlist next; 25 | dlist prev; 26 | }; 27 | 28 | extern dlist dcons(void *element, dlist prev, dlist next); 29 | extern dlist create_and_link(void *element, dlist prev, dlist next); 30 | extern void* unlink_and_free(dlist l); 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /applications/uts/rng/rng.h: -------------------------------------------------------------------------------- 1 | #ifndef _RNG_H 2 | #define _RNG_H 3 | 4 | /*********************************************************** 5 | * * 6 | * splitable random number generator to use: * 7 | * (default) sha1 hash * 8 | * (UTS_ALFG) additive lagged fibonacci generator * 9 | * * 10 | ***********************************************************/ 11 | 12 | #if defined(UTS_ALFG) 13 | # include "alfg.h" 14 | # define RNG_TYPE 1 15 | #elif defined(BRG_RNG) 16 | # include "brg_sha1.h" 17 | # define RNG_TYPE 0 18 | #elif defined(DEVINE_RNG) 19 | # include "devine_sha1.h" 20 | # 
define RNG_TYPE 0 21 | #else 22 | # error "No random number generator selected." 23 | #endif 24 | 25 | #endif /* _RNG_H */ 26 | -------------------------------------------------------------------------------- /applications/uts/shared_dlist.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ---- The Unbalanced Tree Search (UTS) Benchmark ---- 3 | * 4 | * Copyright (c) 2010 See AUTHORS file for copyright holders 5 | * 6 | * This file is part of the unbalanced tree search benchmark. This 7 | * project is licensed under the MIT Open Source license. See the LICENSE 8 | * file for copyright and licensing information. 9 | * 10 | * UTS is a collaborative project between researchers at the University of 11 | * Maryland, the University of North Carolina at Chapel Hill, and the Ohio 12 | * State University. See AUTHORS file for more information. 13 | * 14 | */ 15 | 16 | #ifndef SHARED_DLIST_H 17 | #define SHARED_DLIST_H 18 | 19 | #include 20 | 21 | typedef shared struct shr_dcell * shr_dlist; 22 | 23 | struct shr_dcell 24 | { 25 | shared void *element; 26 | shr_dlist next; 27 | shr_dlist prev; 28 | }; 29 | 30 | extern shr_dlist shr_dcons(shared void *element, shr_dlist prev, shr_dlist next); 31 | extern shr_dlist shr_create_and_link(shared void *element, shr_dlist prev, shr_dlist next); 32 | extern shared void* shr_unlink_and_free(shr_dlist l); 33 | 34 | #endif /* SHARED_DLIST_H */ 35 | -------------------------------------------------------------------------------- /bin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # file(COPY . 
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 2 | file(GLOB scripts "*") 3 | foreach(file ${scripts}) 4 | get_filename_component(name ${file} NAME) 5 | file(RELATIVE_PATH relative_file ${CMAKE_CURRENT_BINARY_DIR} ${file}) 6 | execute_process(COMMAND 7 | ln -sf ${relative_file} ${CMAKE_CURRENT_BINARY_DIR}/${name} 8 | ) 9 | endforeach() 10 | 11 | install(PROGRAMS settings.sh DESTINATION "bin") 12 | -------------------------------------------------------------------------------- /bin/distcc_make: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # calls 'salloc', launches distcc on each of the nodes of the allocation, 3 | # and runs 'make' with DISTCC_HOSTS set up 4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | nnode=${DISTCC_NNODE-8} 7 | partition=${DISTCC_PARTITION} # use default slurm partition if none specified 8 | 9 | if [ ! -z $partition ]; then 10 | partitionarg=-p$partition 11 | fi 12 | 13 | exec salloc -N$nnode $partitionarg "$DIR/launch_distcc.sh" make "$@" 14 | 15 | # note: for Bash Completion to work with this, find the bash_completion/make and add 'distcc_make' to the list of make commands to complete for (near the end of the file) 16 | -------------------------------------------------------------------------------- /bin/distcc_ninja: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # calls 'salloc', launches distcc on each of the nodes of the allocation, 3 | # and runs 'ninja' with DISTCC_HOSTS set up 4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | nnode=${DISTCC_NNODE-8} 7 | 8 | exec salloc -N$nnode "$DIR/launch_distcc.sh" ninja -j $((nnode*4)) "$@" 9 | -------------------------------------------------------------------------------- /bin/launch_distcc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 
#################################################### 3 | # launch distcc on slurm allocation 4 | # assumes that distcc is already running on those nodes 5 | # usage: salloc -N4 launch_distcc.sh make -j 6 | #################################################### 7 | nodelist=`scontrol show hostname $SLURM_JOB_NODELIST | xargs` 8 | # hosts="--randomize" 9 | # for n in $nodelist; do 10 | # hosts="$hosts $n,cpp,lzo" 11 | # done 12 | hosts="--randomize $nodelist" # non-pump mode 13 | export DISTCC_HOSTS="$hosts" 14 | export PS1="(distcc) $PS1" 15 | echo "export DISTCC_HOSTS='$hosts'" 16 | exec "$@" 17 | -------------------------------------------------------------------------------- /bin/settings.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # find Grappa installation location 4 | SCRIPT_PATH="${BASH_SOURCE[0]}"; 5 | if ([ -h "${SCRIPT_PATH}" ]) 6 | then 7 | while([ -h "${SCRIPT_PATH}" ]) 8 | do 9 | SCRIPT_PATH=`readlink "${SCRIPT_PATH}"` 10 | done 11 | fi 12 | pushd . > /dev/null 13 | cd `dirname ${SCRIPT_PATH}` > /dev/null 14 | SCRIPT_PATH=`pwd` 15 | cd .. 
16 | GRAPPA_PREFIX=`pwd` 17 | popd > /dev/null 18 | 19 | # make Grappa installation location visible 20 | export GRAPPA_PREFIX 21 | 22 | # load important Grappa environment variables 23 | source $GRAPPA_PREFIX/bin/env.sh 24 | 25 | -------------------------------------------------------------------------------- /bin/srun_epilog.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # for i in `ipcs -m | grep bholt | cut -d" " -f1`; do ipcrm -M $i; done 3 | ipcs -m | grep $USER | awk '{print $2}' | xargs -n1 -r ipcrm -m 4 | rm -f /dev/shm/GrappaLocaleSharedMemory 5 | -------------------------------------------------------------------------------- /doc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(tutorial) 2 | -------------------------------------------------------------------------------- /doc/testing.md: -------------------------------------------------------------------------------- 1 | Testing in Grappa 2 | =============================================================================== 3 | We use Boost::Test to test our code. 4 | 5 | The full list of unit tests is found in `system/CMakeLists.txt`. Here, a macro `add_check` is used to define a test and tell whether it is currently expected to pass or fail. 6 | 7 | Each test defined in this way creates two targets: `*.test` which builds the test, and `check-*`, which runs the test. In addition, there are aggregate targets `check-all-{pass,fail}` which build and run all the passing or failing tests respectively, and `check-all-{pass,fail}-compile-only` which, as the name implies, only compiles them. 
8 | 9 | Non-exhaustive list of test targets: 10 | - `New_loop_tests.test`: build loop tests 11 | - `check-New_loop_tests`: build and run loop tests 12 | - `check-all-pass`: build and run all passing tests 13 | - `check-all-pass-compile-only`: just build all the tests expected to pass 14 | 15 | Someday we'll get this up and running with some CI server, but until then, we just try and run it whenever we make significant changes. 16 | -------------------------------------------------------------------------------- /doc/tutorial/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB TUTORIAL_SOURCES 2 | "*.cpp" 3 | ) 4 | 5 | foreach(file ${TUTORIAL_SOURCES}) 6 | get_filename_component(base ${file} NAME_WE) 7 | add_grappa_exe(tutorial-${base} ${base}.exe ${file}) 8 | set_property(TARGET ${name} PROPERTY FOLDER "Tutorial") 9 | endforeach() 10 | -------------------------------------------------------------------------------- /doc/tutorial/addressing_linear.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////// 2 | // tutorial/addressing_linear.cpp 3 | ////////////////////////////////// 4 | #include 5 | #include 6 | 7 | using namespace Grappa; 8 | 9 | int main(int argc, char *argv[]) { 10 | init(&argc, &argv); 11 | run([]{ 12 | auto array = global_alloc(48); 13 | for (auto i=0; i<48; i++) { 14 | std::cout << "[" << i << ": core " << (array+i).core() << "] "; 15 | } 16 | std::cout << "\n"; 17 | }); 18 | finalize(); 19 | } 20 | 21 | //> srun --nodes=2 --ntasks-per-node=2 -- tutorial/addressing_linear.exe 22 | -------------------------------------------------------------------------------- /doc/tutorial/addressing_symmetric.cpp: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////// 2 | // tutorial/addressing_symmetric.cpp 3 | ///////////////////////////////////// 4 | #include 5 | 
#include 6 | #include 7 | 8 | using namespace Grappa; 9 | 10 | struct Data { 11 | size_t N; 12 | long *buffer; 13 | 14 | void init(size_t N) { 15 | this->N = N; 16 | this->buffer = new long[32]; 17 | } 18 | } GRAPPA_BLOCK_ALIGNED; 19 | 20 | int main(int argc, char *argv[]) { 21 | init(&argc, &argv); 22 | run([]{ 23 | // allocate a copy of Data on every core out of the global heap 24 | GlobalAddress d = symmetric_global_alloc< Data >(); 25 | 26 | on_all_cores([d]{ 27 | // use `->` overload to get pointer to local copy to call the method on 28 | d->init(1024); 29 | }); 30 | 31 | // now we have a local copy of the struct available anywhere 32 | on_all_cores([d]{ 33 | d->buffer[0] = d->N; 34 | }); 35 | }); 36 | finalize(); 37 | } 38 | 39 | -------------------------------------------------------------------------------- /doc/tutorial/delegates.cpp: -------------------------------------------------------------------------------- 1 | /////////////////////////// 2 | // tutorial/delegates.cpp 3 | /////////////////////////// 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace Grappa; 11 | 12 | int main(int argc, char *argv[]) { 13 | init(&argc, &argv); 14 | run([]{ 15 | 16 | size_t N = 50; 17 | GlobalAddress array = global_alloc(N); 18 | 19 | // simple global write 20 | for (size_t i = 0; i < N; i++) { 21 | // array[i] = i 22 | delegate::write( array+i, i ); 23 | } 24 | 25 | for (size_t i = 0; i < N; i += 10) { 26 | // simple remote read 27 | // value = array[i] 28 | long value = delegate::read( array+i ); 29 | std::cout << "[" << i << "] = " << value; 30 | 31 | // do some arbitrary computation on the core that owns `array+i` 32 | double v = delegate::call(array+i, [](long *a){ return tan(*a); }); 33 | std::cout << ", tan = " << v << std::endl; 34 | } 35 | 36 | }); 37 | finalize(); 38 | } 39 | -------------------------------------------------------------------------------- /doc/tutorial/hello_world_1.cpp: 
-------------------------------------------------------------------------------- 1 | /////////////////////////////// 2 | // tutorial/hello_world_1.cpp 3 | /////////////////////////////// 4 | #include 5 | #include 6 | int main(int argc, char *argv[]) { 7 | // this code is running on all cores 8 | 9 | // initialize Grappa 10 | Grappa::init(&argc, &argv); 11 | 12 | // spawn the root task 13 | Grappa::run([]{ 14 | // this code is running as a task on a single core 15 | std::cout << "Hello world from the root task!\n"; 16 | }); 17 | 18 | // shutdown Grappa 19 | Grappa::finalize(); 20 | } 21 | -------------------------------------------------------------------------------- /doc/tutorial/hello_world_2.cpp: -------------------------------------------------------------------------------- 1 | /////////////////////////////// 2 | // tutorial/hello_world_2.cpp 3 | /////////////////////////////// 4 | #include 5 | #include 6 | #include 7 | int main(int argc, char *argv[]) { 8 | 9 | Grappa::init(&argc, &argv); 10 | 11 | Grappa::run([]{ 12 | std::cout << "Hello world from the root task!\n"; 13 | 14 | // SPMD execution on all cores 15 | Grappa::on_all_cores([]{ 16 | std::cout << "Hello world from Core " << Grappa::mycore() << " of " << Grappa::cores() 17 | << " (locale " << Grappa::mylocale() << ")"<< "\n"; 18 | }); 19 | std::cout << "Exiting root task.\n"; 20 | }); 21 | 22 | Grappa::finalize(); 23 | } 24 | -------------------------------------------------------------------------------- /scratch/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # make separate build targets for each cpp file in scratch/ (must reconfigure after adding file) 2 | # ex: 3 | # > touch scratch/test.cpp 4 | # > make rebuild_cache 5 | # > make scratch-test 6 | # > grappa_srun -- scratch/test.exe 7 | 8 | file(GLOB SCRATCHES 9 | "*.cpp" 10 | ) 11 | 12 | foreach(file ${SCRATCHES}) 13 | get_filename_component(base ${file} NAME_WE) 14 | 
add_grappa_exe(scratch-${base} ${base}.exe ${file}) 15 | set_property(TARGET ${name} PROPERTY FOLDER "Scratch") 16 | endforeach() 17 | -------------------------------------------------------------------------------- /system/Grappa.md: -------------------------------------------------------------------------------- 1 | Grappa: Developer Documentation {#mainpage} 2 | =========================== 3 | These pages are the API documentation for the Grappa runtime system. For beginners, we recommend first reading through the tutorial on GitHub, as it will explain the programming model and main ideas. For other information about the project, including technical papers about the techniques, we refer readers to the project website: [grappa.io](http://grappa.io). 4 | 5 | Grappa is a runtime system for scaling irregular applications on commodity clusters. It's a PGAS library and runtime system that allows you to write global-view C++11 code that runs on distributed-memory computers. 6 | 7 | Grappa is a research project and is still young! Please expect things to break. Please do not expect amazing performance yet. Please ask for help if you run into problems. We're excited for you to use the software and to help make Grappa a great tool for the irregular applications community! To find answers to questions or submit new ones, please use [GitHub Issues](https://github.com/uwsampa/grappa/issues). 
8 | -------------------------------------------------------------------------------- /system/NTBuffer.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NTBuffer.hpp" 3 | 4 | namespace Grappa { 5 | namespace impl { 6 | 7 | int NTBuffer::initial_offset = 0; 8 | 9 | } // namespace impl 10 | } // namespace Grappa 11 | -------------------------------------------------------------------------------- /system/NTMessage.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NTMessage.hpp" 3 | 4 | #include 5 | 6 | namespace Grappa { 7 | namespace impl { 8 | 9 | std::ostream& operator<<( std::ostream& o, const NTMessageBase& m ) { 10 | uint64_t fp = m.fp_; 11 | return o << ""; 12 | } 13 | 14 | char * deaggregate_nt_buffer( char * buf, size_t size ) { 15 | const char * end = buf + size; 16 | while( buf < end ) { 17 | #ifdef USE_NT_OPS 18 | _mm_prefetch( buf, _MM_HINT_NTA ); 19 | _mm_prefetch( buf+64, _MM_HINT_NTA ); 20 | #endif 21 | char * next = buf + 8; 22 | if( 0 != *(reinterpret_cast(buf)) ) { 23 | auto mb = reinterpret_cast(buf); 24 | uint64_t fp_int = mb->fp_; 25 | auto fp = reinterpret_cast(fp_int); 26 | DVLOG(5) << "Deserializing with " << (void*) fp << "/" << *mb << " at " << (void*) buf; 27 | next = (*fp)(buf); 28 | } else { 29 | DVLOG(5) << "Skipping a word at " << (void*) buf; 30 | } 31 | buf = next; 32 | } 33 | return buf; 34 | } 35 | 36 | } // namespace impl 37 | } // namespace Grappa 38 | 39 | -------------------------------------------------------------------------------- /system/doxygen_footer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /system/runlatencyswitch.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 
2 | require '../experiment_utils' 3 | 4 | 5 | db = "context_switch.db" 6 | table = :context_switch_latency 7 | 8 | cmd = "make mpi_test TARGET=ContextSwitchLatency_tests.test \ 9 | NNODE=%{nnode} \ 10 | PPN=%{ppn} \ 11 | VERBOSE_TESTS=1 \ 12 | SRUN_FLAGS=--time=5 \ 13 | GARGS=' \ 14 | --lines=%{touched_cachelines}' 2>&1 |tee out.txt" 15 | 16 | 17 | params = { 18 | trial: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], 19 | nnode: [1], 20 | ppn: [1], 21 | #total_iterations: [51200000], 22 | machine: ['cluster'], 23 | touched_cachelines: [1,4,16,32,128,512,1024,8192,16000,50000,150000,500000,1000000,1500000,4000000], 24 | problem: ['switch_latency'], 25 | } 26 | 27 | 28 | parser = lambda{ |cmdout| 29 | records = {} 30 | 31 | # parse experiment specific results 32 | dict = /time = (?\d+\.\d+e-\d+)/.match(cmdout).dictionize 33 | 34 | if dict.empty? then 35 | raise "Output string does not match" 36 | end 37 | 38 | records.merge!(dict) 39 | 40 | records 41 | } 42 | 43 | run_experiments(cmd, params, db, table, &parser) 44 | -------------------------------------------------------------------------------- /system/tests/igor_datastructs.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'igor' 3 | 4 | # inherit parser, sbatch_flags 5 | require_relative '../util/igor_common.rb' 6 | 7 | Igor do 8 | database '~/exp/pgas.sqlite', :queue 9 | 10 | @params.merge! 
GFLAGS 11 | 12 | @sbatch_flags.delete_if{|e| e =~ /--time/} << "--time=1:00:00" 13 | 14 | @test_cmd = -> test, extras { %Q[ ../bin/grappa_srun --test=#{test} --no-verbose -- #{GFLAGS.expand} #{extras}] } 15 | command @test_cmd['GlobalVector_tests',''] 16 | 17 | params { 18 | nnode 2 19 | ppn 1 20 | } 21 | 22 | interact # enter interactive mode 23 | end 24 | -------------------------------------------------------------------------------- /system/tests/igor_hashmap.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require_relative 'igor_hashset' 3 | 4 | Igor do 5 | @dbtable = :hashmap 6 | 7 | command @test_cmd['GlobalHash_tests', '--map_perf'] 8 | 9 | interact 10 | end 11 | -------------------------------------------------------------------------------- /system/tests/igor_hashset.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require_relative 'igor_datastructs' 3 | 4 | Igor do 5 | @dbtable = :hashset 6 | 7 | include Isolatable 8 | isolate 'GlobalHash_tests.test' 9 | 10 | GFLAGS.merge!({ 11 | nelems: [1024], 12 | ntrials: [1], 13 | max_key: [1024], 14 | global_hash_size: [1024], 15 | fraction_lookups: [0.5], 16 | insert_async: [0], 17 | }) 18 | @params.merge!(GFLAGS) 19 | command @test_cmd['GlobalHash_tests', '--set_perf'] 20 | 21 | params { 22 | version 'fc_looks_fixed' 23 | log_nelems 10; nelems expr('2**log_nelems') 24 | log_max_key 10; max_key expr('2**log_max_key') 25 | global_hash_size expr('2**log_max_key') 26 | ntrials 1 27 | } 28 | 29 | interact 30 | end 31 | -------------------------------------------------------------------------------- /system/tests/igor_queue.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require_relative 'igor_datastructs' 3 | 4 | Igor do 5 | include Isolatable 6 | 7 | @dbtable = :queue 8 | 9 | isolate "GlobalVector_tests.test" 10 | 11 | 
GFLAGS.merge!({ 12 | ntrials: [1], 13 | nelems: [1024], 14 | vector_size: [1024], 15 | fraction_push: [0.5], 16 | flat_combining_local_only: [0], 17 | }) 18 | @params.merge! GFLAGS 19 | command @test_cmd['GlobalVector_tests', '--queue_perf'] 20 | 21 | params { 22 | version 'fixed_random' 23 | log_nelems 10 24 | nelems expr('2**log_nelems') 25 | vector_size expr('(2**log_nelems)*2') 26 | ntrials 1 27 | } 28 | 29 | interact 30 | end 31 | -------------------------------------------------------------------------------- /system/tests/igor_stack.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require_relative 'igor_queue' 3 | 4 | Igor do 5 | include Isolatable 6 | 7 | @dbtable = :stack 8 | 9 | isolate "GlobalVector_tests.test" 10 | 11 | command @test_cmd['GlobalVector_tests', '--stack_perf'] 12 | 13 | params { 14 | version 'matching_better' 15 | } 16 | 17 | interact 18 | end 19 | -------------------------------------------------------------------------------- /system/tests/igor_tests.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'igor' 3 | 4 | # inherit parser, sbatch_flags 5 | require_relative '../util/igor_common.rb' 6 | 7 | Igor do 8 | database '~/exp/test.sqlite', :vector 9 | 10 | # isolate everything needed for the executable so we can sbcast them for local execution 11 | params.merge!(GFLAGS) 12 | 13 | $cmd = -> { %Q[ ../bin/grappa_srun --no-verbose --test=%{name} -- #{GFLAGS.expand}] } 14 | command $cmd[] 15 | 16 | sbatch_flags.delete_if{|e| e =~ /--time/} << "--time=15:00" 17 | 18 | params { 19 | name 'GlobalVector_tests' 20 | nnode 2 21 | ppn 1 22 | scale 10 23 | nelems expr('2**scale') 24 | } 25 | 26 | interact # enter interactive mode 27 | end 28 | -------------------------------------------------------------------------------- /system/utils/README: 
-------------------------------------------------------------------------------- 1 | Various misc utility functions, 2 | TODO: move general ones out of system/ 3 | -------------------------------------------------------------------------------- /third-party/bashflags/test/bool.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="${BASH_SOURCE%/*}" 3 | source $DIR/../flags.bash 4 | 5 | define_bool_flag 'foo' 'help text' 'f' 6 | define_bool_flag 'bar' 'useless flag' 'b' 7 | 8 | parse_flags $@ 9 | 10 | if flags_true $FLAGS_foo && [ $FLAGS_foo = true ] && $FLAGS_foo; then 11 | echo "$FLAGS_foo" 12 | else 13 | echo "$FLAGS_foo" 14 | fi 15 | -------------------------------------------------------------------------------- /third-party/bashflags/test/echo.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="${BASH_SOURCE%/*}" 3 | source $DIR/../flags.bash 4 | 5 | define_flag 'text' 'default' 'sample description' 't' 6 | 7 | parse_flags $@ 8 | 9 | echo "text=$FLAGS_text,extra=$FLAGS_extra" 10 | -------------------------------------------------------------------------------- /third-party/downloads/README.md: -------------------------------------------------------------------------------- 1 | 2 | Satisfying Grappa's third-party dependences without web access 3 | -------------------------------------------------------------- 4 | 5 | If you want to build Grappa on a machine without access to the web, and that machine doesn't already have all the third-party libraries installed that Grappa needs, you'll have to provide the source archives for those dependences yourself. 6 | 7 | To do so, download and untar the following file in ```third-party/downloads```. Then run ```configure```, including the ```--no-downloads``` flag. 
8 | 9 | [http://grappa.cs.washington.edu/files/grappa-third-party-downloads.tar](http://grappa.cs.washington.edu/files/grappa-third-party-downloads.tar) -------------------------------------------------------------------------------- /third-party/google-glog/AUTHORS: -------------------------------------------------------------------------------- 1 | opensource@google.com 2 | 3 | -------------------------------------------------------------------------------- /third-party/google-glog/NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampa/grappa/69f2f3674d6f8e512e0bf55264bb75b972fd82de/third-party/google-glog/NEWS -------------------------------------------------------------------------------- /third-party/google-glog/README: -------------------------------------------------------------------------------- 1 | This repository contains a C++ implementation of the Google logging 2 | module. Documentation for the implementation is in doc/. 3 | 4 | See INSTALL for (generic) installation instructions for C++: basically 5 | ./configure && make && make install 6 | -------------------------------------------------------------------------------- /third-party/google-glog/README.windows: -------------------------------------------------------------------------------- 1 | This project has begun being ported to Windows. A working solution 2 | file exists in this directory: 3 | google-glog.sln 4 | 5 | You can load this solution file into VC++ 9.0 (Visual Studio 6 | 2008). You may also be able to use this solution file with older 7 | Visual Studios by converting the solution file. 8 | 9 | Note that stack tracing and some unittests are not ported 10 | yet. 11 | 12 | You can also link glog code in statically -- see the example project 13 | libglog_static and logging_unittest_static, which does this. 
For this 14 | to work, you'll need to add "/D GOOGLE_GLOG_DLL_DECL=" to the compile 15 | line of every glog's .cc file. 16 | 17 | I have little experience with Windows programming, so there may be 18 | better ways to set this up than I've done! If you run across any 19 | problems, please post to the google-glog Google Group, or report 20 | them on the google-glog Google Code site: 21 | http://groups.google.com/group/google-glog 22 | http://code.google.com/p/google-glog/issues/list 23 | 24 | -- Shinichiro Hamaji 25 | 26 | Last modified: 23 January 2009 27 | -------------------------------------------------------------------------------- /third-party/google-glog/libglog.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | 6 | Name: libglog 7 | Description: Google Log (glog) C++ logging framework 8 | Version: @VERSION@ 9 | Libs: -L${libdir} -lglog 10 | Cflags: -I${includedir} 11 | -------------------------------------------------------------------------------- /third-party/google-glog/m4/ac_have_attribute.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_C___ATTRIBUTE__], [ 2 | AC_MSG_CHECKING(for __attribute__) 3 | AC_CACHE_VAL(ac_cv___attribute__, [ 4 | AC_TRY_COMPILE( 5 | [#include 6 | static void foo(void) __attribute__ ((unused)); 7 | void foo(void) { exit(1); }], 8 | [], 9 | ac_cv___attribute__=yes, 10 | ac_cv___attribute__=no 11 | )]) 12 | if test "$ac_cv___attribute__" = "yes"; then 13 | AC_DEFINE(HAVE___ATTRIBUTE__, 1, [define if your compiler has __attribute__]) 14 | fi 15 | AC_MSG_RESULT($ac_cv___attribute__) 16 | ]) 17 | -------------------------------------------------------------------------------- /third-party/google-glog/m4/ac_have_builtin_expect.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_C___BUILTIN_EXPECT], 
[ 2 | AC_MSG_CHECKING(for __builtin_expect) 3 | AC_CACHE_VAL(ac_cv___builtin_expect, [ 4 | AC_TRY_COMPILE( 5 | [int foo(void) { if (__builtin_expect(0, 0)) return 1; return 0; }], 6 | [], 7 | ac_cv___builtin_expect=yes, 8 | ac_cv___builtin_expect=no 9 | )]) 10 | if test "$ac_cv___builtin_expect" = "yes"; then 11 | AC_DEFINE(HAVE___BUILTIN_EXPECT, 1, [define if your compiler has __builtin_expect]) 12 | fi 13 | AC_MSG_RESULT($ac_cv___builtin_expect) 14 | ]) 15 | -------------------------------------------------------------------------------- /third-party/google-glog/m4/ac_have_sync_val_compare_and_swap.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_C___SYNC_VAL_COMPARE_AND_SWAP], [ 2 | AC_MSG_CHECKING(for __sync_val_compare_and_swap) 3 | AC_CACHE_VAL(ac_cv___sync_val_compare_and_swap, [ 4 | AC_TRY_LINK( 5 | [], 6 | [int a; if (__sync_val_compare_and_swap(&a, 0, 1)) return 1; return 0;], 7 | ac_cv___sync_val_compare_and_swap=yes, 8 | ac_cv___sync_val_compare_and_swap=no 9 | )]) 10 | if test "$ac_cv___sync_val_compare_and_swap" = "yes"; then 11 | AC_DEFINE(HAVE___SYNC_VAL_COMPARE_AND_SWAP, 1, [define if your compiler has __sync_val_compare_and_swap]) 12 | fi 13 | AC_MSG_RESULT($ac_cv___sync_val_compare_and_swap) 14 | ]) 15 | -------------------------------------------------------------------------------- /third-party/google-glog/m4/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004 Free Software Foundation, Inc. 4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 9 | 10 | # Generated from ltversion.in. 
11 | 12 | # serial 3017 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.2.6b]) 16 | m4_define([LT_PACKAGE_REVISION], [1.3017]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.2.6b' 20 | macro_revision='1.3017' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /third-party/google-glog/m4/namespaces.m4: -------------------------------------------------------------------------------- 1 | # Checks whether the compiler implements namespaces 2 | AC_DEFUN([AC_CXX_NAMESPACES], 3 | [AC_CACHE_CHECK(whether the compiler implements namespaces, 4 | ac_cv_cxx_namespaces, 5 | [AC_LANG_SAVE 6 | AC_LANG_CPLUSPLUS 7 | AC_TRY_COMPILE([namespace Outer { 8 | namespace Inner { int i = 0; }}], 9 | [using namespace Outer::Inner; return i;], 10 | ac_cv_cxx_namespaces=yes, 11 | ac_cv_cxx_namespaces=no) 12 | AC_LANG_RESTORE]) 13 | if test "$ac_cv_cxx_namespaces" = yes; then 14 | AC_DEFINE(HAVE_NAMESPACES, 1, [define if the compiler implements namespaces]) 15 | fi]) 16 | -------------------------------------------------------------------------------- /third-party/google-glog/m4/stl_namespace.m4: -------------------------------------------------------------------------------- 1 | # We check what namespace stl code like vector expects to be executed in 2 | 3 | AC_DEFUN([AC_CXX_STL_NAMESPACE], 4 | [AC_CACHE_CHECK( 5 | what namespace STL code is in, 6 | ac_cv_cxx_stl_namespace, 7 | [AC_REQUIRE([AC_CXX_NAMESPACES]) 8 | AC_LANG_SAVE 9 | AC_LANG_CPLUSPLUS 10 | AC_TRY_COMPILE([#include ], 11 | [vector t; return 0;], 12 | ac_cv_cxx_stl_namespace=none) 13 | AC_TRY_COMPILE([#include ], 14 | [std::vector t; return 0;], 15 | ac_cv_cxx_stl_namespace=std) 16 | AC_LANG_RESTORE]) 17 | if test "$ac_cv_cxx_stl_namespace" = none; then 18 | AC_DEFINE(STL_NAMESPACE,, 19 | [the namespace where STL 
code like vector<> is defined]) 20 | fi 21 | if test "$ac_cv_cxx_stl_namespace" = std; then 22 | AC_DEFINE(STL_NAMESPACE,std, 23 | [the namespace where STL code like vector<> is defined]) 24 | fi 25 | ]) 26 | -------------------------------------------------------------------------------- /third-party/google-glog/m4/using_operator.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AC_CXX_USING_OPERATOR], 2 | [AC_CACHE_CHECK( 3 | whether compiler supports using ::operator<<, 4 | ac_cv_cxx_using_operator, 5 | [AC_LANG_SAVE 6 | AC_LANG_CPLUSPLUS 7 | AC_TRY_COMPILE([#include 8 | std::ostream& operator<<(std::ostream&, struct s);], 9 | [using ::operator<<; return 0;], 10 | ac_cv_cxx_using_operator=1, 11 | ac_cv_cxx_using_operator=0) 12 | AC_LANG_RESTORE]) 13 | if test "$ac_cv_cxx_using_operator" = 1; then 14 | AC_DEFINE(HAVE_USING_OPERATOR, 1, [define if the compiler supports using expression for operator]) 15 | fi]) 16 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/README: -------------------------------------------------------------------------------- 1 | The list of files here isn't complete. For a step-by-step guide on 2 | how to set this package up correctly, check out 3 | http://www.debian.org/doc/maint-guide/ 4 | 5 | Most of the files that are in this directory are boilerplate. 6 | However, you may need to change the list of binary-arch dependencies 7 | in 'rules'. 
8 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/compat: -------------------------------------------------------------------------------- 1 | 4 2 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/control: -------------------------------------------------------------------------------- 1 | Source: google-glog 2 | Priority: optional 3 | Maintainer: Google Inc. 4 | Build-Depends: debhelper (>= 4.0.0), binutils 5 | Standards-Version: 3.6.1 6 | 7 | Package: libgoogle-glog-dev 8 | Section: libdevel 9 | Architecture: any 10 | Depends: libgoogle-glog0 (= ${Source-Version}) 11 | Description: a library that implements application-level logging. 12 | This library provides logging APIs based on C++-style streams and 13 | various helper macros. The devel package contains static and debug 14 | libraries and header files for developing applications that use the 15 | google-glog package. 16 | 17 | Package: libgoogle-glog0 18 | Section: libs 19 | Architecture: any 20 | Depends: ${shlibs:Depends} 21 | Description: a library that implements application-level logging. 22 | This library provides logging APIs based on C++-style streams and 23 | various helper macros. 
24 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/docs: -------------------------------------------------------------------------------- 1 | AUTHORS 2 | COPYING 3 | ChangeLog 4 | INSTALL 5 | NEWS 6 | README 7 | doc/designstyle.css 8 | doc/glog.html 9 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/libgoogle-glog-dev.dirs: -------------------------------------------------------------------------------- 1 | usr/lib 2 | usr/lib/pkgconfig 3 | usr/include 4 | usr/include/glog 5 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/libgoogle-glog-dev.install: -------------------------------------------------------------------------------- 1 | usr/include/glog/* 2 | usr/lib/lib*.so 3 | usr/lib/lib*.a 4 | usr/lib/*.la 5 | usr/lib/pkgconfig/* 6 | debian/tmp/usr/include/glog/* 7 | debian/tmp/usr/lib/lib*.so 8 | debian/tmp/usr/lib/lib*.a 9 | debian/tmp/usr/lib/*.la 10 | debian/tmp/usr/lib/pkgconfig/* 11 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/libgoogle-glog0.dirs: -------------------------------------------------------------------------------- 1 | usr/lib 2 | -------------------------------------------------------------------------------- /third-party/google-glog/packages/deb/libgoogle-glog0.install: -------------------------------------------------------------------------------- 1 | usr/lib/lib*.so.* 2 | debian/tmp/usr/lib/lib*.so.* 3 | -------------------------------------------------------------------------------- /third-party/graph500-generator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(GENERATOR_SOURCES 3 | graph_generator.h 4 | graph_generator.c 5 | make_graph.h 6 | make_graph.c 7 | splittable_mrg.h 8 | 
splittable_mrg.c 9 | utils.h 10 | utils.c 11 | user_settings.h 12 | mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h 13 | ) 14 | 15 | add_library(graph500-generator STATIC ${GENERATOR_SOURCES}) 16 | set_target_properties(graph500-generator PROPERTIES 17 | COMPILE_FLAGS "-Drestrict=__restrict__ -DGRAPH_GENERATOR_SEQ -ffast-math ${STATIC_FLAGS}" 18 | FOLDER "Third Party" 19 | ) 20 | 21 | install(TARGETS graph500-generator DESTINATION "lib") 22 | -------------------------------------------------------------------------------- /third-party/graph500-generator/Makefile.mpi: -------------------------------------------------------------------------------- 1 | CC = mpicc 2 | CFLAGS = -std=c99 -O3 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -DNDEBUG # -g -pg 3 | # CFLAGS = -std=c99 -DGRAPH_GENERATOR_MPI -DGRAPHGEN_DISTRIBUTED_MEMORY -g 4 | LDFLAGS = -O3 5 | # LDFLAGS = -g 6 | MPICC = mpicc 7 | 8 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 9 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 10 | 11 | all: generator_test_mpi 12 | 13 | generator_test_mpi: generator_test_mpi.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 14 | $(MPICC) $(CFLAGS) $(LDFLAGS) -o generator_test_mpi generator_test_mpi.c $(GENERATOR_SOURCES) -lm 15 | 16 | clean: 17 | -rm -f generator_test_mpi 18 | -------------------------------------------------------------------------------- /third-party/graph500-generator/Makefile.omp: -------------------------------------------------------------------------------- 1 | CC = gcc -fopenmp 2 | CFLAGS = -std=c99 -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_OMP # -g -pg 3 | LDFLAGS = -O3 4 | 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h 
mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 7 | 8 | all: generator_test_omp 9 | 10 | generator_test_omp: generator_test_omp.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_omp generator_test_omp.c $(GENERATOR_SOURCES) -lm 12 | 13 | clean: 14 | -rm -f generator_test_omp 15 | -------------------------------------------------------------------------------- /third-party/graph500-generator/Makefile.seq: -------------------------------------------------------------------------------- 1 | CC = cc 2 | CFLAGS = -g -Wall -Drestrict=__restrict__ -O3 -DNDEBUG -ffast-math -DGRAPH_GENERATOR_SEQ # -g -pg 3 | # CFLAGS = -g -Wall -Drestrict=__restrict__ 4 | LDFLAGS = -g # -g -pg 5 | 6 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 7 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 8 | 9 | all: generator_test_seq 10 | 11 | generator_test_seq: generator_test_seq.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 12 | $(CC) $(CFLAGS) $(LDFLAGS) -o generator_test_seq generator_test_seq.c $(GENERATOR_SOURCES) -lm 13 | 14 | clean: 15 | -rm -f generator_test_seq 16 | -------------------------------------------------------------------------------- /third-party/graph500-generator/Makefile.xmt: -------------------------------------------------------------------------------- 1 | CC = cc 2 | CFLAGS = -DNDEBUG 3 | LDFLAGS = $(CFLAGS) # -g -pg 4 | 5 | GENERATOR_SOURCES = graph_generator.c make_graph.c splittable_mrg.c utils.c 6 | GENERATOR_HEADERS = graph_generator.h make_graph.h mod_arith_32bit.h mod_arith_64bit.h mod_arith.h mod_arith_xmt.h splittable_mrg.h utils.h user_settings.h mrg_transitions.c 7 | 8 | all: generator_test_xmt 9 | 10 | generator_test_xmt: generator_test_xmt.c $(GENERATOR_SOURCES) $(GENERATOR_HEADERS) 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o 
generator_test_xmt generator_test_xmt.c $(GENERATOR_SOURCES) -lm 12 | 13 | clean: 14 | -rm -f generator_test_xmt 15 | -------------------------------------------------------------------------------- /third-party/vampirtrace.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'fileutils'; include FileUtils 3 | require 'optparse'; require 'ostruct' 4 | 5 | def `(cmd) 6 | system cmd 7 | if not $?.success? 8 | warn "error! debugging..." 9 | require 'pry'; binding.pry 10 | end 11 | end 12 | 13 | opt = OpenStruct.new 14 | opt.prefix = '/opt/vampir' 15 | 16 | OptionParser.new {|p| 17 | p.on('--prefix=path'){|p| opt.prefix = p } 18 | }.parse! 19 | 20 | `wget http://sampa.cs.washington.edu/grappa/VampirTrace-5.14.4.tar.gz` 21 | `tar xzf VampirTrace-5.14.4.tar.gz` 22 | 23 | cd ("VampirTrace-5.14.4") do 24 | `./configure --prefix=#{opt.prefix}` 25 | `make -j4` 26 | `make install` 27 | end 28 | 29 | rmdir "VampirTrace-5.14.4" 30 | rm "VampirTrace-5.14.4.tar.gz" 31 | -------------------------------------------------------------------------------- /util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # copy everything into build dir 2 | # file(COPY . 
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 3 | file(GLOB scripts "*") 4 | foreach(file ${scripts}) # symlink each entry of this source directory into the build directory under its base name 5 | get_filename_component(name ${file} NAME) 6 | file(RELATIVE_PATH relative_file ${CMAKE_CURRENT_BINARY_DIR} ${file}) 7 | execute_process(COMMAND 8 | ln -sf ${relative_file} ${CMAKE_CURRENT_BINARY_DIR}/${name} 9 | ) 10 | endforeach() 11 | # NOTE: execute_process runs while CMake configures, so the links are created at configure time rather than at build time. 12 | 13 | # 14 | # installation 15 | # 16 | # Install env.sh as an executable program into the bin directory of the install prefix. 17 | install(PROGRAMS env.sh DESTINATION "bin") 18 | -------------------------------------------------------------------------------- /util/common.sh: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | # Common BASH helpers, including a mini flag-parsing library. 3 | ##################################################################### 4 | # has_srun: returns status 0 when the shell can resolve an srun command, non-zero otherwise; all output of the lookup is discarded. 5 | function has_srun { 6 | type srun >/dev/null 2>&1 7 | } 8 | # NOTE(review): the header mentions a flag-parsing library, but has_srun is the only helper defined in this file. 9 | -------------------------------------------------------------------------------- /util/env.sh: -------------------------------------------------------------------------------- 1 | ## set up Google logging defaults 2 | export GLOG_logtostderr="1" 3 | export GLOG_v="1" 4 | 5 | ## set Google profiler sample rate 6 | export CPUPROFILE_FREQUENCY="50" 7 | 8 | ## set VampirTrace options 9 | #export VT_VERBOSE="10" 10 | export VT_MAX_FLUSHES="0" 11 | export VT_PFORM_GDIR="." 
12 | export VT_PFORM_LDIR="/scratch" 13 | export VT_FILE_UNIQUE="yes" 14 | export VT_MPITRACE="no" 15 | export VT_UNIFY="no" 16 | 17 | ## set MVAPICH2 options to avoid keeping around malloced memory 18 | ## (and some performance tweaks which may be irrelevant) 19 | export MV2_USE_LAZY_MEM_UNREGISTER="0" 20 | export MV2_HOMOGENEOUS_CLUSTER="1" 21 | 22 | export MV2_USE_RDMA_FAST_PATH="0" 23 | 24 | export MV2_SRQ_MAX_SIZE="8192" 25 | #export MV2_USE_XRC="1" # doesn't seem to work with 1.9b on pal 26 | 27 | #export MV2_USE_MCAST="1" # doesn't always work on pal 28 | 29 | ## set MVAPICH2 options to avoid keeping around malloced memory 30 | export OMPI_MCA_mpi_leave_pinned="0" 31 | export OMPI_MCA_mpi_yield_when_idle="0" 32 | 33 | # in case $USER isn't set 34 | USER=${USER-$(whoami)} 35 | -------------------------------------------------------------------------------- /util/histogram.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'sequel' 3 | 4 | dbpath = ARGV[0] 5 | user_glob = ARGV[1] 6 | 7 | puts "database: #{dbpath}" 8 | puts "user_glob: #{user_glob}" 9 | 10 | db = Sequel.sqlite(dbpath) 11 | table = :histograms 12 | histable = db[table] 13 | 14 | db.create_table?(table){ 15 | primary_key :id 16 | Integer :jobid 17 | Integer :core 18 | String :stat 19 | Integer :value 20 | index :jobid 21 | index :stat 22 | } 23 | 24 | Dir.glob(user_glob).each do |f| 25 | m = f.match(/histogram\.(?\d+)\/(?[\w_]+)\.(?\d+)\.out/) 26 | r = {jobid:m[:jobid].to_i,core:m[:core].to_i,stat:m[:stat],value:0} 27 | puts "#{f} -- #{r}" 28 | data = [] 29 | File.open(f,"r") do |f| 30 | while b = f.read(8) do 31 | v = b.unpack("q")[0] 32 | data << r.merge({value:v}) 33 | end 34 | end 35 | histable.multi_insert(data) 36 | end 37 | --------------------------------------------------------------------------------