├── .gitignore ├── DGEMM ├── onecpu.c └── tstdgemm.c ├── FFT ├── bcnrand.c ├── fft235.c ├── hpccfft.h ├── mpifft.c ├── onecpu.c ├── pzfft1d.c ├── tstfft.c ├── wrapfftw.c ├── wrapfftw.h ├── wrapmpifftw.c ├── wrapmpifftw.h └── zfft1d.c ├── Makefile ├── PTRANS ├── cblacslt.c ├── cblacslt.h ├── mem.c ├── pdmatcmp.c ├── pdmatgen.c ├── pdtrans.c ├── pdtransdriver.c ├── pmatgeninc.c └── sclapack.c ├── README.tex ├── README.xml ├── RandomAccess ├── MPIRandomAccess.c ├── MPIRandomAccessLCG.c ├── MPIRandomAccessLCG_opt.c ├── MPIRandomAccessLCG_vanilla.c ├── MPIRandomAccess_opt.c ├── MPIRandomAccess_vanilla.c ├── RandomAccess.h ├── buckets.c ├── buckets.h ├── core_single_cpu.c ├── core_single_cpu_lcg.c ├── heap.c ├── heap.h ├── pool.c ├── pool.h ├── single_cpu.c ├── single_cpu_lcg.c ├── star_single_cpu.c ├── star_single_cpu_lcg.c ├── time_bound.c ├── time_bound.h ├── time_bound_lcg.c ├── utility.c ├── verification.c └── verification_lcg.c ├── STREAM ├── onecpu.c ├── split_stream_funcs.py ├── stream.c └── stream_mpi.c ├── TEST ├── _hpccinf.txt ├── doc ├── class2specs.tex ├── hpccimpg.tex └── hpccusrg.tex ├── hpl ├── BUGS ├── COPYRIGHT ├── HISTORY ├── HPL.build.log.220120040613 ├── INSTALL ├── Make.MacOSX ├── Make.UNKNOWN ├── Make.top ├── Makefile ├── README ├── TODO ├── TUNING ├── include │ ├── hpccmema.h │ ├── hpl.h │ ├── hpl_auxil.h │ ├── hpl_blas.h │ ├── hpl_comm.h │ ├── hpl_gesv.h │ ├── hpl_grid.h │ ├── hpl_matgen.h │ ├── hpl_misc.h │ ├── hpl_panel.h │ ├── hpl_pauxil.h │ ├── hpl_pfact.h │ ├── hpl_pgesv.h │ ├── hpl_pmatgen.h │ ├── hpl_pmisc.h │ ├── hpl_ptest.h │ ├── hpl_ptimer.h │ ├── hpl_test.h │ ├── hpl_timer.h │ └── hpl_units.h ├── lib │ └── arch │ │ └── build │ │ └── Makefile.hpcc ├── makes │ ├── Make.auxil │ ├── Make.blas │ ├── Make.comm │ ├── Make.gesv │ ├── Make.grid │ ├── Make.matgen │ ├── Make.panel │ ├── Make.pauxil │ ├── Make.pfact │ ├── Make.pgesv │ ├── Make.pmatgen │ ├── Make.ptest │ ├── Make.ptimer │ ├── Make.test │ ├── Make.timer │ └── Make.units ├── man │ 
└── man3 │ │ ├── HPL_abort.3 │ │ ├── HPL_all_reduce.3 │ │ ├── HPL_barrier.3 │ │ ├── HPL_bcast.3 │ │ ├── HPL_binit.3 │ │ ├── HPL_broadcast.3 │ │ ├── HPL_bwait.3 │ │ ├── HPL_copyL.3 │ │ ├── HPL_daxpy.3 │ │ ├── HPL_dcopy.3 │ │ ├── HPL_dgemm.3 │ │ ├── HPL_dgemv.3 │ │ ├── HPL_dger.3 │ │ ├── HPL_dlacpy.3 │ │ ├── HPL_dlamch.3 │ │ ├── HPL_dlange.3 │ │ ├── HPL_dlaprnt.3 │ │ ├── HPL_dlaswp00N.3 │ │ ├── HPL_dlaswp01N.3 │ │ ├── HPL_dlaswp01T.3 │ │ ├── HPL_dlaswp02N.3 │ │ ├── HPL_dlaswp03N.3 │ │ ├── HPL_dlaswp03T.3 │ │ ├── HPL_dlaswp04N.3 │ │ ├── HPL_dlaswp04T.3 │ │ ├── HPL_dlaswp05N.3 │ │ ├── HPL_dlaswp05T.3 │ │ ├── HPL_dlaswp06N.3 │ │ ├── HPL_dlaswp06T.3 │ │ ├── HPL_dlaswp10N.3 │ │ ├── HPL_dlatcpy.3 │ │ ├── HPL_dlocmax.3 │ │ ├── HPL_dlocswpN.3 │ │ ├── HPL_dlocswpT.3 │ │ ├── HPL_dmatgen.3 │ │ ├── HPL_dscal.3 │ │ ├── HPL_dswap.3 │ │ ├── HPL_dtrsm.3 │ │ ├── HPL_dtrsv.3 │ │ ├── HPL_equil.3 │ │ ├── HPL_fprintf.3 │ │ ├── HPL_grid_exit.3 │ │ ├── HPL_grid_info.3 │ │ ├── HPL_grid_init.3 │ │ ├── HPL_idamax.3 │ │ ├── HPL_indxg2l.3 │ │ ├── HPL_indxg2lp.3 │ │ ├── HPL_indxg2p.3 │ │ ├── HPL_indxl2g.3 │ │ ├── HPL_infog2l.3 │ │ ├── HPL_jumpit.3 │ │ ├── HPL_ladd.3 │ │ ├── HPL_lmul.3 │ │ ├── HPL_logsort.3 │ │ ├── HPL_max.3 │ │ ├── HPL_min.3 │ │ ├── HPL_numroc.3 │ │ ├── HPL_numrocI.3 │ │ ├── HPL_pabort.3 │ │ ├── HPL_packL.3 │ │ ├── HPL_pddriver.3 │ │ ├── HPL_pdfact.3 │ │ ├── HPL_pdgesv.3 │ │ ├── HPL_pdgesv0.3 │ │ ├── HPL_pdgesvK1.3 │ │ ├── HPL_pdgesvK2.3 │ │ ├── HPL_pdinfo.3 │ │ ├── HPL_pdlamch.3 │ │ ├── HPL_pdlange.3 │ │ ├── HPL_pdlaprnt.3 │ │ ├── HPL_pdlaswp00N.3 │ │ ├── HPL_pdlaswp00T.3 │ │ ├── HPL_pdlaswp01N.3 │ │ ├── HPL_pdlaswp01T.3 │ │ ├── HPL_pdmatgen.3 │ │ ├── HPL_pdmxswp.3 │ │ ├── HPL_pdpancrN.3 │ │ ├── HPL_pdpancrT.3 │ │ ├── HPL_pdpanel_disp.3 │ │ ├── HPL_pdpanel_free.3 │ │ ├── HPL_pdpanel_init.3 │ │ ├── HPL_pdpanel_new.3 │ │ ├── HPL_pdpanllN.3 │ │ ├── HPL_pdpanllT.3 │ │ ├── HPL_pdpanrlN.3 │ │ ├── HPL_pdpanrlT.3 │ │ ├── HPL_pdrpancrN.3 │ │ ├── HPL_pdrpancrT.3 │ │ ├── HPL_pdrpanllN.3 │ 
│ ├── HPL_pdrpanllT.3 │ │ ├── HPL_pdrpanrlN.3 │ │ ├── HPL_pdrpanrlT.3 │ │ ├── HPL_pdtest.3 │ │ ├── HPL_pdtrsv.3 │ │ ├── HPL_pdupdateNN.3 │ │ ├── HPL_pdupdateNT.3 │ │ ├── HPL_pdupdateTN.3 │ │ ├── HPL_pdupdateTT.3 │ │ ├── HPL_perm.3 │ │ ├── HPL_pipid.3 │ │ ├── HPL_plindx0.3 │ │ ├── HPL_plindx1.3 │ │ ├── HPL_plindx10.3 │ │ ├── HPL_pnum.3 │ │ ├── HPL_ptimer.3 │ │ ├── HPL_ptimer_cputime.3 │ │ ├── HPL_ptimer_walltime.3 │ │ ├── HPL_pwarn.3 │ │ ├── HPL_rand.3 │ │ ├── HPL_recv.3 │ │ ├── HPL_reduce.3 │ │ ├── HPL_rollN.3 │ │ ├── HPL_rollT.3 │ │ ├── HPL_sdrv.3 │ │ ├── HPL_send.3 │ │ ├── HPL_setran.3 │ │ ├── HPL_spreadN.3 │ │ ├── HPL_spreadT.3 │ │ ├── HPL_sum.3 │ │ ├── HPL_timer.3 │ │ ├── HPL_timer_cputime.3 │ │ ├── HPL_timer_walltime.3 │ │ ├── HPL_warn.3 │ │ └── HPL_xjumpm.3 ├── setup │ ├── Make.BGP │ ├── Make.CrayX1 │ ├── Make.FreeBSD_PIV_CBLAS │ ├── Make.HPUX_FBLAS │ ├── Make.I860_FBLAS │ ├── Make.IRIX_FBLAS │ ├── Make.Linux-x86_64-OpenBLAS-FFTW3 │ ├── Make.LinuxIntelIA64Itan2_eccMKL │ ├── Make.Linux_ATHLON_CBLAS │ ├── Make.Linux_ATHLON_FBLAS │ ├── Make.Linux_ATHLON_VSIPL │ ├── Make.Linux_AtlasCBLAS_Lam │ ├── Make.Linux_AtlasFBLAS_Lam │ ├── Make.Linux_PII_CBLAS │ ├── Make.Linux_PII_CBLAS_gm │ ├── Make.Linux_PII_FBLAS │ ├── Make.Linux_PII_FBLAS_gm │ ├── Make.Linux_PII_VSIPL │ ├── Make.Linux_PII_VSIPL_gm │ ├── Make.Linux_SGI_AltixIA64_Goto │ ├── Make.Linux_SGI_AltixIA64_SCSL │ ├── Make.PWR2_FBLAS │ ├── Make.PWR3_FBLAS │ ├── Make.PWRPC_FBLAS │ ├── Make.Power4_ESSL │ ├── Make.Power4_ESSLSMP │ ├── Make.Power4_ESSL_r │ ├── Make.SUN4SOL2-g_FBLAS │ ├── Make.SUN4SOL2-g_VSIPL │ ├── Make.SUN4SOL2_FBLAS │ ├── Make.Sun │ ├── Make.T3E_FBLAS │ ├── Make.Tru64_FBLAS │ ├── Make.Tru64_FBLAS_MPI │ ├── Make.Tru64_FBLAS_elan │ ├── Make.UNKNOWN.in │ ├── Make.cygwin │ ├── Make.macports_openmpi │ └── make_generic ├── src │ ├── auxil │ │ ├── HPL_abort.c │ │ ├── HPL_dlacpy.c │ │ ├── HPL_dlamch.c │ │ ├── HPL_dlange.c │ │ ├── HPL_dlaprnt.c │ │ ├── HPL_dlatcpy.c │ │ ├── HPL_fprintf.c │ │ └── HPL_warn.c │ 
├── blas │ │ ├── HPL_daxpy.c │ │ ├── HPL_dcopy.c │ │ ├── HPL_dgemm.c │ │ ├── HPL_dgemv.c │ │ ├── HPL_dger.c │ │ ├── HPL_dscal.c │ │ ├── HPL_dswap.c │ │ ├── HPL_dtrsm.c │ │ ├── HPL_dtrsv.c │ │ └── HPL_idamax.c │ ├── comm │ │ ├── HPL_1rinM.c │ │ ├── HPL_1ring.c │ │ ├── HPL_2rinM.c │ │ ├── HPL_2ring.c │ │ ├── HPL_bcast.c │ │ ├── HPL_binit.c │ │ ├── HPL_blonM.c │ │ ├── HPL_blong.c │ │ ├── HPL_bwait.c │ │ ├── HPL_copyL.c │ │ ├── HPL_packL.c │ │ ├── HPL_recv.c │ │ ├── HPL_sdrv.c │ │ └── HPL_send.c │ ├── grid │ │ ├── HPL_all_reduce.c │ │ ├── HPL_barrier.c │ │ ├── HPL_broadcast.c │ │ ├── HPL_grid_exit.c │ │ ├── HPL_grid_info.c │ │ ├── HPL_grid_init.c │ │ ├── HPL_max.c │ │ ├── HPL_min.c │ │ ├── HPL_pnum.c │ │ ├── HPL_reduce.c │ │ └── HPL_sum.c │ ├── panel │ │ ├── HPL_pdpanel_disp.c │ │ ├── HPL_pdpanel_free.c │ │ ├── HPL_pdpanel_init.c │ │ └── HPL_pdpanel_new.c │ ├── pauxil │ │ ├── HPL_dlaswp00N.c │ │ ├── HPL_dlaswp01N.c │ │ ├── HPL_dlaswp01T.c │ │ ├── HPL_dlaswp02N.c │ │ ├── HPL_dlaswp03N.c │ │ ├── HPL_dlaswp03T.c │ │ ├── HPL_dlaswp04N.c │ │ ├── HPL_dlaswp04T.c │ │ ├── HPL_dlaswp05N.c │ │ ├── HPL_dlaswp05T.c │ │ ├── HPL_dlaswp06N.c │ │ ├── HPL_dlaswp06T.c │ │ ├── HPL_dlaswp10N.c │ │ ├── HPL_indxg2l.c │ │ ├── HPL_indxg2lp.c │ │ ├── HPL_indxg2p.c │ │ ├── HPL_indxl2g.c │ │ ├── HPL_infog2l.c │ │ ├── HPL_numroc.c │ │ ├── HPL_numrocI.c │ │ ├── HPL_pabort.c │ │ ├── HPL_pdlamch.c │ │ ├── HPL_pdlange.c │ │ ├── HPL_pdlaprnt.c │ │ └── HPL_pwarn.c │ ├── pfact │ │ ├── HPL_dlocmax.c │ │ ├── HPL_dlocswpN.c │ │ ├── HPL_dlocswpT.c │ │ ├── HPL_pdfact.c │ │ ├── HPL_pdmxswp.c │ │ ├── HPL_pdpancrN.c │ │ ├── HPL_pdpancrT.c │ │ ├── HPL_pdpanllN.c │ │ ├── HPL_pdpanllT.c │ │ ├── HPL_pdpanrlN.c │ │ ├── HPL_pdpanrlT.c │ │ ├── HPL_pdrpancrN.c │ │ ├── HPL_pdrpancrT.c │ │ ├── HPL_pdrpanllN.c │ │ ├── HPL_pdrpanllT.c │ │ ├── HPL_pdrpanrlN.c │ │ └── HPL_pdrpanrlT.c │ └── pgesv │ │ ├── HPL_equil.c │ │ ├── HPL_logsort.c │ │ ├── HPL_pdgesv.c │ │ ├── HPL_pdgesv0.c │ │ ├── HPL_pdgesvK1.c │ │ ├── HPL_pdgesvK2.c 
│ │ ├── HPL_pdlaswp00N.c │ │ ├── HPL_pdlaswp00T.c │ │ ├── HPL_pdlaswp01N.c │ │ ├── HPL_pdlaswp01T.c │ │ ├── HPL_pdtrsv.c │ │ ├── HPL_pdupdateNN.c │ │ ├── HPL_pdupdateNT.c │ │ ├── HPL_pdupdateTN.c │ │ ├── HPL_pdupdateTT.c │ │ ├── HPL_perm.c │ │ ├── HPL_pipid.c │ │ ├── HPL_plindx0.c │ │ ├── HPL_plindx1.c │ │ ├── HPL_plindx10.c │ │ ├── HPL_rollN.c │ │ ├── HPL_rollT.c │ │ ├── HPL_spreadN.c │ │ └── HPL_spreadT.c ├── testing │ ├── matgen │ │ ├── HPL_dmatgen.c │ │ ├── HPL_jumpit.c │ │ ├── HPL_ladd.c │ │ ├── HPL_lmul.c │ │ ├── HPL_rand.c │ │ ├── HPL_setran.c │ │ └── HPL_xjumpm.c │ ├── pmatgen │ │ └── HPL_pdmatgen.c │ ├── ptest │ │ ├── HPL.dat │ │ ├── HPL_pddriver.c │ │ ├── HPL_pdinfo.c │ │ └── HPL_pdtest.c │ ├── ptimer │ │ ├── HPL_ptimer.c │ │ ├── HPL_ptimer_cputime.c │ │ └── HPL_ptimer_walltime.c │ └── timer │ │ ├── HPL_timer.c │ │ ├── HPL_timer_cputime.c │ │ └── HPL_timer_walltime.c └── www │ ├── 1rinM.jpg │ ├── 1ring.jpg │ ├── 2-273x48.jpg │ ├── 2rinM.jpg │ ├── 2ring.jpg │ ├── HPL_abort.html │ ├── HPL_all_reduce.html │ ├── HPL_barrier.html │ ├── HPL_bcast.html │ ├── HPL_binit.html │ ├── HPL_broadcast.html │ ├── HPL_bwait.html │ ├── HPL_copyL.html │ ├── HPL_daxpy.html │ ├── HPL_dcopy.html │ ├── HPL_dgemm.html │ ├── HPL_dgemv.html │ ├── HPL_dger.html │ ├── HPL_dlacpy.html │ ├── HPL_dlamch.html │ ├── HPL_dlange.html │ ├── HPL_dlaprnt.html │ ├── HPL_dlaswp00N.html │ ├── HPL_dlaswp01N.html │ ├── HPL_dlaswp01T.html │ ├── HPL_dlaswp02N.html │ ├── HPL_dlaswp03N.html │ ├── HPL_dlaswp03T.html │ ├── HPL_dlaswp04N.html │ ├── HPL_dlaswp04T.html │ ├── HPL_dlaswp05N.html │ ├── HPL_dlaswp05T.html │ ├── HPL_dlaswp06N.html │ ├── HPL_dlaswp06T.html │ ├── HPL_dlaswp10N.html │ ├── HPL_dlatcpy.html │ ├── HPL_dlocmax.html │ ├── HPL_dlocswpN.html │ ├── HPL_dlocswpT.html │ ├── HPL_dmatgen.html │ ├── HPL_dscal.html │ ├── HPL_dswap.html │ ├── HPL_dtrsm.html │ ├── HPL_dtrsv.html │ ├── HPL_equil.html │ ├── HPL_fprintf.html │ ├── HPL_grid_exit.html │ ├── HPL_grid_info.html │ ├── HPL_grid_init.html │ 
├── HPL_idamax.html │ ├── HPL_indxg2l.html │ ├── HPL_indxg2lp.html │ ├── HPL_indxg2p.html │ ├── HPL_indxl2g.html │ ├── HPL_infog2l.html │ ├── HPL_jumpit.html │ ├── HPL_ladd.html │ ├── HPL_lmul.html │ ├── HPL_logsort.html │ ├── HPL_max.html │ ├── HPL_min.html │ ├── HPL_numroc.html │ ├── HPL_numrocI.html │ ├── HPL_pabort.html │ ├── HPL_packL.html │ ├── HPL_pddriver.html │ ├── HPL_pdfact.html │ ├── HPL_pdgesv.html │ ├── HPL_pdgesv0.html │ ├── HPL_pdgesvK1.html │ ├── HPL_pdgesvK2.html │ ├── HPL_pdinfo.html │ ├── HPL_pdlamch.html │ ├── HPL_pdlange.html │ ├── HPL_pdlaprnt.html │ ├── HPL_pdlaswp00N.html │ ├── HPL_pdlaswp00T.html │ ├── HPL_pdlaswp01N.html │ ├── HPL_pdlaswp01T.html │ ├── HPL_pdmatgen.html │ ├── HPL_pdmxswp.html │ ├── HPL_pdpancrN.html │ ├── HPL_pdpancrT.html │ ├── HPL_pdpanel_disp.html │ ├── HPL_pdpanel_free.html │ ├── HPL_pdpanel_init.html │ ├── HPL_pdpanel_new.html │ ├── HPL_pdpanllN.html │ ├── HPL_pdpanllT.html │ ├── HPL_pdpanrlN.html │ ├── HPL_pdpanrlT.html │ ├── HPL_pdrpancrN.html │ ├── HPL_pdrpancrT.html │ ├── HPL_pdrpanllN.html │ ├── HPL_pdrpanllT.html │ ├── HPL_pdrpanrlN.html │ ├── HPL_pdrpanrlT.html │ ├── HPL_pdtest.html │ ├── HPL_pdtrsv.html │ ├── HPL_pdupdateNN.html │ ├── HPL_pdupdateNT.html │ ├── HPL_pdupdateTN.html │ ├── HPL_pdupdateTT.html │ ├── HPL_perm.html │ ├── HPL_pipid.html │ ├── HPL_plindx0.html │ ├── HPL_plindx1.html │ ├── HPL_plindx10.html │ ├── HPL_pnum.html │ ├── HPL_ptimer.html │ ├── HPL_ptimer_cputime.html │ ├── HPL_ptimer_walltime.html │ ├── HPL_pwarn.html │ ├── HPL_rand.html │ ├── HPL_recv.html │ ├── HPL_reduce.html │ ├── HPL_rollN.html │ ├── HPL_rollT.html │ ├── HPL_sdrv.html │ ├── HPL_send.html │ ├── HPL_setran.html │ ├── HPL_spreadN.html │ ├── HPL_spreadT.html │ ├── HPL_sum.html │ ├── HPL_timer.html │ ├── HPL_timer_cputime.html │ ├── HPL_timer_walltime.html │ ├── HPL_warn.html │ ├── HPL_xjumpm.html │ ├── algorithm.html │ ├── aprunner.gif │ ├── copyright.html │ ├── documentation.html │ ├── errata.html │ ├── faqs.html │ ├── 
index.html │ ├── links.html │ ├── main.jpg │ ├── mat2.jpg │ ├── pfact.jpg │ ├── references.html │ ├── results.html │ ├── roll.jpg │ ├── rollM.jpg │ ├── scalability.html │ ├── software.html │ ├── spread.jpg │ ├── spreadM.jpg │ └── tuning.html ├── include ├── hpcc.h └── hpccver.h ├── python ├── Makefile ├── Makefile-grig ├── default.css ├── grig-data │ ├── README │ ├── hpccoutf.txt-1procs │ ├── hpccoutf.txt-2procs │ ├── hpccoutf.txt-3procs │ └── hpccoutf.txt-4procs ├── hpcc.c ├── hpcc.py ├── index.html ├── mpi.pyx ├── pyxutil.h ├── sequential.py └── setup.py ├── setup.py ├── src ├── HPL_slamch.c ├── bench_lat_bw_1.3.c ├── bench_lat_bw_1.5.1.c ├── bench_lat_bw_1.5.2.c ├── extfinalize.c ├── extinit.c ├── hpcc.c ├── io.c └── noopt.c ├── tools ├── README.txt ├── hpccoutf.pl ├── hpccoutf.py ├── hpccxml.py ├── makefile.py ├── readme.py ├── setup.py └── todo.txt ├── work ├── Makefile ├── conly │ ├── Makefile │ ├── c.gpt │ ├── d.gpt │ ├── enumerate_all.txt │ ├── enumfft.py │ ├── fft235.c │ ├── hpccfft.h │ ├── mem4fft.c │ ├── results.txt │ ├── w1.gpt │ ├── w2.gpt │ ├── ww.gpt │ ├── ww2.gpt │ ├── ww3.gpt │ ├── ww4.gpt │ └── zfft1d.c ├── cpp.py ├── fft.txt ├── fft235.cc ├── fftbug.txt ├── mem4fft.cc ├── mem4fft.h ├── tpdtrans.c └── zfft1d.cc └── www ├── hpcchallenge.org ├── Home.html ├── Makefile ├── poste_all.html └── pre_all.html └── icl_hpcc ├── orig_theoretical_peak.html └── theoretical_peak.html /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | /hpcc 3 | /.cproject 4 | /.project 5 | /.settings 6 | /hpccinf.txt 7 | /hpccoutf.txt 8 | 9 | -------------------------------------------------------------------------------- /FFT/hpccfft.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #define FFTE_NDA2 65536 5 | #define FFTE_NDA3 4096 6 | #define FFTE_NDA4 256 7 | 8 | /* Parameters that affect performance */ 9 | 10 | /* 11 | Blocking parameter. 
Suggested values: 12 | 8 for Pentium III and Athlon 13 | 16 for Pentium4, Athlon XP, Opteron, Itanium and Itanium2 14 | */ 15 | #ifndef FFTE_NBLK 16 | #define FFTE_NBLK 16 17 | #endif 18 | 19 | /* 20 | Padding parameter to avoid cache conflicts. 21 | Suggested values: 22 | 2 for Pentium III 23 | 4 for Athlon, Athlon XP, Opteron, Itanium 24 | 8 for Pentium4 and Itanium2 25 | */ 26 | #ifndef FFTE_NP 27 | #define FFTE_NP 8 28 | #endif 29 | 30 | /* Size of Level 2 cache */ 31 | #ifndef FFTE_L2SIZE 32 | #define FFTE_L2SIZE 1048576 33 | #endif 34 | 35 | #ifdef LONG_IS_64BITS 36 | typedef unsigned long u64Int_t; 37 | typedef long s64Int_t; 38 | #else 39 | typedef unsigned long long u64Int_t; 40 | typedef long long s64Int_t; 41 | #endif 42 | 43 | #include "wrapfftw.h" 44 | 45 | extern int HPCC_ipow(int x, int p); 46 | 47 | extern int HPCC_zfft1d(int n, fftw_complex *a, fftw_complex *b, int iopt, hpcc_fftw_plan p); 48 | extern int HPCC_fft235(fftw_complex *a, fftw_complex *b, fftw_complex *w, int n, const int *ip); 49 | extern int HPCC_settbl(fftw_complex *w, int n); 50 | extern int HPCC_factor235(int n, int *ip); 51 | extern int HPCC_factor235_8(s64Int_t n, int *ip); 52 | 53 | extern int HPCC_bcnrand(u64Int_t n, u64Int_t a, void *x); 54 | 55 | #define ARR2D(a, i, j, lda) a[(i)+(j)*(lda)] 56 | #define PTR2D(a, i, j, lda) (a+(i)+(j)*(lda)) 57 | #define ARR3D(a, i, j, k, lda1, lda2) a[(i)+(lda1)*((j)+(k)*(lda2))] 58 | #define PTR3D(a, i, j, k, lda1, lda2) (a+(i)+(lda1)*((j)+(k)*(lda2))) 59 | #define ARR4D(a, i, j, k, l, lda1, lda2, lda3) a[(i)+(lda1)*((j)+(lda2)*((k)+(lda3)*(l)))] 60 | #define c_mul3v(v, v1, v2) c_re(v) = c_re(v1) * c_re(v2) - c_im(v1) * c_im(v2); c_im(v) = c_re(v1) * c_im(v2) + c_im(v1) * c_re(v2) 61 | #define c_assgn(d, s) c_re(d)=c_re(s);c_im(d)=c_im(s) 62 | #define V3MIN(r, e, v) r = (e); V2MIN(r, v) 63 | #define V2MIN(r, v) r = (v) < r ? (v) : r 64 | #define EMAX(d, v, e) d=(e); d=d>(v)?d:(v) 65 | 66 | #define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? 
(a_) : (b_) ) 67 | -------------------------------------------------------------------------------- /FFT/wrapfftw.h: -------------------------------------------------------------------------------- 1 | 2 | #if defined(USING_FFTW) 3 | 4 | #include 5 | 6 | #elif defined(USING_FFTW3) 7 | 8 | #include 9 | 10 | typedef int fftw_direction; 11 | 12 | #define c_re(c) ((c)[0]) 13 | #define c_im(c) ((c)[1]) 14 | 15 | #else 16 | 17 | typedef double fftw_real; 18 | typedef struct { 19 | fftw_real re, im; 20 | } fftw_complex_orig; 21 | typedef fftw_real HPCC_Complex[2]; 22 | typedef HPCC_Complex fftw_complex; 23 | 24 | typedef enum { 25 | FFTW_FORWARD = -1, FFTW_BACKWARD = 1 26 | } fftw_direction; 27 | #endif 28 | 29 | struct hpcc_fftw_plan_struct { 30 | fftw_complex *w1, *w2, *ww1, *ww2, *ww3, *ww4, *c, *d; 31 | int n, c_size, d_size; 32 | int flags; 33 | fftw_direction dir; 34 | }; 35 | typedef struct hpcc_fftw_plan_struct *hpcc_fftw_plan; 36 | 37 | extern hpcc_fftw_plan HPCC_fftw_create_plan(int n, fftw_direction dir, int flags); 38 | extern void HPCC_fftw_destroy_plan(hpcc_fftw_plan plan); 39 | extern void HPCC_fftw_one(hpcc_fftw_plan plan, fftw_complex *in, fftw_complex *out); 40 | 41 | #if !defined(USING_FFTW) && !defined(USING_FFTW3) 42 | 43 | typedef struct hpcc_fftw_plan_struct *fftw_plan; 44 | 45 | #define c_re(c) ((c)[0]) 46 | #define c_im(c) ((c)[1]) 47 | 48 | #define fftw_malloc malloc 49 | #define fftw_free free 50 | /* flags for the planner */ 51 | #define FFTW_ESTIMATE (0) 52 | #define FFTW_MEASURE (1) 53 | 54 | #define FFTW_OUT_OF_PLACE (0) 55 | #define FFTW_IN_PLACE (8) 56 | #define FFTW_USE_WISDOM (16) 57 | 58 | #define fftw_create_plan HPCC_fftw_create_plan 59 | #define fftw_destroy_plan HPCC_fftw_destroy_plan 60 | #define fftw_one HPCC_fftw_one 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /FFT/wrapmpifftw.h: -------------------------------------------------------------------------------- 1 | #if 
defined(USING_FFTW) 2 | 3 | #include 4 | 5 | #elif defined(USING_FFTW3) 6 | 7 | #include 8 | 9 | typedef int fftw_direction; 10 | 11 | #define c_re(c) ((c)[0]) 12 | #define c_im(c) ((c)[1]) 13 | 14 | #else 15 | #include 16 | typedef struct hpcc_fftw_mpi_plan_struct *fftw_mpi_plan; 17 | #define fftw_mpi_create_plan HPCC_fftw_mpi_create_plan 18 | #define fftw_mpi_destroy_plan HPCC_fftw_mpi_destroy_plan 19 | #define fftw_mpi HPCC_fftw_mpi 20 | #define fftw_mpi_local_sizes HPCC_fftw_mpi_local_sizes 21 | #endif 22 | 23 | struct hpcc_fftw_mpi_plan_struct { 24 | MPI_Comm comm; 25 | MPI_Datatype cmplx; 26 | fftw_complex *wx, *wy, *wz, *c, *work; 27 | s64Int_t n; 28 | int flags, c_size; 29 | fftw_direction dir; 30 | double *timings; 31 | }; 32 | typedef struct hpcc_fftw_mpi_plan_struct *hpcc_fftw_mpi_plan; 33 | 34 | extern hpcc_fftw_mpi_plan 35 | HPCC_fftw_mpi_create_plan(MPI_Comm comm, s64Int_t n, fftw_direction dir, int flags); 36 | extern void HPCC_fftw_mpi_destroy_plan(hpcc_fftw_mpi_plan plan); 37 | extern void HPCC_fftw_mpi(hpcc_fftw_mpi_plan p, int n_fields, fftw_complex *local_data, 38 | fftw_complex *work); 39 | extern void HPCC_fftw_mpi_local_sizes(hpcc_fftw_mpi_plan p, s64Int_t *local_n, 40 | s64Int_t *local_start, s64Int_t *local_n_after_transform, 41 | s64Int_t *local_start_after_transform, s64Int_t *total_local_size); 42 | 43 | extern int 44 | HPCC_pzfft1d(s64Int_t n, fftw_complex *a, fftw_complex *b, fftw_complex *w, int me, int npu, int iopt, 45 | hpcc_fftw_mpi_plan p); 46 | 47 | extern double *HPCC_fft_timings_forward, *HPCC_fft_timings_backward; 48 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # -*- Makefile -*- 2 | 3 | arch = UNKNOWN 4 | include hpl/Make.$(arch) 5 | 6 | all: 7 | - $(MKDIR) hpl/lib/$(arch) 8 | ( $(CD) hpl/lib/arch/build ; $(MAKE) arch=$(arch) -f Makefile.hpcc ) 9 | 10 | clean: 11 | - $(MKDIR) hpl/lib/$(arch) 
12 | ( $(CD) hpl/lib/arch/build ; $(MAKE) arch=$(arch) -f Makefile.hpcc clean ) 13 | 14 | readme: README.html README.txt 15 | 16 | README.html: README.tex 17 | hevea -fix -O README.tex 18 | python tools/readme.py README.html 19 | 20 | README.info: README.tex 21 | hevea -fix -info README.tex 22 | 23 | README.txt: README.tex 24 | hevea -fix -text README.tex 25 | 26 | .PHONY: all clean readme 27 | -------------------------------------------------------------------------------- /PTRANS/cblacslt.h: -------------------------------------------------------------------------------- 1 | 2 | #define SGET_SYSCONTXT 0 3 | #define SGET_BLACSCONTXT 10 4 | 5 | extern double dcputime00(void); 6 | extern double dwalltime00(void); 7 | extern void Cblacs_abort(int ConTxt, int ErrNo); 8 | extern void Cblacs_barrier(int ConTxt, char *scope); 9 | extern void Cblacs_exit(int NotDone); 10 | extern void Cblacs_get(int ConTxt, int what, int *val); 11 | extern void Cblacs_gridexit(int ConTxt); 12 | extern void Cblacs_gridinfo(int ConTxt, int *nprow, int *npcol, int *myrow, int *mycol); 13 | extern void Cblacs_gridinit(int *ConTxt, char *order, int nprow, int npcol); 14 | extern void Cblacs_gridmap(int *ConTxt, int *umap, int ldumap, int nprow, int npcol); 15 | extern void Cblacs_pinfo(int *mypnum, int *nprocs); 16 | extern void Cdgamn2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int *rA, 17 | int *cA, int ldia, int rdest, int cdest); 18 | extern void Cdgamx2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, 19 | int *rA, int *cA, int ldia, int rdest, int cdest); 20 | extern void Cdgebr2d(int ConTxt, char *scope, char *top, int m, int n, double *A, 21 | int lda, int rsrc, int csrc); 22 | extern void Cdgebs2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda); 23 | extern void Cdgerv2d(int ConTxt, int m, int n, double *A, int lda, int rsrc, int csrc); 24 | extern void Cdgesd2d(int ConTxt, int m, int n, double *A, int lda, int 
rdest, int cdest); 25 | extern void Cdgsum2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, 26 | int rdest, int cdest); 27 | extern void Cigebr2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rsrc, 28 | int csrc); 29 | extern void Cigebs2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda); 30 | extern void Cigsum2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, 31 | int rdest, int cdest); 32 | extern void Cblacs_dSendrecv(int ctxt, int mSrc, int nSrc, double *Asrc, int ldaSrc, int rdest, 33 | int cdest, int mDest, int nDest, double *Adest, int ldaDest, int rsrc, int csrc); 34 | -------------------------------------------------------------------------------- /RandomAccess/buckets.h: -------------------------------------------------------------------------------- 1 | 2 | typedef struct update_s { 3 | char *poolNext; /* pointer for memory pool */ 4 | u64Int value; 5 | struct update_s *forward; 6 | } Update_T, *Update_Ptr; 7 | 8 | #define NULL_UPDATE_PTR ((Update_Ptr) NULL) 9 | 10 | typedef struct pe_bucket_s { 11 | int numUpdates; 12 | Update_Ptr updateList; 13 | } Bucket_T, *Bucket_Ptr; 14 | 15 | #define NULL_BUCKET_PTR ((Bucket_Ptr) NULL) 16 | 17 | extern Bucket_Ptr HPCC_InitBuckets(int numPEs, int maxNumUpdates); 18 | extern void HPCC_FreeBuckets(Bucket_Ptr buckets, int numPEs); 19 | extern void HPCC_InsertUpdate(u64Int ran, int pe, Bucket_Ptr buckets); 20 | extern int HPCC_GetUpdates(Bucket_Ptr buckets, u64Int *buffer, int bufferSize, int *peUpdates); 21 | -------------------------------------------------------------------------------- /RandomAccess/heap.h: -------------------------------------------------------------------------------- 1 | 2 | #define HEAP_ROOT 0 3 | #define NOT_A_NODE (-1) 4 | 5 | typedef struct heap_record { 6 | char* poolNext; /* pointer for memory pool */ 7 | int index; 8 | int key; 9 | } Heap_Record, *Heap_Record_Ptr; 10 | 11 | extern void HPCC_ra_Heap_Init 
(int size); 12 | extern void HPCC_ra_Heap_Insert (int index, int key); 13 | extern void HPCC_ra_Heap_ExtractMax (int *index, int *key); 14 | extern void HPCC_ra_Heap_IncrementKey (int index); 15 | extern void HPCC_ra_Heapify (int node); 16 | extern void HPCC_ra_Heap_Free (); 17 | -------------------------------------------------------------------------------- /RandomAccess/pool.h: -------------------------------------------------------------------------------- 1 | 2 | #define HPCC_NULL_PTR ((char *)0) 3 | 4 | typedef struct Pool_s { /* used to minimize the use of malloc */ 5 | char *head; /* pointer to the first element of the pool */ 6 | char *tail; /* pointer to the last element of the pool */ 7 | int numObjs; /* number of objects to malloc */ 8 | int objSize; /* size of objects in bytes */ 9 | char *poolBase; /* pointer to block of memory allocated for pool */ 10 | } POOL; 11 | 12 | extern POOL* HPCC_PoolInit(int numObjs, int objSize); 13 | extern char *HPCC_PoolGetObj(POOL *poolPtr); 14 | extern void HPCC_PoolReturnObj(POOL *poolPtr, void *objPtr); 15 | extern void HPCC_PoolFree(POOL *poolPtr); 16 | 17 | -------------------------------------------------------------------------------- /RandomAccess/single_cpu.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- 2 | * 3 | * This file contains the interface for the single cpu RandomAccess test. The 4 | * test is only run on a single (random) node in the MPI universe, with all 5 | * other CPUs stuck (in theory, idle) in an MPI_Bcast waiting for the selected 6 | * CPU to finish the RandomAccess test. 
7 | * 8 | * This test uses the computational core found in core_single_cpu.c 9 | */ 10 | 11 | #include 12 | #include "RandomAccess.h" 13 | 14 | int 15 | HPCC_SingleRandomAccess(HPCC_Params *params) 16 | { 17 | int myRank, commSize; 18 | int rv, errCount, rank, failure = 0; 19 | double localGUPs; 20 | double scl = 1.0 / RAND_MAX; 21 | FILE *outputFile = NULL; 22 | MPI_Comm comm = MPI_COMM_WORLD; 23 | 24 | localGUPs = 0.0; 25 | 26 | MPI_Comm_size( comm, &commSize ); 27 | MPI_Comm_rank( comm, &myRank ); 28 | 29 | srand(time(NULL)); 30 | scl *= commSize; 31 | 32 | /* select a node at random, but not node 0 (unless there is just one node) */ 33 | if (1 == commSize) 34 | rank = 0; 35 | else 36 | for (rank = 0; ; rank = (int)(scl * rand())) { 37 | if (rank > 0 && rank < commSize) 38 | break; 39 | } 40 | 41 | MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */ 42 | 43 | if (myRank == rank) /* if this node has been selected */ 44 | rv = HPCC_RandomAccess( params, 0 == myRank, &localGUPs, &failure ); 45 | 46 | MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */ 47 | MPI_Bcast( &localGUPs, 1, MPI_DOUBLE, rank, comm ); /* broadcast GUPs */ 48 | MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */ 49 | errCount = rv; 50 | params->SingleGUPs = localGUPs; 51 | if (failure) params->Failure = 1; 52 | 53 | BEGIN_IO( myRank, params->outFname, outputFile); 54 | fprintf( outputFile, "Node(s) with error %d\n", errCount ); 55 | fprintf( outputFile, "Node selected %d\n", rank ); 56 | fprintf( outputFile, "Single GUP/s %.6f\n", localGUPs ); 57 | END_IO( myRank, outputFile ); 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /RandomAccess/single_cpu_lcg.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- 2 | * 3 | * This file contains the interface for the 
single cpu RandomAccess test. The 4 | * test is only run on a single (random) node in the MPI universe, with all 5 | * other CPUs stuck (in theory, idle) in an MPI_Bcast waiting for the selected 6 | * CPU to finish the RandomAccess test. 7 | * 8 | * This test uses the computational core found in core_single_cpu.c 9 | */ 10 | 11 | #include 12 | #include "RandomAccess.h" 13 | 14 | int 15 | HPCC_SingleRandomAccess_LCG(HPCC_Params *params) 16 | { 17 | int myRank, commSize; 18 | int rv, errCount, rank, failure = 0; 19 | double localGUPs; 20 | double scl = 1.0 / RAND_MAX; 21 | FILE *outputFile = NULL; 22 | MPI_Comm comm = MPI_COMM_WORLD; 23 | 24 | localGUPs = 0.0; 25 | 26 | MPI_Comm_size( comm, &commSize ); 27 | MPI_Comm_rank( comm, &myRank ); 28 | 29 | srand(time(NULL)); 30 | scl *= commSize; 31 | 32 | /* select a node at random, but not node 0 (unless there is just one node) */ 33 | if (1 == commSize) 34 | rank = 0; 35 | else 36 | for (rank = 0; ; rank = (int)(scl * rand())) { 37 | if (rank > 0 && rank < commSize) 38 | break; 39 | } 40 | 41 | MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */ 42 | 43 | if (myRank == rank) /* if this node has been selected */ 44 | rv = HPCC_RandomAccess_LCG( params, 0 == myRank, &localGUPs, &failure ); 45 | 46 | MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */ 47 | MPI_Bcast( &localGUPs, 1, MPI_DOUBLE, rank, comm ); /* broadcast GUPs */ 48 | MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */ 49 | errCount = rv; 50 | params->Single_LCG_GUPs = localGUPs; 51 | if (failure) params->Failure = 1; 52 | 53 | BEGIN_IO( myRank, params->outFname, outputFile); 54 | fprintf( outputFile, "Node(s) with error %d\n", errCount ); 55 | fprintf( outputFile, "Node selected %d\n", rank ); 56 | fprintf( outputFile, "Single GUP/s %.6f\n", localGUPs ); 57 | END_IO( myRank, outputFile ); 58 | 59 | return 0; 60 | } 61 | 
-------------------------------------------------------------------------------- /RandomAccess/star_single_cpu.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- 2 | * 3 | * This file contains the interface for the star single cpu RandomAccess test. 4 | * The test runs on all cpus in the MPI universe, but there is no communication 5 | * between cpus during the process (each cpu runs its own version of the 6 | * single_cpu test). The final result is the average of the entire system. 7 | * 8 | * This test uses the computational core found in core_single_cpu.c 9 | */ 10 | 11 | #include 12 | #include "RandomAccess.h" 13 | 14 | int 15 | HPCC_StarRandomAccess(HPCC_Params *params) 16 | { 17 | int myRank, commSize; 18 | int rv, errCount, failure = 0, failureAll = 0; 19 | double minGUPs, avgGUPs, maxGUPs, localGUPs; 20 | FILE *outputFile = NULL; 21 | MPI_Comm comm = MPI_COMM_WORLD; 22 | 23 | minGUPs = avgGUPs = maxGUPs = localGUPs = 0.0; 24 | 25 | MPI_Comm_size( comm, &commSize ); 26 | MPI_Comm_rank( comm, &myRank ); 27 | 28 | rv = HPCC_RandomAccess( params, 0 == myRank, &localGUPs, &failure ); 29 | MPI_Reduce( &rv, &errCount, 1, MPI_INT, MPI_SUM, 0, comm ); 30 | MPI_Allreduce( &failure, &failureAll, 1, MPI_INT, MPI_MAX, comm ); 31 | if (failureAll) params->Failure = 1; 32 | 33 | MPI_Reduce( &localGUPs, &minGUPs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); 34 | MPI_Reduce( &localGUPs, &avgGUPs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); 35 | MPI_Reduce( &localGUPs, &maxGUPs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); 36 | 37 | avgGUPs /= commSize; 38 | 39 | MPI_Bcast( &avgGUPs, 1, MPI_DOUBLE, 0, comm ); 40 | params->StarGUPs = avgGUPs; 41 | 42 | BEGIN_IO( myRank, params->outFname, outputFile); 43 | fprintf( outputFile, "Node(s) with error %d\n", errCount ); 44 | fprintf( outputFile, "Minimum GUP/s %.6f\n", minGUPs ); 45 | fprintf( outputFile, "Average GUP/s %.6f\n", avgGUPs ); 46 | fprintf( outputFile, 
"Maximum GUP/s %.6f\n", maxGUPs ); 47 | END_IO( myRank, outputFile ); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /RandomAccess/star_single_cpu_lcg.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- 2 | * 3 | * This file contains the interface for the star single cpu RandomAccess test. 4 | * The test runs on all cpus in the MPI universe, but there is no communication 5 | * between cpus during the process (each cpu runs its own version of the 6 | * single_cpu test). The final result is the average of the entire system. 7 | * 8 | * This test uses the computational core found in core_single_cpu.c 9 | */ 10 | 11 | #include 12 | #include "RandomAccess.h" 13 | 14 | int 15 | HPCC_StarRandomAccess_LCG(HPCC_Params *params) 16 | { 17 | int myRank, commSize; 18 | int rv, errCount, failure = 0, failureAll = 0; 19 | double minGUPs, avgGUPs, maxGUPs, localGUPs; 20 | FILE *outputFile = NULL; 21 | MPI_Comm comm = MPI_COMM_WORLD; 22 | 23 | minGUPs = avgGUPs = maxGUPs = localGUPs = 0.0; 24 | 25 | MPI_Comm_size( comm, &commSize ); 26 | MPI_Comm_rank( comm, &myRank ); 27 | 28 | rv = HPCC_RandomAccess_LCG( params, 0 == myRank, &localGUPs, &failure ); 29 | MPI_Reduce( &rv, &errCount, 1, MPI_INT, MPI_SUM, 0, comm ); 30 | MPI_Allreduce( &failure, &failureAll, 1, MPI_INT, MPI_MAX, comm ); 31 | if (failureAll) params->Failure = 1; 32 | 33 | MPI_Reduce( &localGUPs, &minGUPs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); 34 | MPI_Reduce( &localGUPs, &avgGUPs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); 35 | MPI_Reduce( &localGUPs, &maxGUPs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); 36 | 37 | avgGUPs /= commSize; 38 | 39 | MPI_Bcast( &avgGUPs, 1, MPI_DOUBLE, 0, comm ); 40 | params->Star_LCG_GUPs = avgGUPs; 41 | 42 | BEGIN_IO( myRank, params->outFname, outputFile); 43 | fprintf( outputFile, "Node(s) with error %d\n", errCount ); 44 | fprintf( outputFile, 
"Minimum GUP/s %.6f\n", minGUPs ); 45 | fprintf( outputFile, "Average GUP/s %.6f\n", avgGUPs ); 46 | fprintf( outputFile, "Maximum GUP/s %.6f\n", maxGUPs ); 47 | END_IO( myRank, outputFile ); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /RandomAccess/time_bound.h: -------------------------------------------------------------------------------- 1 | 2 | #if defined( HPCC_RA_STDALG ) 3 | #if ! defined(RA_TIME_BOUND_DISABLE) 4 | #define RA_TIME_BOUND 1 5 | #endif 6 | #endif 7 | 8 | /* time bound in seconds */ 9 | #define TIME_BOUND 60 10 | 11 | 12 | /* RA_SAMPLE_FACTOR determines the fraction of the total number 13 | * of updates used (in time_bound.c) to empirically derive an 14 | * upper bound for the number of updates executed by the benchmark. 15 | * This upper bound must be such that the total execution time of the 16 | * benchmark does not exceed a specified time bound. 17 | * RA_SAMPLE_FACTOR may need to be adjusted for each architecture 18 | * since the default number of updates depends on the total 19 | * memory size. 
20 | */ 21 | /* 1% of total number of updates */ 22 | #define RA_SAMPLE_FACTOR 100 23 | 24 | extern void HPCC_Power2NodesTime(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter); 25 | 26 | extern void HPCC_AnyNodesTime(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter); 27 | 28 | extern void HPCC_Power2NodesTimeLCG(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter); 29 | 30 | extern void HPCC_AnyNodesTimeLCG(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter); 31 | -------------------------------------------------------------------------------- /RandomAccess/utility.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- 2 | * 3 | * This file provides utility functions for the RandomAccess benchmark suite. 4 | */ 5 | 6 | #include 7 | #include "RandomAccess.h" 8 | 9 | 10 | /* Utility routine to start random number generator at Nth step */ 11 | u64Int 12 | HPCC_starts(s64Int n) 13 | { 14 | int i, j; 15 | u64Int m2[64]; 16 | u64Int temp, ran; 17 | 18 | while (n < 0) n += PERIOD; 19 | while (n > PERIOD) n -= PERIOD; 20 | if (n == 0) return 0x1; 21 | 22 | temp = 0x1; 23 | for (i=0; i<64; i++) { 24 | m2[i] = temp; 25 | temp = (temp << 1) ^ ((s64Int) temp < 0 ? POLY : 0); 26 | temp = (temp << 1) ^ ((s64Int) temp < 0 ? POLY : 0); 27 | } 28 | 29 | for (i=62; i>=0; i--) 30 | if ((n >> i) & 1) 31 | break; 32 | 33 | ran = 0x2; 34 | while (i > 0) { 35 | temp = 0; 36 | for (j=0; j<64; j++) 37 | if ((ran >> j) & 1) 38 | temp ^= m2[j]; 39 | ran = temp; 40 | i -= 1; 41 | if ((n >> i) & 1) 42 | ran = (ran << 1) ^ ((s64Int) ran < 0 ? 
POLY : 0); 43 | } 44 | 45 | return ran; 46 | } 47 | 48 | /* Utility routine to start LCG random number generator at Nth step */ 49 | u64Int 50 | HPCC_starts_LCG(s64Int n) 51 | { 52 | u64Int mul_k, add_k, ran, un; 53 | 54 | mul_k = LCG_MUL64; 55 | add_k = LCG_ADD64; 56 | 57 | ran = 1; 58 | for (un = (u64Int)n; un; un >>= 1) { 59 | if (un & 1) 60 | ran = mul_k * ran + add_k; 61 | add_k *= (mul_k + 1); 62 | mul_k *= mul_k; 63 | } 64 | 65 | return ran; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /STREAM/split_stream_funcs.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | Tuple = ( 5 | ("stream.c", ( 6 | ("HPCC_Stream(", "main"), 7 | ("checkSTREAMresults(FILE", "checkres"), 8 | ("checktick() {", "checktick"), 9 | ("void tuned_STREAM_Copy(", "copy"), 10 | ("tuned_STREAM_Scale(double", "scale"), 11 | ("void tuned_STREAM_Add(", "add"), 12 | ("tuned_STREAM_Triad(double", "triad"), 13 | ("void computeSTREAMerrors(", "checkerr"), 14 | ), "hpcc"), 15 | 16 | ("stream_mpi.c", ( 17 | ("main()" , "main"), 18 | ("^checktick()", "checktick"), 19 | ("computeSTREAMerrors(STREAM", "checkerr"), 20 | ("checkSTREAMresults (STREAM", "checkres"), 21 | ("void tuned_STREAM_Copy(", "copy"), 22 | ("tuned_STREAM_Scale(STREAM", "scale"), 23 | ("void tuned_STREAM_Add(", "add"), 24 | ("tuned_STREAM_Triad(STREAM", "triad"), 25 | ), "tstrm"), 26 | ) 27 | 28 | 29 | def swap_fd(fd, fname, prefix): 30 | fd.close() 31 | 32 | if not os.path.exists(prefix): 33 | os.mkdir(prefix) 34 | 35 | name = os.path.join(prefix, fname +".c") 36 | if fname.startswith("/dev"): 37 | name = fname 38 | fd = open(name, "w") 39 | return fd 40 | 41 | 42 | for tup in Tuple: 43 | fd = open("/dev/null", "w") 44 | 45 | prefix = tup[2] 46 | 47 | for line in open(tup[0]): 48 | for m in tup[1]: 49 | if m[0].startswith("^"): 50 | if line.startswith(m[0][1:]): 51 | fd = swap_fd(fd, m[1], prefix) 52 | 53 | elif 
line.find(m[0]) != -1: 54 | fd = swap_fd(fd, m[1], prefix) 55 | 56 | break 57 | 58 | fd.write(line) 59 | 60 | fd.close() 61 | 62 | Replacements = ( 63 | ("STREAM_TYPE", "double"), 64 | ("MAX", "Mmax"), 65 | ("MIN", "Mmin"), 66 | ("ssize_t", "int"), 67 | ("abs", "fabs"), 68 | ) 69 | 70 | for tup in Tuple: 71 | prefix = tup[2] 72 | for m in tup[1]: 73 | name = m[1] 74 | if name.startswith("/dev"): 75 | continue 76 | fname = os.path.join(prefix, name + ".c") 77 | code = open(fname).read() 78 | for rt in Replacements: 79 | code = code.replace(rt[0], rt[1]) 80 | open(fname, "w").write(code) 81 | -------------------------------------------------------------------------------- /TEST: -------------------------------------------------------------------------------- 1 | hg push test 2 | -------------------------------------------------------------------------------- /_hpccinf.txt: -------------------------------------------------------------------------------- 1 | HPLinpack benchmark input file 2 | Innovative Computing Laboratory, University of Tennessee 3 | HPL.out output file name (if any) 4 | 8 device out (6=stdout,7=stderr,file) 5 | 1 # of problems sizes (N) 6 | 1000 Ns 7 | 1 # of NBs 8 | 80 NBs 9 | 0 PMAP process mapping (0=Row-,1=Column-major) 10 | 1 # of process grids (P x Q) 11 | 2 Ps 12 | 2 Qs 13 | 16.0 threshold 14 | 1 # of panel fact 15 | 2 PFACTs (0=left, 1=Crout, 2=Right) 16 | 1 # of recursive stopping criterium 17 | 4 NBMINs (>= 1) 18 | 1 # of panels in recursion 19 | 2 NDIVs 20 | 1 # of recursive panel fact. 21 | 1 RFACTs (0=left, 1=Crout, 2=Right) 22 | 1 # of broadcast 23 | 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 24 | 1 # of lookahead depth 25 | 1 DEPTHs (>=0) 26 | 2 SWAP (0=bin-exch,1=long,2=mix) 27 | 64 swapping threshold 28 | 0 L1 in (0=transposed,1=no-transposed) form 29 | 0 U in (0=transposed,1=no-transposed) form 30 | 1 Equilibration (0=no,1=yes) 31 | 8 memory alignment in double (> 0) 32 | ##### This line (no. 
32) is ignored (it serves as a separator). ###### 33 | 0 Number of additional problem sizes for PTRANS 34 | 1200 10000 30000 values of N 35 | 0 number of additional blocking sizes for PTRANS 36 | 40 9 8 13 13 20 16 32 64 values of NB 37 | -------------------------------------------------------------------------------- /doc/hpccusrg.tex: -------------------------------------------------------------------------------- 1 | % -*- LaTeX -*- 2 | \documentclass[twocolumn]{report} 3 | 4 | \usepackage{xspace} 5 | 6 | \newcommand{\STREAM}{\textsf{STREAM}\xspace} 7 | \newcommand{\RANDA}{\textsf{RandomAccess}\xspace} 8 | \newcommand{\PTRANS}{\textsf{PTRANS}\xspace} 9 | 10 | \begin{document} 11 | 12 | \title{HPC Challenge User Guide} 13 | \author{Piotr Luszczek} 14 | \maketitle 15 | 16 | \chapter{Performance Considerations} 17 | 18 | As mentioned earlier, we try to operate on large data objects. The 19 | size of these objects is determined at runtime which contrasts with 20 | the original version of the \STREAM benchmark which uses static 21 | storage~(determined at compile time) and size. The original benchmark 22 | gives the compiler more information~(and control) over data alignment, 23 | loop trip counts, etc. 24 | 25 | \RANDA is by design heavy in misses that occur at various levels of 26 | memory hierarchy. But also Translation Look-aside Buffer~(TLB) is 27 | stressed. It might be the TLB pressure that comes into play in the SMP 28 | and multicore setting. If TLB is shared between multiple processing 29 | elements it will become the bottleneck. It's because TLBs do not have 30 | prefetch functionality nor any other latency hiding mechanism. 
31 | 32 | \section{Tuning} 33 | 34 | \PTRANS 35 | 36 | \end{document} 37 | 38 | Rules: avoid, reference web page 39 | definitions: CPU, chip, core, process, node 40 | optimizations: FFTE parameters, using FFTW 2 by modifying FFTW code to 41 | accommodate large vector sizes 42 | OpenMP: IBM xlc_r version 6 doesn't define _OPENMP, -qsmp, -qsmp=noauto 43 | Total,process,thread memory file 44 | Appendix with units 45 | -------------------------------------------------------------------------------- /hpl/BUGS: -------------------------------------------------------------------------------- 1 | ============================================================== 2 | List of the known problems with the HPL software 3 | 4 | Current as of release 2.0 - September 10, 2008 5 | ============================================================== 6 | 7 | ============================================================== 8 | 9 | ============================================================== 10 | -------------------------------------------------------------------------------- /hpl/Make.UNKNOWN: -------------------------------------------------------------------------------- 1 | # -*- Makefile -*- 2 | 3 | RM = exit 4 | CD = exit 5 | arch=UNKNOWN 6 | 7 | UNKNOWN: 8 | @echo 9 | @echo Please specify "'"arch"'" variable, for example: 10 | @echo 1. Create file Make.Unix in "'"hpl"'" directory 11 | @echo 2. 
Type: make arch=Unix 12 | @echo 13 | 14 | .PHONY: UNKNOWN 15 | -------------------------------------------------------------------------------- /hpl/README: -------------------------------------------------------------------------------- 1 | ============================================================== 2 | High Performance Computing Linpack Benchmark (HPL) 3 | HPL 2.0 - September 10, 2008 4 | ============================================================== 5 | 6 | HPL is a software package that solves a (random) dense linear 7 | system in double precision (64 bits) arithmetic on 8 | distributed-memory computers. It can thus be regarded as a 9 | portable as well as freely available implementation of the 10 | High Performance Computing Linpack Benchmark. 11 | 12 | The HPL software package requires the availability on your 13 | system of an implementation of the Message Passing Interface 14 | MPI (1.1 compliant). An implementation of either the Basic 15 | Linear Algebra Subprograms BLAS or the Vector Signal Image 16 | Processing Library VSIPL is also needed. Machine-specific as 17 | well as generic implementations of MPI, the BLAS and VSIPL 18 | are available for a large variety of systems. 19 | 20 | Install See the file INSTALL in this directory. 21 | ------- 22 | 23 | Tuning See the file TUNING in this directory. 24 | ------ 25 | 26 | Bugs Known problems and bugs with this release are documen- 27 | ---- ted in the file hpl/BUGS. 28 | 29 | Check out the website www.netlib.org/benchmark/hpl for the 30 | latest information. 
31 | 32 | ============================================================== 33 | -------------------------------------------------------------------------------- /hpl/TODO: -------------------------------------------------------------------------------- 1 | ============================================================== 2 | High Performance Computing Linpack Benchmark (HPL) 3 | HPL 2.0 - September 10, 2008 4 | ============================================================== 5 | 6 | Done list in version 1.0b, December 15th, 2004 7 | - Fixed problem with 32-bit integer overflow. 8 | Thanks to John Baron. 9 | 10 | Done list in version 1.0a, January 1st, 2004 11 | - Added Row- or Column-major process mapping in data file 12 | - Fixed compilation error for gcc 3.3 in walltime. 13 | - Fixed building problems on the T3E; 14 | Thanks to Edward Anderson. 15 | 16 | ============================================================== 17 | -------------------------------------------------------------------------------- /hpl/include/hpccmema.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ 2 | 3 | #ifndef HPCCMEMA_H 4 | #define HPCCMEMA_H 1 5 | 6 | #ifdef HPCC_MEMALLCTR 7 | extern int HPCC_alloc_init(size_t total_size); 8 | extern int HPCC_alloc_finalize(); 9 | extern void *HPCC_malloc(size_t size); 10 | extern void HPCC_free(void *ptr); 11 | #define HPCC_fftw_malloc HPCC_malloc 12 | #define HPCC_fftw_free HPCC_free 13 | #define HPCC_XMALLOC(t,s) ((t*)HPCC_malloc(sizeof(t)*(s))) 14 | #else 15 | #define HPCC_malloc malloc 16 | #define HPCC_free free 17 | #define HPCC_fftw_malloc fftw_malloc 18 | #define HPCC_fftw_free fftw_free 19 | #define HPCC_XMALLOC(t,s) XMALLOC(t,s) 20 | #endif 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_abort.3: 
-------------------------------------------------------------------------------- 1 | .TH HPL_abort 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_abort \- halts execution. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_abort(\fR 9 | \fB\&int\fR 10 | \fI\&LINE\fR, 11 | \fB\&const char *\fR 12 | \fI\&SRNAME\fR, 13 | \fB\&const char *\fR 14 | \fI\&FORM\fR, 15 | \fB\&...\fR 16 | \fB\&);\fR 17 | .SH DESCRIPTION 18 | \fB\&HPL_abort\fR 19 | displays an error message on stderr and halts execution. 20 | .SH ARGUMENTS 21 | .TP 8 22 | LINE (local input) int 23 | On entry, LINE specifies the line number in the file where 24 | the error has occurred. When LINE is not a positive line 25 | number, it is ignored. 26 | .TP 8 27 | SRNAME (local input) const char * 28 | On entry, SRNAME should be the name of the routine calling 29 | this error handler. 30 | .TP 8 31 | FORM (local input) const char * 32 | On entry, FORM specifies the format, i.e., how the subsequent 33 | arguments are converted for output. 34 | .TP 8 35 | (local input) ... 36 | On entry, ... is the list of arguments to be printed within 37 | the format string. 38 | .SH EXAMPLE 39 | \fI\&#include "hpl.h"\fR 40 | 41 | int main(int argc, char *argv[]) 42 | .br 43 | { 44 | .br 45 | HPL_abort( __LINE__, __FILE__, "Halt.\en" ); 46 | .br 47 | exit(0); return(0); 48 | .br 49 | } 50 | .SH SEE ALSO 51 | .BR HPL_fprintf \ (3), 52 | .BR HPL_warn \ (3). 53 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_all_reduce.3: -------------------------------------------------------------------------------- 1 | .TH HPL_all_reduce 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_all_reduce \- All reduce operation. 
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_all_reduce(\fR 9 | \fB\&void *\fR 10 | \fI\&BUFFER\fR, 11 | \fB\&const int\fR 12 | \fI\&COUNT\fR, 13 | \fB\&const HPL_T_TYPE\fR 14 | \fI\&DTYPE\fR, 15 | \fB\&const HPL_T_OP \fR 16 | \fI\&OP\fR, 17 | \fB\&MPI_Comm\fR 18 | \fI\&COMM\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_all_reduce\fR 22 | performs a global reduce operation across all 23 | processes of a group leaving the results on all processes. 24 | .SH ARGUMENTS 25 | .TP 8 26 | BUFFER (local input/global output) void * 27 | On entry, BUFFER points to the buffer to be combined. On 28 | exit, this array contains the combined data and is identical 29 | on all processes in the group. 30 | .TP 8 31 | COUNT (global input) const int 32 | On entry, COUNT indicates the number of entries in BUFFER. 33 | COUNT must be at least zero. 34 | .TP 8 35 | DTYPE (global input) const HPL_T_TYPE 36 | On entry, DTYPE specifies the type of the buffers operands. 37 | .TP 8 38 | OP (global input) const HPL_T_OP 39 | On entry, OP is a pointer to the local combine function. 40 | .TP 8 41 | COMM (global/local input) MPI_Comm 42 | The MPI communicator identifying the process collection. 43 | .SH SEE ALSO 44 | .BR HPL_broadcast \ (3), 45 | .BR HPL_reduce \ (3), 46 | .BR HPL_barrier \ (3), 47 | .BR HPL_min \ (3), 48 | .BR HPL_max \ (3), 49 | .BR HPL_sum \ (3). 50 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_barrier.3: -------------------------------------------------------------------------------- 1 | .TH HPL_barrier 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_barrier \- Barrier operation. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_barrier(\fR 9 | \fB\&MPI_Comm\fR 10 | \fI\&COMM\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_barrier\fR 14 | blocks the caller until all process members have called it. 
15 | The call returns at any process only after all group members have 16 | entered the call. 17 | .SH ARGUMENTS 18 | .TP 8 19 | COMM (global/local input) MPI_Comm 20 | The MPI communicator identifying the process collection. 21 | .SH SEE ALSO 22 | .BR HPL_broadcast \ (3), 23 | .BR HPL_reduce \ (3), 24 | .BR HPL_all_reduce \ (3), 25 | .BR HPL_min \ (3), 26 | .BR HPL_max \ (3), 27 | .BR HPL_sum \ (3). 28 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_bcast.3: -------------------------------------------------------------------------------- 1 | .TH HPL_bcast 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_bcast \- Perform the row broadcast. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_bcast(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PANEL\fR, 11 | \fB\&int *\fR 12 | \fI\&IFLAG\fR 13 | \fB\&);\fR 14 | .SH DESCRIPTION 15 | \fB\&HPL_bcast\fR 16 | broadcasts the current panel. Successful completion is 17 | indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to 18 | HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was 19 | not completed, in which case this function should be called again. 20 | .SH ARGUMENTS 21 | .TP 8 22 | PANEL (input/output) HPL_T_panel * 23 | On entry, PANEL points to the current panel data structure 24 | being broadcast. 25 | .TP 8 26 | IFLAG (output) int * 27 | On exit, IFLAG indicates whether or not the broadcast has 28 | occurred. 29 | .SH SEE ALSO 30 | .BR HPL_binit \ (3), 31 | .BR HPL_bwait \ (3). 32 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_binit.3: -------------------------------------------------------------------------------- 1 | .TH HPL_binit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_binit \- Initialize the row broadcast. 
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_binit(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PANEL\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_binit\fR 14 | initializes a row broadcast. Successful completion is 15 | indicated by the returned error code HPL_SUCCESS. 16 | .SH ARGUMENTS 17 | .TP 8 18 | PANEL (input/output) HPL_T_panel * 19 | On entry, PANEL points to the current panel data structure 20 | being broadcast. 21 | .SH SEE ALSO 22 | .BR HPL_bcast \ (3), 23 | .BR HPL_bwait \ (3). 24 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_broadcast.3: -------------------------------------------------------------------------------- 1 | .TH HPL_broadcast 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_broadcast \- Broadcast operation. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_broadcast(\fR 9 | \fB\&void *\fR 10 | \fI\&BUFFER\fR, 11 | \fB\&const int\fR 12 | \fI\&COUNT\fR, 13 | \fB\&const HPL_T_TYPE\fR 14 | \fI\&DTYPE\fR, 15 | \fB\&const int\fR 16 | \fI\&ROOT\fR, 17 | \fB\&MPI_Comm\fR 18 | \fI\&COMM\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_broadcast\fR 22 | broadcasts a message from the process with rank ROOT to 23 | all processes in the group. 24 | .SH ARGUMENTS 25 | .TP 8 26 | BUFFER (local input/output) void * 27 | On entry, BUFFER points to the buffer to be broadcast. On 28 | exit, this array contains the broadcast data and is identical 29 | on all processes in the group. 30 | .TP 8 31 | COUNT (global input) const int 32 | On entry, COUNT indicates the number of entries in BUFFER. 33 | COUNT must be at least zero. 34 | .TP 8 35 | DTYPE (global input) const HPL_T_TYPE 36 | On entry, DTYPE specifies the type of the buffers operands. 37 | .TP 8 38 | ROOT (global input) const int 39 | On entry, ROOT is the coordinate of the source process. 
40 | .TP 8 41 | COMM (global/local input) MPI_Comm 42 | The MPI communicator identifying the process collection. 43 | .SH SEE ALSO 44 | .BR HPL_reduce \ (3), 45 | .BR HPL_all_reduce \ (3), 46 | .BR HPL_barrier \ (3), 47 | .BR HPL_min \ (3), 48 | .BR HPL_max \ (3), 49 | .BR HPL_sum \ (3). 50 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_bwait.3: -------------------------------------------------------------------------------- 1 | .TH HPL_bwait 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_bwait \- Finalize the row broadcast. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_bwait(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PANEL\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_bwait\fR 14 | waits for the row broadcast of the current panel to 15 | terminate. Successful completion is indicated by the returned error 16 | code HPL_SUCCESS. 17 | .SH ARGUMENTS 18 | .TP 8 19 | PANEL (input/output) HPL_T_panel * 20 | On entry, PANEL points to the current panel data structure 21 | being broadcast. 22 | .SH SEE ALSO 23 | .BR HPL_binit \ (3), 24 | .BR HPL_bcast \ (3). 25 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_copyL.3: -------------------------------------------------------------------------------- 1 | .TH HPL_copyL 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_copyL \- Copy the current panel into a contiguous workspace. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_copyL(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PANEL\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_copyL\fR 14 | copies the panel of columns, the L1 replicated submatrix, 15 | the pivot array and the info scalar into a contiguous workspace for 16 | later broadcast. 
17 | 18 | The copy of this panel into a contiguous buffer can be enforced by 19 | specifying -DHPL_COPY_L in the architecture specific Makefile. 20 | .SH ARGUMENTS 21 | .TP 8 22 | PANEL (input/output) HPL_T_panel * 23 | On entry, PANEL points to the current panel data structure 24 | being broadcast. 25 | .SH SEE ALSO 26 | .BR HPL_binit \ (3), 27 | .BR HPL_bcast \ (3), 28 | .BR HPL_bwait \ (3). 29 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dcopy.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dcopy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dcopy \- y := x. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dcopy(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&const double *\fR 12 | \fI\&X\fR, 13 | \fB\&const int\fR 14 | \fI\&INCX\fR, 15 | \fB\&double *\fR 16 | \fI\&Y\fR, 17 | \fB\&const int\fR 18 | \fI\&INCY\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_dcopy\fR 22 | copies the vector x into the vector y. 23 | .SH ARGUMENTS 24 | .TP 8 25 | N (local input) const int 26 | On entry, N specifies the length of the vectors x and y. N 27 | must be at least zero. 28 | .TP 8 29 | X (local input) const double * 30 | On entry, X is an incremented array of dimension at least 31 | ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. 32 | .TP 8 33 | INCX (local input) const int 34 | On entry, INCX specifies the increment for the elements of X. 35 | INCX must not be zero. 36 | .TP 8 37 | Y (local input/output) double * 38 | On entry, Y is an incremented array of dimension at least 39 | ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. 40 | On exit, the entries of the incremented array Y are updated 41 | with the entries of the incremented array X. 42 | .TP 8 43 | INCY (local input) const int 44 | On entry, INCY specifies the increment for the elements of Y. 45 | INCY must not be zero. 
46 | .SH EXAMPLE 47 | \fI\&#include "hpl.h"\fR 48 | 49 | int main(int argc, char *argv[]) 50 | .br 51 | { 52 | .br 53 | double x[3], y[3]; 54 | .br 55 | x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; 56 | .br 57 | y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; 58 | .br 59 | HPL_dcopy( 3, x, 1, y, 1 ); 60 | .br 61 | printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); 62 | .br 63 | exit(0); return(0); 64 | .br 65 | } 66 | .SH SEE ALSO 67 | .BR HPL_daxpy \ (3), 68 | .BR HPL_dscal \ (3), 69 | .BR HPL_dswap \ (3). 70 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dlacpy.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dlacpy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dlacpy \- B := A. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dlacpy(\fR 9 | \fB\&const int\fR 10 | \fI\&M\fR, 11 | \fB\&const int\fR 12 | \fI\&N\fR, 13 | \fB\&const double *\fR 14 | \fI\&A\fR, 15 | \fB\&const int\fR 16 | \fI\&LDA\fR, 17 | \fB\&double *\fR 18 | \fI\&B\fR, 19 | \fB\&const int\fR 20 | \fI\&LDB\fR 21 | \fB\&);\fR 22 | .SH DESCRIPTION 23 | \fB\&HPL_dlacpy\fR 24 | copies an array A into an array B. 25 | .SH ARGUMENTS 26 | .TP 8 27 | M (local input) const int 28 | On entry, M specifies the number of rows of the arrays A and 29 | B. M must be at least zero. 30 | .TP 8 31 | N (local input) const int 32 | On entry, N specifies the number of columns of the arrays A 33 | and B. N must be at least zero. 34 | .TP 8 35 | A (local input) const double * 36 | On entry, A points to an array of dimension (LDA,N). 37 | .TP 8 38 | LDA (local input) const int 39 | On entry, LDA specifies the leading dimension of the array A. 40 | LDA must be at least MAX(1,M). 41 | .TP 8 42 | B (local output) double * 43 | On entry, B points to an array of dimension (LDB,N). On exit, 44 | B is overwritten with A. 
45 | .TP 8 46 | LDB (local input) const int 47 | On entry, LDB specifies the leading dimension of the array B. 48 | LDB must be at least MAX(1,M). 49 | .SH EXAMPLE 50 | \fI\&#include "hpl.h"\fR 51 | 52 | int main(int argc, char *argv[]) 53 | .br 54 | { 55 | .br 56 | double a[2*2], b[2*2]; 57 | .br 58 | a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; 59 | .br 60 | HPL_dlacpy( 2, 2, a, 2, b, 2 ); 61 | .br 62 | printf(" [%f,%f]\en", b[0], b[2]); 63 | .br 64 | printf("b=[%f,%f]\en", b[1], b[3]); 65 | .br 66 | exit(0); 67 | .br 68 | return(0); 69 | .br 70 | } 71 | .SH SEE ALSO 72 | .BR HPL_dlatcpy \ (3). 73 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dlaprnt.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dlaprnt 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dlaprnt \- Print the matrix A. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dlaprnt(\fR 9 | \fB\&const int\fR 10 | \fI\&M\fR, 11 | \fB\&const int\fR 12 | \fI\&N\fR, 13 | \fB\&double *\fR 14 | \fI\&A\fR, 15 | \fB\&const int\fR 16 | \fI\&IA\fR, 17 | \fB\&const int\fR 18 | \fI\&JA\fR, 19 | \fB\&const int\fR 20 | \fI\&LDA\fR, 21 | \fB\&const char *\fR 22 | \fI\&CMATNM\fR 23 | \fB\&);\fR 24 | .SH DESCRIPTION 25 | \fB\&HPL_dlaprnt\fR 26 | prints to standard error an M-by-N matrix A. 27 | .SH ARGUMENTS 28 | .TP 8 29 | M (local input) const int 30 | On entry, M specifies the number of rows of A. M must be at 31 | least zero. 32 | .TP 8 33 | N (local input) const int 34 | On entry, N specifies the number of columns of A. N must be 35 | at least zero. 36 | .TP 8 37 | A (local input) double * 38 | On entry, A points to an array of dimension (LDA,N). 39 | .TP 8 40 | IA (local input) const int 41 | On entry, IA specifies the starting row index to be printed. 
42 | .TP 8 43 | JA (local input) const int 44 | On entry, JA specifies the starting column index to be 45 | printed. 46 | .TP 8 47 | LDA (local input) const int 48 | On entry, LDA specifies the leading dimension of the array A. 49 | LDA must be at least max(1,M). 50 | .TP 8 51 | CMATNM (local input) const char * 52 | On entry, CMATNM is the name of the matrix to be printed. 53 | .SH EXAMPLE 54 | \fI\&#include "hpl.h"\fR 55 | 56 | int main(int argc, char *argv[]) 57 | .br 58 | { 59 | .br 60 | double a[2*2]; 61 | .br 62 | a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; 63 | .br 64 | HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" ); 65 | .br 66 | exit(0); return(0); 67 | .br 68 | } 69 | .SH SEE ALSO 70 | .BR HPL_fprintf \ (3). 71 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dlaswp00N.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dlaswp00N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dlaswp00N \- performs a series of row interchanges. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dlaswp00N(\fR 9 | \fB\&const int\fR 10 | \fI\&M\fR, 11 | \fB\&const int\fR 12 | \fI\&N\fR, 13 | \fB\&double *\fR 14 | \fI\&A\fR, 15 | \fB\&const int\fR 16 | \fI\&LDA\fR, 17 | \fB\&const int *\fR 18 | \fI\&IPIV\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_dlaswp00N\fR 22 | performs a series of local row interchanges on a matrix 23 | A. One row interchange is initiated for rows 0 through M-1 of A. 24 | .SH ARGUMENTS 25 | .TP 8 26 | M (local input) const int 27 | On entry, M specifies the number of rows of the array A to be 28 | interchanged. M must be at least zero. 29 | .TP 8 30 | N (local input) const int 31 | On entry, N specifies the number of columns of the array A. 32 | N must be at least zero. 
33 | .TP 8 34 | A (local input/output) double * 35 | On entry, A points to an array of dimension (LDA,N) to which 36 | the row interchanges will be applied. On exit, the permuted 37 | matrix. 38 | .TP 8 39 | LDA (local input) const int 40 | On entry, LDA specifies the leading dimension of the array A. 41 | LDA must be at least MAX(1,M). 42 | .TP 8 43 | IPIV (local input) const int * 44 | On entry, IPIV is an array of size M that contains the 45 | pivoting information. For k in [0..M), IPIV[k]=IROFF + l 46 | implies that local rows k and l are to be interchanged. 47 | .SH SEE ALSO 48 | .BR HPL_dlaswp00N \ (3), 49 | .BR HPL_dlaswp10N \ (3), 50 | .BR HPL_dlaswp01N \ (3), 51 | .BR HPL_dlaswp01T \ (3), 52 | .BR HPL_dlaswp02N \ (3), 53 | .BR HPL_dlaswp03N \ (3), 54 | .BR HPL_dlaswp03T \ (3), 55 | .BR HPL_dlaswp04N \ (3), 56 | .BR HPL_dlaswp04T \ (3), 57 | .BR HPL_dlaswp05N \ (3), 58 | .BR HPL_dlaswp05T \ (3), 59 | .BR HPL_dlaswp06N \ (3), 60 | .BR HPL_dlaswp06T \ (3). 61 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dlaswp10N.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dlaswp10N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dlaswp10N \- performs a series of column interchanges. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dlaswp10N(\fR 9 | \fB\&const int\fR 10 | \fI\&M\fR, 11 | \fB\&const int\fR 12 | \fI\&N\fR, 13 | \fB\&double *\fR 14 | \fI\&A\fR, 15 | \fB\&const int\fR 16 | \fI\&LDA\fR, 17 | \fB\&const int *\fR 18 | \fI\&IPIV\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_dlaswp10N\fR 22 | performs a sequence of local column interchanges on a 23 | matrix A. One column interchange is initiated for columns 0 through 24 | N-1 of A. 
25 | .SH ARGUMENTS 26 | .TP 8 27 | M (local input) const int 28 | On entry, M specifies the number of rows of the array A. M 29 | must be at least zero. 30 | .TP 8 31 | N (local input) const int 32 | On entry, N specifies the number of columns of the array A. N 33 | must be at least zero. 34 | .TP 8 35 | A (local input/output) double * 36 | On entry, A points to an array of dimension (LDA,N). This 37 | array contains the columns onto which the interchanges should 38 | be applied. On exit, A contains the permuted matrix. 39 | .TP 8 40 | LDA (local input) const int 41 | On entry, LDA specifies the leading dimension of the array A. 42 | LDA must be at least MAX(1,M). 43 | .TP 8 44 | IPIV (local input) const int * 45 | On entry, IPIV is an array of size N that contains the 46 | pivoting information. For k in [0..N), IPIV[k]=l implies 47 | that local columns k and l are to be interchanged. 48 | .SH SEE ALSO 49 | .BR HPL_dlaswp00N \ (3), 50 | .BR HPL_dlaswp10N \ (3), 51 | .BR HPL_dlaswp01N \ (3), 52 | .BR HPL_dlaswp01T \ (3), 53 | .BR HPL_dlaswp02N \ (3), 54 | .BR HPL_dlaswp03N \ (3), 55 | .BR HPL_dlaswp03T \ (3), 56 | .BR HPL_dlaswp04N \ (3), 57 | .BR HPL_dlaswp04T \ (3), 58 | .BR HPL_dlaswp05N \ (3), 59 | .BR HPL_dlaswp05T \ (3), 60 | .BR HPL_dlaswp06N \ (3), 61 | .BR HPL_dlaswp06T \ (3). 62 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dlatcpy.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dlatcpy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dlatcpy \- B := A^T 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dlatcpy(\fR 9 | \fB\&const int\fR 10 | \fI\&M\fR, 11 | \fB\&const int\fR 12 | \fI\&N\fR, 13 | \fB\&const double *\fR 14 | \fI\&A\fR, 15 | \fB\&const int\fR 16 | \fI\&LDA\fR, 17 | \fB\&double *\fR 18 | \fI\&B\fR, 19 | \fB\&const int\fR 20 | \fI\&LDB\fR 21 | \fB\&);\fR 22 | .SH DESCRIPTION 23 | \fB\&HPL_dlatcpy\fR 24 | copies the transpose of an array A into an array B. 
25 | .SH ARGUMENTS 26 | .TP 8 27 | M (local input) const int 28 | On entry, M specifies the number of rows of the array B and 29 | the number of columns of A. M must be at least zero. 30 | .TP 8 31 | N (local input) const int 32 | On entry, N specifies the number of rows of the array A and 33 | the number of columns of B. N must be at least zero. 34 | .TP 8 35 | A (local input) const double * 36 | On entry, A points to an array of dimension (LDA,M). 37 | .TP 8 38 | LDA (local input) const int 39 | On entry, LDA specifies the leading dimension of the array A. 40 | LDA must be at least MAX(1,N). 41 | .TP 8 42 | B (local output) double * 43 | On entry, B points to an array of dimension (LDB,N). On exit, 44 | B is overwritten with the transpose of A. 45 | .TP 8 46 | LDB (local input) const int 47 | On entry, LDB specifies the leading dimension of the array B. 48 | LDB must be at least MAX(1,M). 49 | .SH EXAMPLE 50 | \fI\&#include "hpl.h"\fR 51 | 52 | int main(int argc, char *argv[]) 53 | .br 54 | { 55 | .br 56 | double a[2*2], b[2*2]; 57 | .br 58 | a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; 59 | .br 60 | HPL_dlatcpy( 2, 2, a, 2, b, 2 ); 61 | .br 62 | printf(" [%f,%f]\en", b[0], b[2]); 63 | .br 64 | printf("b=[%f,%f]\en", b[1], b[3]); 65 | .br 66 | exit(0); return(0); 67 | .br 68 | } 69 | .SH SEE ALSO 70 | .BR HPL_dlacpy \ (3). 71 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dlocswpT.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dlocswpT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dlocswpT \- locally swaps rows within panel. 
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dlocswpT(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PANEL\fR, 11 | \fB\&const int\fR 12 | \fI\&II\fR, 13 | \fB\&const int\fR 14 | \fI\&JJ\fR, 15 | \fB\&double *\fR 16 | \fI\&WORK\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_dlocswpT\fR 20 | performs the local swapping operations within a panel. 21 | The lower triangular N0-by-N0 upper block of the panel is stored in 22 | transpose form. 23 | .SH ARGUMENTS 24 | .TP 8 25 | PANEL (local input/output) HPL_T_panel * 26 | On entry, PANEL points to the data structure containing the 27 | panel information. 28 | .TP 8 29 | II (local input) const int 30 | On entry, II specifies the row offset where the column to be 31 | operated on starts with respect to the panel. 32 | .TP 8 33 | JJ (local input) const int 34 | On entry, JJ specifies the column offset where the column to 35 | be operated on starts with respect to the panel. 36 | .TP 8 37 | WORK (local workspace) double * 38 | On entry, WORK is a workarray of size at least 2 * (4+2*N0). 39 | WORK[0] contains the local maximum absolute value scalar, 40 | WORK[1] contains the corresponding local row index, WORK[2] 41 | contains the corresponding global row index, and WORK[3] is 42 | the coordinate of process owning this max. The N0 length max 43 | row is stored in WORK[4:4+N0-1]; Note that this is also the 44 | JJth row (or column) of L1. The remaining part of this array 45 | is used as workspace. 46 | .SH SEE ALSO 47 | .BR HPL_dlocmax \ (3), 48 | .BR HPL_dlocswpN \ (3), 49 | .BR HPL_pdmxswp \ (3), 50 | .BR HPL_pdpancrN \ (3), 51 | .BR HPL_pdpancrT \ (3), 52 | .BR HPL_pdpanllN \ (3), 53 | .BR HPL_pdpanllT \ (3), 54 | .BR HPL_pdpanrlN \ (3), 55 | .BR HPL_pdpanrlT \ (3), 56 | .BR HPL_pdrpancrN \ (3), 57 | .BR HPL_pdrpancrT \ (3), 58 | .BR HPL_pdrpanllN \ (3), 59 | .BR HPL_pdrpanllT \ (3), 60 | .BR HPL_pdrpanrlN \ (3), 61 | .BR HPL_pdrpanrlT \ (3), 62 | .BR HPL_pdfact \ (3). 
63 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dmatgen.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dmatgen 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dmatgen \- random matrix generator. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dmatgen(\fR 9 | \fB\&const int\fR 10 | \fI\&M\fR, 11 | \fB\&const int\fR 12 | \fI\&N\fR, 13 | \fB\&double *\fR 14 | \fI\&A\fR, 15 | \fB\&const int\fR 16 | \fI\&LDA\fR, 17 | \fB\&const int\fR 18 | \fI\&ISEED\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_dmatgen\fR 22 | generates (or regenerates) a random matrix A. 23 | 24 | The pseudo-random generator uses the linear congruential algorithm: 25 | X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer 26 | Programming, Knuth 1973, Vol. 2. 27 | .SH ARGUMENTS 28 | .TP 8 29 | M (input) const int 30 | On entry, M specifies the number of rows of the matrix A. 31 | M must be at least zero. 32 | .TP 8 33 | N (input) const int 34 | On entry, N specifies the number of columns of the matrix A. 35 | N must be at least zero. 36 | .TP 8 37 | A (output) double * 38 | On entry, A points to an array of dimension (LDA,N). On exit, 39 | this array contains the coefficients of the randomly 40 | generated matrix. 41 | .TP 8 42 | LDA (input) const int 43 | On entry, LDA specifies the leading dimension of the array A. 44 | LDA must be at least max(1,M). 45 | .TP 8 46 | ISEED (input) const int 47 | On entry, ISEED specifies the seed number to generate the 48 | matrix A. ISEED must be at least zero. 49 | .SH SEE ALSO 50 | .BR HPL_ladd \ (3), 51 | .BR HPL_lmul \ (3), 52 | .BR HPL_setran \ (3), 53 | .BR HPL_xjumpm \ (3), 54 | .BR HPL_jumpit \ (3), 55 | .BR HPL_rand \ (3). 
56 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dscal.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dscal 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dscal \- x = alpha * x. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dscal(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&const double\fR 12 | \fI\&ALPHA\fR, 13 | \fB\&double *\fR 14 | \fI\&X\fR, 15 | \fB\&const int\fR 16 | \fI\&INCX\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_dscal\fR 20 | scales the vector x by alpha. 21 | .SH ARGUMENTS 22 | .TP 8 23 | N (local input) const int 24 | On entry, N specifies the length of the vector x. N must be 25 | at least zero. 26 | .TP 8 27 | ALPHA (local input) const double 28 | On entry, ALPHA specifies the scalar alpha. When ALPHA is 29 | supplied as zero, then the entries of the incremented array X 30 | need not be set on input. 31 | .TP 8 32 | X (local input/output) double * 33 | On entry, X is an incremented array of dimension at least 34 | ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. 35 | On exit, the entries of the incremented array X are scaled 36 | by the scalar alpha. 37 | .TP 8 38 | INCX (local input) const int 39 | On entry, INCX specifies the increment for the elements of X. 40 | INCX must not be zero. 41 | .SH EXAMPLE 42 | \fI\&#include "hpl.h"\fR 43 | 44 | int main(int argc, char *argv[]) 45 | .br 46 | { 47 | .br 48 | double x[3]; 49 | .br 50 | x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; 51 | .br 52 | HPL_dscal( 3, 2.0, x, 1 ); 53 | .br 54 | printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); 55 | .br 56 | exit(0); return(0); 57 | .br 58 | } 59 | .SH SEE ALSO 60 | .BR HPL_daxpy \ (3), 61 | .BR HPL_dcopy \ (3), 62 | .BR HPL_dswap \ (3). 
63 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_dswap.3: -------------------------------------------------------------------------------- 1 | .TH HPL_dswap 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_dswap \- y <-> x. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_dswap(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&double *\fR 12 | \fI\&X\fR, 13 | \fB\&const int\fR 14 | \fI\&INCX\fR, 15 | \fB\&double *\fR 16 | \fI\&Y\fR, 17 | \fB\&const int\fR 18 | \fI\&INCY\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_dswap\fR 22 | swaps the vectors x and y. 23 | .SH ARGUMENTS 24 | .TP 8 25 | N (local input) const int 26 | On entry, N specifies the length of the vectors x and y. N 27 | must be at least zero. 28 | .TP 8 29 | X (local input/output) double * 30 | On entry, X is an incremented array of dimension at least 31 | ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. 32 | On exit, the entries of the incremented array X are updated 33 | with the entries of the incremented array Y. 34 | .TP 8 35 | INCX (local input) const int 36 | On entry, INCX specifies the increment for the elements of X. 37 | INCX must not be zero. 38 | .TP 8 39 | Y (local input/output) double * 40 | On entry, Y is an incremented array of dimension at least 41 | ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. 42 | On exit, the entries of the incremented array Y are updated 43 | with the entries of the incremented array X. 44 | .TP 8 45 | INCY (local input) const int 46 | On entry, INCY specifies the increment for the elements of Y. 47 | INCY must not be zero. 
48 | .SH EXAMPLE 49 | \fI\&#include "hpl.h"\fR 50 | 51 | int main(int argc, char *argv[]) 52 | .br 53 | { 54 | .br 55 | double x[3], y[3]; 56 | .br 57 | x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; 58 | .br 59 | y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; 60 | .br 61 | HPL_dswap( 3, x, 1, y, 1 ); 62 | .br 63 | printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); 64 | .br 65 | printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); 66 | .br 67 | exit(0); return(0); 68 | .br 69 | } 70 | .SH SEE ALSO 71 | .BR HPL_daxpy \ (3), 72 | .BR HPL_dcopy \ (3), 73 | .BR HPL_dscal \ (3). 74 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_fprintf.3: -------------------------------------------------------------------------------- 1 | .TH HPL_fprintf 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_fprintf \- fprintf + fflush wrapper. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_fprintf(\fR 9 | \fB\&FILE *\fR 10 | \fI\&STREAM\fR, 11 | \fB\&const char *\fR 12 | \fI\&FORM\fR, 13 | \fB\&...\fR 14 | \fB\&);\fR 15 | .SH DESCRIPTION 16 | \fB\&HPL_fprintf\fR 17 | is a wrapper around fprintf flushing the output stream. 18 | .SH ARGUMENTS 19 | .TP 8 20 | STREAM (local input) FILE * 21 | On entry, STREAM specifies the output stream. 22 | .TP 8 23 | FORM (local input) const char * 24 | On entry, FORM specifies the format, i.e., how the subsequent 25 | arguments are converted for output. 26 | .TP 8 27 | (local input) ... 28 | On entry, ... is the list of arguments to be printed within 29 | the format string. 30 | .SH EXAMPLE 31 | \fI\&#include "hpl.h"\fR 32 | 33 | int main(int argc, char *argv[]) 34 | .br 35 | { 36 | .br 37 | HPL_fprintf( stdout, "Hello World.\en" ); 38 | .br 39 | exit(0); return(0); 40 | .br 41 | } 42 | .SH SEE ALSO 43 | .BR HPL_abort \ (3), 44 | .BR HPL_warn \ (3). 
45 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_grid_exit.3: -------------------------------------------------------------------------------- 1 | .TH HPL_grid_exit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_grid_exit \- Exit process grid. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_grid_exit(\fR 9 | \fB\&HPL_T_grid *\fR 10 | \fI\&GRID\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_grid_exit\fR 14 | marks the process grid object for deallocation. The 15 | returned error code MPI_SUCCESS indicates successful completion. 16 | Other error codes are (MPI) implementation dependent. 17 | .SH ARGUMENTS 18 | .TP 8 19 | GRID (local input/output) HPL_T_grid * 20 | On entry, GRID points to the data structure containing the 21 | process grid to be released. 22 | .SH SEE ALSO 23 | .BR HPL_pnum \ (3), 24 | .BR HPL_grid_init \ (3), 25 | .BR HPL_grid_info \ (3). 26 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_grid_info.3: -------------------------------------------------------------------------------- 1 | .TH HPL_grid_info 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_grid_info \- Retrieve grid information. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_grid_info(\fR 9 | \fB\&const HPL_T_grid *\fR 10 | \fI\&GRID\fR, 11 | \fB\&int *\fR 12 | \fI\&NPROW\fR, 13 | \fB\&int *\fR 14 | \fI\&NPCOL\fR, 15 | \fB\&int *\fR 16 | \fI\&MYROW\fR, 17 | \fB\&int *\fR 18 | \fI\&MYCOL\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_grid_info\fR 22 | returns the grid shape and the coordinates in the grid 23 | of the calling process. Successful completion is indicated by the 24 | returned error code MPI_SUCCESS. Other error codes depend on the MPI 25 | implementation. 
26 | .SH ARGUMENTS 27 | .TP 8 28 | GRID (local input) const HPL_T_grid * 29 | On entry, GRID points to the data structure containing the 30 | process grid information. 31 | .TP 8 32 | NPROW (global output) int * 33 | On exit, NPROW specifies the number of process rows in the 34 | grid. NPROW is at least one. 35 | .TP 8 36 | NPCOL (global output) int * 37 | On exit, NPCOL specifies the number of process columns in 38 | the grid. NPCOL is at least one. 39 | .TP 8 40 | MYROW (global output) int * 41 | On exit, MYROW specifies my row process coordinate in the 42 | grid. MYROW is greater than or equal to zero and less than 43 | NPROW. 44 | .TP 8 45 | MYCOL (global output) int * 46 | On exit, MYCOL specifies my column process coordinate in the 47 | grid. MYCOL is greater than or equal to zero and less than 48 | NPCOL. 49 | .SH SEE ALSO 50 | .BR HPL_pnum \ (3), 51 | .BR HPL_grid_init \ (3), 52 | .BR HPL_grid_exit \ (3). 53 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_grid_init.3: -------------------------------------------------------------------------------- 1 | .TH HPL_grid_init 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_grid_init \- Create a process grid. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_grid_init(\fR 9 | \fB\&MPI_Comm\fR 10 | \fI\&COMM\fR, 11 | \fB\&const HPL_T_ORDER\fR 12 | \fI\&ORDER\fR, 13 | \fB\&const int\fR 14 | \fI\&NPROW\fR, 15 | \fB\&const int\fR 16 | \fI\&NPCOL\fR, 17 | \fB\&HPL_T_grid *\fR 18 | \fI\&GRID\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_grid_init\fR 22 | creates a NPROW x NPCOL process grid using column- or 23 | row-major ordering from an initial collection of processes identified 24 | by an MPI communicator. Successful completion is indicated by the 25 | returned error code MPI_SUCCESS. Other error codes depend on the MPI 26 | implementation. 
The coordinates of processes that are not part of the 27 | grid are set to values outside of [0..NPROW) x [0..NPCOL). 28 | .SH ARGUMENTS 29 | .TP 8 30 | COMM (global/local input) MPI_Comm 31 | On entry, COMM is the MPI communicator identifying the 32 | initial collection of processes out of which the grid is 33 | formed. 34 | .TP 8 35 | ORDER (global input) const HPL_T_ORDER 36 | On entry, ORDER specifies how the processes should be ordered 37 | in the grid as follows: 38 | ORDER = HPL_ROW_MAJOR row-major ordering; 39 | ORDER = HPL_COLUMN_MAJOR column-major ordering; 40 | .TP 8 41 | NPROW (global input) const int 42 | On entry, NPROW specifies the number of process rows in the 43 | grid to be created. NPROW must be at least one. 44 | .TP 8 45 | NPCOL (global input) const int 46 | On entry, NPCOL specifies the number of process columns in 47 | the grid to be created. NPCOL must be at least one. 48 | .TP 8 49 | GRID (local input/output) HPL_T_grid * 50 | On entry, GRID points to the data structure containing the 51 | process grid information to be initialized. 52 | .SH SEE ALSO 53 | .BR HPL_pnum \ (3), 54 | .BR HPL_grid_info \ (3), 55 | .BR HPL_grid_exit \ (3). 56 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_idamax.3: -------------------------------------------------------------------------------- 1 | .TH HPL_idamax 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|). 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_idamax(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&const double *\fR 12 | \fI\&X\fR, 13 | \fB\&const int\fR 14 | \fI\&INCX\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_idamax\fR 18 | returns the index in an n-vector x of the first element 19 | having maximum absolute value. 20 | .SH ARGUMENTS 21 | .TP 8 22 | N (local input) const int 23 | On entry, N specifies the length of the vector x. 
N must be 24 | at least zero. 25 | .TP 8 26 | X (local input) const double * 27 | On entry, X is an incremented array of dimension at least 28 | ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. 29 | .TP 8 30 | INCX (local input) const int 31 | On entry, INCX specifies the increment for the elements of X. 32 | INCX must not be zero. 33 | .SH EXAMPLE 34 | \fI\&#include "hpl.h"\fR 35 | 36 | int main(int argc, char *argv[]) 37 | .br 38 | { 39 | .br 40 | double x[3]; 41 | .br 42 | int imax; 43 | .br 44 | x[0] = 1.0; x[1] = 3.0; x[2] = 2.0; 45 | .br 46 | imax = HPL_idamax( 3, x, 1 ); 47 | .br 48 | printf("imax=%d\en", imax); 49 | .br 50 | exit(0); 51 | .br 52 | return(0); 53 | .br 54 | } 55 | .SH SEE ALSO 56 | .BR HPL_daxpy \ (3), 57 | .BR HPL_dcopy \ (3), 58 | .BR HPL_dscal \ (3), 59 | .BR HPL_dswap \ (3). 60 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_indxg2l.3: -------------------------------------------------------------------------------- 1 | .TH HPL_indxg2l 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_indxg2l \- Map a global index into a local one. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_indxg2l(\fR 9 | \fB\&const int\fR 10 | \fI\&IG\fR, 11 | \fB\&const int\fR 12 | \fI\&INB\fR, 13 | \fB\&const int\fR 14 | \fI\&NB\fR, 15 | \fB\&const int\fR 16 | \fI\&SRCPROC\fR, 17 | \fB\&const int\fR 18 | \fI\&NPROCS\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_indxg2l\fR 22 | computes the local index of a matrix entry pointed to by 23 | the global index IG. This local returned index is the same in all 24 | processes. 25 | .SH ARGUMENTS 26 | .TP 8 27 | IG (input) const int 28 | On entry, IG specifies the global index of the matrix entry. 29 | IG must be at least zero. 30 | .TP 8 31 | INB (input) const int 32 | On entry, INB specifies the size of the first block of the 33 | global matrix. INB must be at least one. 
34 | .TP 8 35 | NB (input) const int 36 | On entry, NB specifies the blocking factor used to partition 37 | and distribute the matrix. NB must be larger than one. 38 | .TP 8 39 | SRCPROC (input) const int 40 | On entry, if SRCPROC = -1, the data is not distributed but 41 | replicated, in which case this routine returns IG in all 42 | processes. Otherwise, the value of SRCPROC is ignored. 43 | .TP 8 44 | NPROCS (input) const int 45 | On entry, NPROCS specifies the total number of process rows 46 | or columns over which the matrix is distributed. NPROCS must 47 | be at least one. 48 | .SH SEE ALSO 49 | .BR HPL_indxg2lp \ (3), 50 | .BR HPL_indxg2p \ (3), 51 | .BR HPL_indxl2g \ (3), 52 | .BR HPL_numroc \ (3), 53 | .BR HPL_numrocI \ (3). 54 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_indxg2p.3: -------------------------------------------------------------------------------- 1 | .TH HPL_indxg2p 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_indxg2p \- Map a global index into a process coordinate. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_indxg2p(\fR 9 | \fB\&const int\fR 10 | \fI\&IG\fR, 11 | \fB\&const int\fR 12 | \fI\&INB\fR, 13 | \fB\&const int\fR 14 | \fI\&NB\fR, 15 | \fB\&const int\fR 16 | \fI\&SRCPROC\fR, 17 | \fB\&const int\fR 18 | \fI\&NPROCS\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_indxg2p\fR 22 | computes the process coordinate which possesses the entry 23 | of a matrix specified by a global index IG. 24 | .SH ARGUMENTS 25 | .TP 8 26 | IG (input) const int 27 | On entry, IG specifies the global index of the matrix entry. 28 | IG must be at least zero. 29 | .TP 8 30 | INB (input) const int 31 | On entry, INB specifies the size of the first block of the 32 | global matrix. INB must be at least one. 
33 | .TP 8 34 | NB (input) const int 35 | On entry, NB specifies the blocking factor used to partition 36 | and distribute the matrix A. NB must be larger than one. 37 | .TP 8 38 | SRCPROC (input) const int 39 | On entry, SRCPROC specifies the coordinate of the process 40 | that possesses the first row or column of the matrix. SRCPROC 41 | must be at least zero and strictly less than NPROCS. 42 | .TP 8 43 | NPROCS (input) const int 44 | On entry, NPROCS specifies the total number of process rows 45 | or columns over which the matrix is distributed. NPROCS must 46 | be at least one. 47 | .SH SEE ALSO 48 | .BR HPL_indxg2l \ (3), 49 | .BR HPL_indxg2lp \ (3), 50 | .BR HPL_indxl2g \ (3), 51 | .BR HPL_numroc \ (3), 52 | .BR HPL_numrocI \ (3). 53 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_indxl2g.3: -------------------------------------------------------------------------------- 1 | .TH HPL_indxl2g 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_indxl2g \- Map an index-process pair into a global index. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_indxl2g(\fR 9 | \fB\&const int\fR 10 | \fI\&IL\fR, 11 | \fB\&const int\fR 12 | \fI\&INB\fR, 13 | \fB\&const int\fR 14 | \fI\&NB\fR, 15 | \fB\&const int\fR 16 | \fI\&PROC\fR, 17 | \fB\&const int\fR 18 | \fI\&SRCPROC\fR, 19 | \fB\&const int\fR 20 | \fI\&NPROCS\fR 21 | \fB\&);\fR 22 | .SH DESCRIPTION 23 | \fB\&HPL_indxl2g\fR 24 | computes the global index of a matrix entry pointed to 25 | by the local index IL of the process indicated by PROC. 26 | .SH ARGUMENTS 27 | .TP 8 28 | IL (input) const int 29 | On entry, IL specifies the local index of the matrix entry. 30 | IL must be at least zero. 31 | .TP 8 32 | INB (input) const int 33 | On entry, INB specifies the size of the first block of the 34 | global matrix. INB must be at least one. 
35 | .TP 8 36 | NB (input) const int 37 | On entry, NB specifies the blocking factor used to partition 38 | and distribute the matrix A. NB must be larger than one. 39 | .TP 8 40 | PROC (input) const int 41 | On entry, PROC specifies the coordinate of the process whose 42 | local array row or column is to be determined. PROC must be 43 | at least zero and strictly less than NPROCS. 44 | .TP 8 45 | SRCPROC (input) const int 46 | On entry, SRCPROC specifies the coordinate of the process 47 | that possesses the first row or column of the matrix. SRCPROC 48 | must be at least zero and strictly less than NPROCS. 49 | .TP 8 50 | NPROCS (input) const int 51 | On entry, NPROCS specifies the total number of process rows 52 | or columns over which the matrix is distributed. NPROCS must 53 | be at least one. 54 | .SH SEE ALSO 55 | .BR HPL_indxg2l \ (3), 56 | .BR HPL_indxg2lp \ (3), 57 | .BR HPL_indxg2p \ (3), 58 | .BR HPL_numroc \ (3), 59 | .BR HPL_numrocI \ (3). 60 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_jumpit.3: -------------------------------------------------------------------------------- 1 | .TH HPL_jumpit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_jumpit \- jump into the random sequence. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_jumpit(\fR 9 | \fB\&int *\fR 10 | \fI\&MULT\fR, 11 | \fB\&int *\fR 12 | \fI\&IADD\fR, 13 | \fB\&int *\fR 14 | \fI\&IRANN\fR, 15 | \fB\&int *\fR 16 | \fI\&IRANM\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_jumpit\fR 20 | jumps in the random sequence from the number X(n) encoded 21 | in IRANN to the number X(m) encoded in IRANM using the constants A 22 | and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A 23 | and C obviously depend on m and n, see the function HPL_xjumpm in 24 | order to initialize them. 
25 | .SH ARGUMENTS 26 | .TP 8 27 | MULT (local input) int * 28 | On entry, MULT is an array of dimension 2, that contains the 29 | 16-lower and 15-higher bits of the constant A. 30 | .TP 8 31 | IADD (local input) int * 32 | On entry, IADD is an array of dimension 2, that contains the 33 | 16-lower and 15-higher bits of the constant C. 34 | .TP 8 35 | IRANN (local input) int * 36 | On entry, IRANN is an array of dimension 2, that contains 37 | the 16-lower and 15-higher bits of the encoding of X(n). 38 | .TP 8 39 | IRANM (local output) int * 40 | On entry, IRANM is an array of dimension 2. On exit, this 41 | array contains respectively the 16-lower and 15-higher bits 42 | of the encoding of X(m). 43 | .SH SEE ALSO 44 | .BR HPL_ladd \ (3), 45 | .BR HPL_lmul \ (3), 46 | .BR HPL_setran \ (3), 47 | .BR HPL_xjumpm \ (3), 48 | .BR HPL_rand \ (3). 49 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_ladd.3: -------------------------------------------------------------------------------- 1 | .TH HPL_ladd 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_ladd \- Adds two long positive integers. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_ladd(\fR 9 | \fB\&int *\fR 10 | \fI\&J\fR, 11 | \fB\&int *\fR 12 | \fI\&K\fR, 13 | \fB\&int *\fR 14 | \fI\&I\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_ladd\fR 18 | adds without carry two long positive integers K and J and 19 | puts the result into I. The long integers I, J, K are encoded on 64 20 | bits using an array of 2 integers. The 32-lower bits are stored in 21 | the first entry of each array, the 32-higher bits in the second 22 | entry. 23 | .SH ARGUMENTS 24 | .TP 8 25 | J (local input) int * 26 | On entry, J is an integer array of dimension 2 containing the 27 | encoded long integer J. 
28 | .TP 8 29 | K (local input) int * 30 | On entry, K is an integer array of dimension 2 containing the 31 | encoded long integer K. 32 | .TP 8 33 | I (local output) int * 34 | On entry, I is an integer array of dimension 2. On exit, this 35 | array contains the encoded long integer result. 36 | .SH SEE ALSO 37 | .BR HPL_lmul \ (3), 38 | .BR HPL_setran \ (3), 39 | .BR HPL_xjumpm \ (3), 40 | .BR HPL_jumpit \ (3), 41 | .BR HPL_rand \ (3). 42 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_lmul.3: -------------------------------------------------------------------------------- 1 | .TH HPL_lmul 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_lmul \- multiplies 2 long positive integers. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_lmul(\fR 9 | \fB\&int *\fR 10 | \fI\&K\fR, 11 | \fB\&int *\fR 12 | \fI\&J\fR, 13 | \fB\&int *\fR 14 | \fI\&I\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_lmul\fR 18 | multiplies without carry two long positive integers K and J 19 | and puts the result into I. The long integers I, J, K are encoded on 20 | 64 bits using an array of 2 integers. The 32-lower bits are stored in 21 | the first entry of each array, the 32-higher bits in the second entry 22 | of each array. For efficiency purposes, the intrinsic modulo function 23 | is inlined. 24 | .SH ARGUMENTS 25 | .TP 8 26 | K (local input) int * 27 | On entry, K is an integer array of dimension 2 containing the 28 | encoded long integer K. 29 | .TP 8 30 | J (local input) int * 31 | On entry, J is an integer array of dimension 2 containing the 32 | encoded long integer J. 33 | .TP 8 34 | I (local output) int * 35 | On entry, I is an integer array of dimension 2. On exit, this 36 | array contains the encoded long integer result. 
37 | .SH SEE ALSO 38 | .BR HPL_ladd \ (3), 39 | .BR HPL_setran \ (3), 40 | .BR HPL_xjumpm \ (3), 41 | .BR HPL_jumpit \ (3), 42 | .BR HPL_rand \ (3). 43 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_max.3: -------------------------------------------------------------------------------- 1 | .TH HPL_max 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_max \- Combine (max) two buffers. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_max(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&const void *\fR 12 | \fI\&IN\fR, 13 | \fB\&void *\fR 14 | \fI\&INOUT\fR, 15 | \fB\&const HPL_T_TYPE\fR 16 | \fI\&DTYPE\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_max\fR 20 | combines (max) two buffers. 21 | .SH ARGUMENTS 22 | .TP 8 23 | N (input) const int 24 | On entry, N specifies the length of the buffers to be 25 | combined. N must be at least zero. 26 | .TP 8 27 | IN (input) const void * 28 | On entry, IN points to the input-only buffer to be combined. 29 | .TP 8 30 | INOUT (input/output) void * 31 | On entry, INOUT points to the input-output buffer to be 32 | combined. On exit, the entries of this array contain the 33 | combined results. 34 | .TP 8 35 | DTYPE (input) const HPL_T_TYPE 36 | On entry, DTYPE specifies the type of the buffers operands. 37 | .SH SEE ALSO 38 | .BR HPL_broadcast \ (3), 39 | .BR HPL_reduce \ (3), 40 | .BR HPL_all_reduce \ (3), 41 | .BR HPL_barrier \ (3), 42 | .BR HPL_min \ (3), 43 | .BR HPL_sum \ (3). 44 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_min.3: -------------------------------------------------------------------------------- 1 | .TH HPL_min 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_min \- Combine (min) two buffers. 
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_min(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&const void *\fR 12 | \fI\&IN\fR, 13 | \fB\&void *\fR 14 | \fI\&INOUT\fR, 15 | \fB\&const HPL_T_TYPE\fR 16 | \fI\&DTYPE\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_min\fR 20 | combines (min) two buffers. 21 | .SH ARGUMENTS 22 | .TP 8 23 | N (input) const int 24 | On entry, N specifies the length of the buffers to be 25 | combined. N must be at least zero. 26 | .TP 8 27 | IN (input) const void * 28 | On entry, IN points to the input-only buffer to be combined. 29 | .TP 8 30 | INOUT (input/output) void * 31 | On entry, INOUT points to the input-output buffer to be 32 | combined. On exit, the entries of this array contains the 33 | combined results. 34 | .TP 8 35 | DTYPE (input) const HPL_T_TYPE 36 | On entry, DTYPE specifies the type of the buffers operands. 37 | .SH SEE ALSO 38 | .BR HPL_broadcast \ (3), 39 | .BR HPL_reduce \ (3), 40 | .BR HPL_all_reduce \ (3), 41 | .BR HPL_barrier \ (3), 42 | .BR HPL_max \ (3), 43 | .BR HPL_sum \ (3). 44 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_numroc.3: -------------------------------------------------------------------------------- 1 | .TH HPL_numroc 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_numroc \- Compute the local number of row/columns. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_numroc(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&const int\fR 12 | \fI\&INB\fR, 13 | \fB\&const int\fR 14 | \fI\&NB\fR, 15 | \fB\&const int\fR 16 | \fI\&PROC\fR, 17 | \fB\&const int\fR 18 | \fI\&SRCPROC\fR, 19 | \fB\&const int\fR 20 | \fI\&NPROCS\fR 21 | \fB\&);\fR 22 | .SH DESCRIPTION 23 | \fB\&HPL_numroc\fR 24 | returns the local number of matrix rows/columns process 25 | PROC will get if we give out N rows/columns starting from global 26 | index 0. 
27 | .SH ARGUMENTS 28 | .TP 8 29 | N (input) const int 30 | On entry, N specifies the number of rows/columns being dealt 31 | out. N must be at least zero. 32 | .TP 8 33 | INB (input) const int 34 | On entry, INB specifies the size of the first block of the 35 | global matrix. INB must be at least one. 36 | .TP 8 37 | NB (input) const int 38 | On entry, NB specifies the blocking factor used to partition 39 | and distribute the matrix A. NB must be larger than one. 40 | .TP 8 41 | PROC (input) const int 42 | On entry, PROC specifies the coordinate of the process whose 43 | local portion is determined. PROC must be at least zero and 44 | strictly less than NPROCS. 45 | .TP 8 46 | SRCPROC (input) const int 47 | On entry, SRCPROC specifies the coordinate of the process 48 | that possesses the first row or column of the matrix. SRCPROC 49 | must be at least zero and strictly less than NPROCS. 50 | .TP 8 51 | NPROCS (input) const int 52 | On entry, NPROCS specifies the total number of process rows 53 | or columns over which the matrix is distributed. NPROCS must 54 | be at least one. 55 | .SH SEE ALSO 56 | .BR HPL_indxg2l \ (3), 57 | .BR HPL_indxg2lp \ (3), 58 | .BR HPL_indxg2p \ (3), 59 | .BR HPL_indxl2g \ (3), 60 | .BR HPL_numrocI \ (3). 61 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pabort.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pabort 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pabort \- halts execution. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pabort(\fR 9 | \fB\&int\fR 10 | \fI\&LINE\fR, 11 | \fB\&const char *\fR 12 | \fI\&SRNAME\fR, 13 | \fB\&const char *\fR 14 | \fI\&FORM\fR, 15 | \fB\&...\fR 16 | \fB\&);\fR 17 | .SH DESCRIPTION 18 | \fB\&HPL_pabort\fR 19 | displays an error message on stderr and halts execution. 
20 | .SH ARGUMENTS 21 | .TP 8 22 | LINE (local input) int 23 | On entry, LINE specifies the line number in the file where 24 | the error has occurred. When LINE is not a positive line 25 | number, it is ignored. 26 | .TP 8 27 | SRNAME (local input) const char * 28 | On entry, SRNAME should be the name of the routine calling 29 | this error handler. 30 | .TP 8 31 | FORM (local input) const char * 32 | On entry, FORM specifies the format, i.e., how the subsequent 33 | arguments are converted for output. 34 | .TP 8 35 | (local input) ... 36 | On entry, ... is the list of arguments to be printed within 37 | the format string. 38 | .SH SEE ALSO 39 | .BR HPL_fprintf \ (3), 40 | .BR HPL_pwarn \ (3). 41 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_packL.3: -------------------------------------------------------------------------------- 1 | .TH HPL_packL 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_packL \- Form the MPI structure for the row ring broadcasts. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_packL(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PANEL\fR, 11 | \fB\&const int\fR 12 | \fI\&INDEX\fR, 13 | \fB\&const int\fR 14 | \fI\&LEN\fR, 15 | \fB\&const int\fR 16 | \fI\&IBUF\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_packL\fR 20 | forms the MPI data type for the panel to be broadcast. 21 | Successful completion is indicated by the returned error code 22 | MPI_SUCCESS. 23 | .SH ARGUMENTS 24 | .TP 8 25 | PANEL (input/output) HPL_T_panel * 26 | On entry, PANEL points to the current panel data structure 27 | being broadcast. 28 | .TP 8 29 | INDEX (input) const int 30 | On entry, INDEX points to the first entry of the packed 31 | buffer being broadcast. 32 | .TP 8 33 | LEN (input) const int 34 | On entry, LEN is the length of the packed buffer.
35 | .TP 8 36 | IBUF (input) const int 37 | On entry, IBUF specifies the panel buffer/count/type entries 38 | that should be initialized. 39 | .SH SEE ALSO 40 | .BR HPL_binit \ (3), 41 | .BR HPL_bcast \ (3), 42 | .BR HPL_bwait \ (3). 43 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pddriver.3: -------------------------------------------------------------------------------- 1 | .TH main 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | main \- HPL main timing program. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&main();\fR 9 | .SH DESCRIPTION 10 | \fB\&main\fR 11 | is the main driver program for testing the HPL routines. 12 | This program is driven by a short data file named "HPL.dat". 13 | .SH SEE ALSO 14 | .BR HPL_pdinfo \ (3), 15 | .BR HPL_pdtest \ (3). 16 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdgesv.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdgesv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdgesv \- Solve A x = b. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdgesv(\fR 9 | \fB\&HPL_T_grid *\fR 10 | \fI\&GRID\fR, 11 | \fB\&HPL_T_palg *\fR 12 | \fI\&ALGO\fR, 13 | \fB\&HPL_T_pmat *\fR 14 | \fI\&A\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_pdgesv\fR 18 | factors a N+1-by-N matrix using LU factorization with row 19 | partial pivoting. The main algorithm is the "right looking" variant 20 | with or without look-ahead. The lower triangular factor is left 21 | unpivoted and the pivots are not returned. The right hand side is the 22 | N+1 column of the coefficient matrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | GRID (local input) HPL_T_grid * 26 | On entry, GRID points to the data structure containing the 27 | process grid information. 
28 | .TP 8 29 | ALGO (global input) HPL_T_palg * 30 | On entry, ALGO points to the data structure containing the 31 | algorithmic parameters. 32 | .TP 8 33 | A (local input/output) HPL_T_pmat * 34 | On entry, A points to the data structure containing the local 35 | array information. 36 | .SH SEE ALSO 37 | .BR HPL_pdgesv0 \ (3), 38 | .BR HPL_pdgesvK1 \ (3), 39 | .BR HPL_pdgesvK2 \ (3), 40 | .BR HPL_pdtrsv \ (3). 41 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdgesv0.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdgesv0 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdgesv0 \- Factor an N x N+1 matrix. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdgesv0(\fR 9 | \fB\&HPL_T_grid *\fR 10 | \fI\&GRID\fR, 11 | \fB\&HPL_T_palg *\fR 12 | \fI\&ALGO\fR, 13 | \fB\&HPL_T_pmat *\fR 14 | \fI\&A\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_pdgesv0\fR 18 | factors a N+1-by-N matrix using LU factorization with row 19 | partial pivoting. The main algorithm is the "right looking" variant 20 | without look-ahead. The lower triangular factor is left unpivoted and 21 | the pivots are not returned. The right hand side is the N+1 column of 22 | the coefficient matrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | GRID (local input) HPL_T_grid * 26 | On entry, GRID points to the data structure containing the 27 | process grid information. 28 | .TP 8 29 | ALGO (global input) HPL_T_palg * 30 | On entry, ALGO points to the data structure containing the 31 | algorithmic parameters. 32 | .TP 8 33 | A (local input/output) HPL_T_pmat * 34 | On entry, A points to the data structure containing the local 35 | array information. 
36 | .SH SEE ALSO 37 | .BR HPL_pdgesv \ (3), 38 | .BR HPL_pdgesvK1 \ (3), 39 | .BR HPL_pdgesvK2 \ (3), 40 | .BR HPL_pdfact \ (3), 41 | .BR HPL_binit \ (3), 42 | .BR HPL_bcast \ (3), 43 | .BR HPL_bwait \ (3), 44 | .BR HPL_pdupdateNN \ (3), 45 | .BR HPL_pdupdateNT \ (3), 46 | .BR HPL_pdupdateTN \ (3), 47 | .BR HPL_pdupdateTT \ (3). 48 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdgesvK1.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdgesvK1 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdgesvK1 \- Factor an N x N+1 matrix. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdgesvK1(\fR 9 | \fB\&HPL_T_grid *\fR 10 | \fI\&GRID\fR, 11 | \fB\&HPL_T_palg *\fR 12 | \fI\&ALGO\fR, 13 | \fB\&HPL_T_pmat *\fR 14 | \fI\&A\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_pdgesvK1\fR 18 | factors a N+1-by-N matrix using LU factorization with row 19 | partial pivoting. The main algorithm is the "right looking" variant 20 | with look-ahead. The lower triangular factor is left unpivoted and 21 | the pivots are not returned. The right hand side is the N+1 column of 22 | the coefficient matrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | GRID (local input) HPL_T_grid * 26 | On entry, GRID points to the data structure containing the 27 | process grid information. 28 | .TP 8 29 | ALGO (global input) HPL_T_palg * 30 | On entry, ALGO points to the data structure containing the 31 | algorithmic parameters. 32 | .TP 8 33 | A (local input/output) HPL_T_pmat * 34 | On entry, A points to the data structure containing the local 35 | array information. 
36 | .SH SEE ALSO 37 | .BR HPL_pdgesv \ (3), 38 | .BR HPL_pdgesvK2 \ (3), 39 | .BR HPL_pdfact \ (3), 40 | .BR HPL_binit \ (3), 41 | .BR HPL_bcast \ (3), 42 | .BR HPL_bwait \ (3), 43 | .BR HPL_pdupdateNN \ (3), 44 | .BR HPL_pdupdateNT \ (3), 45 | .BR HPL_pdupdateTN \ (3), 46 | .BR HPL_pdupdateTT \ (3). 47 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdgesvK2.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdgesvK2 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdgesvK2 \- Factor an N x N+1 matrix. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdgesvK2(\fR 9 | \fB\&HPL_T_grid *\fR 10 | \fI\&GRID\fR, 11 | \fB\&HPL_T_palg *\fR 12 | \fI\&ALGO\fR, 13 | \fB\&HPL_T_pmat *\fR 14 | \fI\&A\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_pdgesvK2\fR 18 | factors a N+1-by-N matrix using LU factorization with row 19 | partial pivoting. The main algorithm is the "right looking" variant 20 | with look-ahead. The lower triangular factor is left unpivoted and 21 | the pivots are not returned. The right hand side is the N+1 column of 22 | the coefficient matrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | GRID (local input) HPL_T_grid * 26 | On entry, GRID points to the data structure containing the 27 | process grid information. 28 | .TP 8 29 | ALGO (global input) HPL_T_palg * 30 | On entry, ALGO points to the data structure containing the 31 | algorithmic parameters. 32 | .TP 8 33 | A (local input/output) HPL_T_pmat * 34 | On entry, A points to the data structure containing the local 35 | array information. 
36 | .SH SEE ALSO 37 | .BR HPL_pdgesv \ (3), 38 | .BR HPL_pdgesv0 \ (3), 39 | .BR HPL_pdgesvK1 \ (3), 40 | .BR HPL_pdfact \ (3), 41 | .BR HPL_binit \ (3), 42 | .BR HPL_bcast \ (3), 43 | .BR HPL_bwait \ (3), 44 | .BR HPL_pdupdateNN \ (3), 45 | .BR HPL_pdupdateNT \ (3), 46 | .BR HPL_pdupdateTN \ (3), 47 | .BR HPL_pdupdateTT \ (3). 48 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdpanel_disp.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdpanel_disp 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdpanel_disp \- Deallocate a panel data structure. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_pdpanel_disp(\fR 9 | \fB\&HPL_T_panel * *\fR 10 | \fI\&PANEL\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_pdpanel_disp\fR 14 | deallocates the panel structure and resources and 15 | stores the error code returned by the panel factorization. 16 | .SH ARGUMENTS 17 | .TP 8 18 | PANEL (local input/output) HPL_T_panel * * 19 | On entry, PANEL points to the address of the panel data 20 | structure to be deallocated. 21 | .SH SEE ALSO 22 | .BR HPL_pdpanel_new \ (3), 23 | .BR HPL_pdpanel_init \ (3), 24 | .BR HPL_pdpanel_free \ (3). 25 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdpanel_free.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdpanel_free 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdpanel_free \- Deallocate the panel resources. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_pdpanel_free(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PANEL\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_pdpanel_free\fR 14 | deallocates the panel resources and stores the error 15 | code returned by the panel factorization.
16 | .SH ARGUMENTS 17 | .TP 8 18 | PANEL (local input/output) HPL_T_panel * 19 | On entry, PANEL points to the panel data structure from 20 | which the resources should be deallocated. 21 | .SH SEE ALSO 22 | .BR HPL_pdpanel_new \ (3), 23 | .BR HPL_pdpanel_init \ (3), 24 | .BR HPL_pdpanel_disp \ (3). 25 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdtrsv.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdtrsv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdtrsv \- Solve triu( A ) x = b. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdtrsv(\fR 9 | \fB\&HPL_T_grid *\fR 10 | \fI\&GRID\fR, 11 | \fB\&HPL_T_pmat *\fR 12 | \fI\&AMAT\fR 13 | \fB\&);\fR 14 | .SH DESCRIPTION 15 | \fB\&HPL_pdtrsv\fR 16 | solves an upper triangular system of linear equations. 17 | 18 | The rhs is the last column of the N by N+1 matrix A. The solve starts 19 | in the process column owning the Nth column of A, so the rhs b may 20 | need to be moved one process column to the left at the beginning. The 21 | routine therefore needs a column vector in every process column but 22 | the one owning b. The result is replicated in all process rows, and 23 | returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. 24 | 25 | The algorithm uses decreasing one-ring broadcast in process rows and 26 | columns implemented in terms of synchronous communication point to 27 | point primitives. The lookahead of depth 1 is used to minimize the 28 | critical path. This entire operation is essentially ``latency'' bound 29 | and an estimate of its running time is given by: 30 | 31 | (move rhs) lat + N / ( P bdwth ) + 32 | (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + 33 | gam2 N^2 / ( P Q ), 34 | 35 | where gam2 is an estimate of the Level 2 BLAS rate of execution. 36 | There are N / NB diagonal blocks. 
One must exchange 2 messages of 37 | length NB to compute the next NB entries of the vector solution, as 38 | well as performing a total of N^2 floating point operations. 39 | .SH ARGUMENTS 40 | .TP 8 41 | GRID (local input) HPL_T_grid * 42 | On entry, GRID points to the data structure containing the 43 | process grid information. 44 | .TP 8 45 | AMAT (local input/output) HPL_T_pmat * 46 | On entry, AMAT points to the data structure containing the 47 | local array information. 48 | .SH SEE ALSO 49 | .BR HPL_pdgesv \ (3). 50 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdupdateNN.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdupdateNN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdupdateNN(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PBCST\fR, 11 | \fB\&int *\fR 12 | \fI\&IFLAG\fR, 13 | \fB\&HPL_T_panel *\fR 14 | \fI\&PANEL\fR, 15 | \fB\&const int\fR 16 | \fI\&NN\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_pdupdateNN\fR 20 | broadcast - forward the panel PBCST and simultaneously 21 | applies the row interchanges and updates part of the trailing (using 22 | the panel PANEL) submatrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | PBCST (local input/output) HPL_T_panel * 26 | On entry, PBCST points to the data structure containing the 27 | panel (to be broadcast) information. 28 | .TP 8 29 | IFLAG (local output) int * 30 | On exit, IFLAG indicates whether or not the broadcast has 31 | been completed when PBCST is not NULL on entry. In that case, 32 | IFLAG is left unchanged. 33 | .TP 8 34 | PANEL (local input/output) HPL_T_panel * 35 | On entry, PANEL points to the data structure containing the 36 | panel (to be updated) information. 
37 | .TP 8 38 | NN (local input) const int 39 | On entry, NN specifies the local number of columns of the 40 | trailing submatrix to be updated starting at the current 41 | position. NN must be at least zero. 42 | .SH SEE ALSO 43 | .BR HPL_pdgesv \ (3), 44 | .BR HPL_pdgesv0 \ (3), 45 | .BR HPL_pdgesvK1 \ (3), 46 | .BR HPL_pdgesvK2 \ (3), 47 | .BR HPL_pdlaswp00N \ (3), 48 | .BR HPL_pdlaswp01N \ (3). 49 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdupdateNT.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdupdateNT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdupdateNT(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PBCST\fR, 11 | \fB\&int *\fR 12 | \fI\&IFLAG\fR, 13 | \fB\&HPL_T_panel *\fR 14 | \fI\&PANEL\fR, 15 | \fB\&const int\fR 16 | \fI\&NN\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_pdupdateNT\fR 20 | broadcast - forward the panel PBCST and simultaneously 21 | applies the row interchanges and updates part of the trailing (using 22 | the panel PANEL) submatrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | PBCST (local input/output) HPL_T_panel * 26 | On entry, PBCST points to the data structure containing the 27 | panel (to be broadcast) information. 28 | .TP 8 29 | IFLAG (local output) int * 30 | On exit, IFLAG indicates whether or not the broadcast has 31 | been completed when PBCST is not NULL on entry. In that case, 32 | IFLAG is left unchanged. 33 | .TP 8 34 | PANEL (local input/output) HPL_T_panel * 35 | On entry, PANEL points to the data structure containing the 36 | panel (to be updated) information. 37 | .TP 8 38 | NN (local input) const int 39 | On entry, NN specifies the local number of columns of the 40 | trailing submatrix to be updated starting at the current 41 | position. 
NN must be at least zero. 42 | .SH SEE ALSO 43 | .BR HPL_pdgesv \ (3), 44 | .BR HPL_pdgesv0 \ (3), 45 | .BR HPL_pdgesvK1 \ (3), 46 | .BR HPL_pdgesvK2 \ (3), 47 | .BR HPL_pdlaswp00T \ (3), 48 | .BR HPL_pdlaswp01T \ (3). 49 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdupdateTN.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdupdateTN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdupdateTN(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PBCST\fR, 11 | \fB\&int *\fR 12 | \fI\&IFLAG\fR, 13 | \fB\&HPL_T_panel *\fR 14 | \fI\&PANEL\fR, 15 | \fB\&const int\fR 16 | \fI\&NN\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_pdupdateTN\fR 20 | broadcast - forward the panel PBCST and simultaneously 21 | applies the row interchanges and updates part of the trailing (using 22 | the panel PANEL) submatrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | PBCST (local input/output) HPL_T_panel * 26 | On entry, PBCST points to the data structure containing the 27 | panel (to be broadcast) information. 28 | .TP 8 29 | IFLAG (local output) int * 30 | On exit, IFLAG indicates whether or not the broadcast has 31 | been completed when PBCST is not NULL on entry. In that case, 32 | IFLAG is left unchanged. 33 | .TP 8 34 | PANEL (local input/output) HPL_T_panel * 35 | On entry, PANEL points to the data structure containing the 36 | panel (to be updated) information. 37 | .TP 8 38 | NN (local input) const int 39 | On entry, NN specifies the local number of columns of the 40 | trailing submatrix to be updated starting at the current 41 | position. NN must be at least zero. 
42 | .SH SEE ALSO 43 | .BR HPL_pdgesv \ (3), 44 | .BR HPL_pdgesv0 \ (3), 45 | .BR HPL_pdgesvK1 \ (3), 46 | .BR HPL_pdgesvK2 \ (3), 47 | .BR HPL_pdlaswp00N \ (3), 48 | .BR HPL_pdlaswp01N \ (3). 49 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pdupdateTT.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pdupdateTT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pdupdateTT(\fR 9 | \fB\&HPL_T_panel *\fR 10 | \fI\&PBCST\fR, 11 | \fB\&int *\fR 12 | \fI\&IFLAG\fR, 13 | \fB\&HPL_T_panel *\fR 14 | \fI\&PANEL\fR, 15 | \fB\&const int\fR 16 | \fI\&NN\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_pdupdateTT\fR 20 | broadcast - forward the panel PBCST and simultaneously 21 | applies the row interchanges and updates part of the trailing (using 22 | the panel PANEL) submatrix. 23 | .SH ARGUMENTS 24 | .TP 8 25 | PBCST (local input/output) HPL_T_panel * 26 | On entry, PBCST points to the data structure containing the 27 | panel (to be broadcast) information. 28 | .TP 8 29 | IFLAG (local output) int * 30 | On exit, IFLAG indicates whether or not the broadcast has 31 | been completed when PBCST is not NULL on entry. In that case, 32 | IFLAG is left unchanged. 33 | .TP 8 34 | PANEL (local input/output) HPL_T_panel * 35 | On entry, PANEL points to the data structure containing the 36 | panel (to be updated) information. 37 | .TP 8 38 | NN (local input) const int 39 | On entry, NN specifies the local number of columns of the 40 | trailing submatrix to be updated starting at the current 41 | position. NN must be at least zero. 
42 | .SH SEE ALSO 43 | .BR HPL_pdgesv \ (3), 44 | .BR HPL_pdgesv0 \ (3), 45 | .BR HPL_pdgesvK1 \ (3), 46 | .BR HPL_pdgesvK2 \ (3), 47 | .BR HPL_pdlaswp00T \ (3), 48 | .BR HPL_pdlaswp01T \ (3). 49 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_perm.3: -------------------------------------------------------------------------------- 1 | .TH HPL_perm 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_perm \- Combine 2 index arrays - Generate the permutation. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_perm(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&int *\fR 12 | \fI\&LINDXA\fR, 13 | \fB\&int *\fR 14 | \fI\&LINDXAU\fR, 15 | \fB\&int *\fR 16 | \fI\&IWORK\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_perm\fR 20 | combines two index arrays and generates the corresponding 21 | permutation. First, this function computes the inverse of LINDXA, and 22 | then combines it with LINDXAU. Second, in order to be able to perform 23 | the permutation in place, LINDXAU is overwritten by the sequence of 24 | permutation producing the same result. What we ultimately want to 25 | achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the 26 | call to this function, this in place permutation can be performed by 27 | for i in [0..N) swap U[i] with U[LINDXAU[i]]. 28 | .SH ARGUMENTS 29 | .TP 8 30 | N (global input) const int 31 | On entry, N specifies the length of the arrays LINDXA and 32 | LINDXAU. N should be at least zero. 33 | .TP 8 34 | LINDXA (global input/output) int * 35 | On entry, LINDXA is an array of dimension N containing the 36 | source indexes. On exit, LINDXA contains the combined index 37 | array. 38 | .TP 8 39 | LINDXAU (global input/output) int * 40 | On entry, LINDXAU is an array of dimension N containing the 41 | target indexes.
On exit, LINDXAU contains the sequence of 42 | permutation, that should be applied in increasing order to 43 | permute the underlying array U in place. 44 | .TP 8 45 | IWORK (workspace) int * 46 | On entry, IWORK is a workarray of dimension N. 47 | .SH SEE ALSO 48 | .BR HPL_plindx1 \ (3), 49 | .BR HPL_pdlaswp01N \ (3), 50 | .BR HPL_pdlaswp01T \ (3). 51 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pnum.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pnum 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pnum \- Rank determination. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_pnum(\fR 9 | \fB\&const HPL_T_grid *\fR 10 | \fI\&GRID\fR, 11 | \fB\&const int\fR 12 | \fI\&MYROW\fR, 13 | \fB\&const int\fR 14 | \fI\&MYCOL\fR 15 | \fB\&);\fR 16 | .SH DESCRIPTION 17 | \fB\&HPL_pnum\fR 18 | determines the rank of a process as a function of its 19 | coordinates in the grid. 20 | .SH ARGUMENTS 21 | .TP 8 22 | GRID (local input) const HPL_T_grid * 23 | On entry, GRID points to the data structure containing the 24 | process grid information. 25 | .TP 8 26 | MYROW (local input) const int 27 | On entry, MYROW specifies the row coordinate of the process 28 | whose rank is to be determined. MYROW must be greater than or 29 | equal to zero and less than NPROW. 30 | .TP 8 31 | MYCOL (local input) const int 32 | On entry, MYCOL specifies the column coordinate of the 33 | process whose rank is to be determined. MYCOL must be greater 34 | than or equal to zero and less than NPCOL. 35 | .SH SEE ALSO 36 | .BR HPL_grid_init \ (3), 37 | .BR HPL_grid_info \ (3), 38 | .BR HPL_grid_exit \ (3). 
39 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_ptimer.3: -------------------------------------------------------------------------------- 1 | .TH HPL_ptimer 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_ptimer \- Timer facility. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_ptimer(\fR 9 | \fB\&const int\fR 10 | \fI\&I\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_ptimer\fR 14 | provides a "stopwatch" functionality cpu/wall timer in 15 | seconds. Up to 64 separate timers can be functioning at once. The 16 | first call starts the timer, and the second stops it. This routine 17 | can be disabled by calling HPL_ptimer_disable(), so that calls to 18 | the timer are ignored. This feature can be used to make sure certain 19 | sections of code do not affect timings, even if they call routines 20 | which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable 21 | the timer functionality. One can retrieve the current value of a 22 | timer by calling 23 | 24 | t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) 25 | 26 | where I is the timer index in [0..64). To initialize the timer 27 | functionality, one must have called HPL_ptimer_boot() prior to any of 28 | the functions mentioned above. 29 | .SH ARGUMENTS 30 | .TP 8 31 | I (global input) const int 32 | On entry, I specifies the timer to stop/start. 33 | .SH SEE ALSO 34 | .BR HPL_ptimer_cputime \ (3), 35 | .BR HPL_ptimer_walltime \ (3). 36 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_ptimer_cputime.3: -------------------------------------------------------------------------------- 1 | .TH HPL_ptimer_cputime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_ptimer_cputime \- Return the CPU time.
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&double\fR 8 | \fB\&HPL_ptimer_cputime();\fR 9 | .SH DESCRIPTION 10 | \fB\&HPL_ptimer_cputime\fR 11 | returns the cpu time. If HPL_USE_CLOCK is defined, 12 | the clock() function is used to return an approximation of processor 13 | time used by the program. The value returned is the CPU time used so 14 | far as a clock_t; to get the number of seconds used, the result is 15 | divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C 16 | standard library. If HPL_USE_TIMES is defined, the times() function 17 | is used instead. This function returns the current process times. 18 | times() returns the number of clock ticks that have elapsed since the 19 | system has been up. Otherwise and by default, the standard library 20 | function getrusage() is used. 21 | .SH SEE ALSO 22 | .BR HPL_ptimer_walltime \ (3), 23 | .BR HPL_ptimer \ (3). 24 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_ptimer_walltime.3: -------------------------------------------------------------------------------- 1 | .TH HPL_ptimer_walltime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_ptimer_walltime \- Return the elapsed (wall-clock) time. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&double\fR 8 | \fB\&HPL_ptimer_walltime();\fR 9 | .SH DESCRIPTION 10 | \fB\&HPL_ptimer_walltime\fR 11 | returns the elapsed (wall-clock) time. 12 | .SH SEE ALSO 13 | .BR HPL_ptimer_cputime \ (3), 14 | .BR HPL_ptimer \ (3). 15 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_pwarn.3: -------------------------------------------------------------------------------- 1 | .TH HPL_pwarn 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_pwarn \- displays an error message. 
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_pwarn(\fR 9 | \fB\&FILE *\fR 10 | \fI\&STREAM\fR, 11 | \fB\&int\fR 12 | \fI\&LINE\fR, 13 | \fB\&const char *\fR 14 | \fI\&SRNAME\fR, 15 | \fB\&const char *\fR 16 | \fI\&FORM\fR, 17 | \fB\&...\fR 18 | \fB\&);\fR 19 | .SH DESCRIPTION 20 | \fB\&HPL_pwarn\fR 21 | displays an error message. 22 | .SH ARGUMENTS 23 | .TP 8 24 | STREAM (local input) FILE * 25 | On entry, STREAM specifies the output stream. 26 | .TP 8 27 | LINE (local input) int 28 | On entry, LINE specifies the line number in the file where 29 | the error has occurred. When LINE is not a positive line 30 | number, it is ignored. 31 | .TP 8 32 | SRNAME (local input) const char * 33 | On entry, SRNAME should be the name of the routine calling 34 | this error handler. 35 | .TP 8 36 | FORM (local input) const char * 37 | On entry, FORM specifies the format, i.e., how the subsequent 38 | arguments are converted for output. 39 | .TP 8 40 | (local input) ... 41 | On entry, ... is the list of arguments to be printed within 42 | the format string. 43 | .SH SEE ALSO 44 | .BR HPL_pabort \ (3), 45 | .BR HPL_fprintf \ (3). 46 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_rand.3: -------------------------------------------------------------------------------- 1 | .TH HPL_rand 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_rand \- random number generator. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&double\fR 8 | \fB\&HPL_rand();\fR 9 | .SH DESCRIPTION 10 | \fB\&HPL_rand\fR 11 | generates the next number in the random sequence. This 12 | function ensures that this number lies in the interval (-0.5, 0.5]. 13 | 14 | The static array irand contains the information (2 integers) required 15 | to generate the next number in the sequence X(n).
This number is 16 | computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the 17 | constant d is the largest 64 bit positive integer. The array irand is 18 | then updated for the generation of the next number X(n+1) in the 19 | random sequence as follows X(n+1) = a * X(n) + c. The constants a and 20 | c should have been preliminarily stored in the arrays ias and ics as 21 | 2 pairs of integers. The initialization of ias, ics and irand is 22 | performed by the function HPL_setran. 23 | .SH SEE ALSO 24 | .BR HPL_ladd \ (3), 25 | .BR HPL_lmul \ (3), 26 | .BR HPL_setran \ (3), 27 | .BR HPL_xjumpm \ (3), 28 | .BR HPL_jumpit \ (3). 29 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_recv.3: -------------------------------------------------------------------------------- 1 | .TH HPL_recv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_recv \- Receive a message. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_recv(\fR 9 | \fB\&double *\fR 10 | \fI\&RBUF\fR, 11 | \fB\&int\fR 12 | \fI\&RCOUNT\fR, 13 | \fB\&int\fR 14 | \fI\&SRC\fR, 15 | \fB\&int\fR 16 | \fI\&RTAG\fR, 17 | \fB\&MPI_Comm\fR 18 | \fI\&COMM\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_recv\fR 22 | is a simple wrapper around MPI_Recv. Its main purpose is 23 | to allow for some experimentation / tuning of this simple routine. 24 | Successful completion is indicated by the returned error code 25 | HPL_SUCCESS. In the case of messages of length less than or equal to 26 | zero, this function returns immediately. 27 | .SH ARGUMENTS 28 | .TP 8 29 | RBUF (local output) double * 30 | On entry, RBUF specifies the starting address of buffer to be 31 | received. 32 | .TP 8 33 | RCOUNT (local input) int 34 | On entry, RCOUNT specifies the number of double precision 35 | entries in RBUF. RCOUNT must be at least zero. 
36 | .TP 8 37 | SRC (local input) int 38 | On entry, SRC specifies the rank of the sending process in 39 | the communication space defined by COMM. 40 | .TP 8 41 | RTAG (local input) int 42 | On entry, RTAG specifies the message tag to be used for this 43 | communication operation. 44 | .TP 8 45 | COMM (local input) MPI_Comm 46 | The MPI communicator identifying the communication space. 47 | .SH SEE ALSO 48 | .BR HPL_send \ (3), 49 | .BR HPL_sendrecv \ (3). 50 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_reduce.3: -------------------------------------------------------------------------------- 1 | .TH HPL_reduce 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_reduce \- Reduce operation. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_reduce(\fR 9 | \fB\&void *\fR 10 | \fI\&BUFFER\fR, 11 | \fB\&const int\fR 12 | \fI\&COUNT\fR, 13 | \fB\&const HPL_T_TYPE\fR 14 | \fI\&DTYPE\fR, 15 | \fB\&const HPL_T_OP \fR 16 | \fI\&OP\fR, 17 | \fB\&const int\fR 18 | \fI\&ROOT\fR, 19 | \fB\&MPI_Comm\fR 20 | \fI\&COMM\fR 21 | \fB\&);\fR 22 | .SH DESCRIPTION 23 | \fB\&HPL_reduce\fR 24 | performs a global reduce operation across all processes of 25 | a group. Note that the input buffer is used as a work array, so in all 26 | processes but the accumulating process the original data is corrupted. 27 | .SH ARGUMENTS 28 | .TP 8 29 | BUFFER (local input/output) void * 30 | On entry, BUFFER points to the buffer to be reduced. On 31 | exit, and in process of rank ROOT this array contains the 32 | reduced data. This buffer is also used as workspace during 33 | the operation in the other processes of the group. 34 | .TP 8 35 | COUNT (global input) const int 36 | On entry, COUNT indicates the number of entries in BUFFER. 37 | COUNT must be at least zero. 38 | .TP 8 39 | DTYPE (global input) const HPL_T_TYPE 40 | On entry, DTYPE specifies the type of the buffers operands. 
41 | .TP 8 42 | OP (global input) const HPL_T_OP 43 | On entry, OP is a pointer to the local combine function. 44 | .TP 8 45 | ROOT (global input) const int 46 | On entry, ROOT is the coordinate of the accumulating process. 47 | .TP 8 48 | COMM (global/local input) MPI_Comm 49 | The MPI communicator identifying the process collection. 50 | .SH SEE ALSO 51 | .BR HPL_broadcast \ (3), 52 | .BR HPL_all_reduce \ (3), 53 | .BR HPL_barrier \ (3), 54 | .BR HPL_min \ (3), 55 | .BR HPL_max \ (3), 56 | .BR HPL_sum \ (3). 57 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_sdrv.3: -------------------------------------------------------------------------------- 1 | .TH HPL_sdrv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_sdrv \- Send and receive a message. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_sdrv(\fR 9 | \fB\&double *\fR 10 | \fI\&SBUF\fR, 11 | \fB\&int\fR 12 | \fI\&SCOUNT\fR, 13 | \fB\&int\fR 14 | \fI\&STAG\fR, 15 | \fB\&double *\fR 16 | \fI\&RBUF\fR, 17 | \fB\&int\fR 18 | \fI\&RCOUNT\fR, 19 | \fB\&int\fR 20 | \fI\&RTAG\fR, 21 | \fB\&int\fR 22 | \fI\&PARTNER\fR, 23 | \fB\&MPI_Comm\fR 24 | \fI\&COMM\fR 25 | \fB\&);\fR 26 | .SH DESCRIPTION 27 | \fB\&HPL_sdrv\fR 28 | is a simple wrapper around MPI_Sendrecv. Its main purpose is 29 | to allow for some experimentation and tuning of this simple function. 30 | Messages of length less than or equal to zero are not sent nor 31 | received. Successful completion is indicated by the returned error 32 | code HPL_SUCCESS. 33 | .SH ARGUMENTS 34 | .TP 8 35 | SBUF (local input) double * 36 | On entry, SBUF specifies the starting address of buffer to be 37 | sent. 38 | .TP 8 39 | SCOUNT (local input) int 40 | On entry, SCOUNT specifies the number of double precision 41 | entries in SBUF. SCOUNT must be at least zero. 
42 | .TP 8 43 | STAG (local input) int 44 | On entry, STAG specifies the message tag to be used for the 45 | sending communication operation. 46 | .TP 8 47 | RBUF (local output) double * 48 | On entry, RBUF specifies the starting address of buffer to be 49 | received. 50 | .TP 8 51 | RCOUNT (local input) int 52 | On entry, RCOUNT specifies the number of double precision 53 | entries in RBUF. RCOUNT must be at least zero. 54 | .TP 8 55 | RTAG (local input) int 56 | On entry, RTAG specifies the message tag to be used for the 57 | receiving communication operation. 58 | .TP 8 59 | PARTNER (local input) int 60 | On entry, PARTNER specifies the rank of the collaborative 61 | process in the communication space defined by COMM. 62 | .TP 8 63 | COMM (local input) MPI_Comm 64 | The MPI communicator identifying the communication space. 65 | .SH SEE ALSO 66 | .BR HPL_send \ (3), 67 | .BR HPL_recv \ (3). 68 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_send.3: -------------------------------------------------------------------------------- 1 | .TH HPL_send 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_send \- Send a message. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&int\fR 8 | \fB\&HPL_send(\fR 9 | \fB\&double *\fR 10 | \fI\&SBUF\fR, 11 | \fB\&int\fR 12 | \fI\&SCOUNT\fR, 13 | \fB\&int\fR 14 | \fI\&DEST\fR, 15 | \fB\&int\fR 16 | \fI\&STAG\fR, 17 | \fB\&MPI_Comm\fR 18 | \fI\&COMM\fR 19 | \fB\&);\fR 20 | .SH DESCRIPTION 21 | \fB\&HPL_send\fR 22 | is a simple wrapper around MPI_Send. Its main purpose is 23 | to allow for some experimentation / tuning of this simple routine. 24 | Successful completion is indicated by the returned error code 25 | MPI_SUCCESS. In the case of messages of length less than or equal to 26 | zero, this function returns immediately. 
27 | .SH ARGUMENTS 28 | .TP 8 29 | SBUF (local input) double * 30 | On entry, SBUF specifies the starting address of buffer to be 31 | sent. 32 | .TP 8 33 | SCOUNT (local input) int 34 | On entry, SCOUNT specifies the number of double precision 35 | entries in SBUF. SCOUNT must be at least zero. 36 | .TP 8 37 | DEST (local input) int 38 | On entry, DEST specifies the rank of the receiving process in 39 | the communication space defined by COMM. 40 | .TP 8 41 | STAG (local input) int 42 | On entry, STAG specifies the message tag to be used for this 43 | communication operation. 44 | .TP 8 45 | COMM (local input) MPI_Comm 46 | The MPI communicator identifying the communication space. 47 | .SH SEE ALSO 48 | .BR HPL_recv \ (3), 49 | .BR HPL_sendrecv \ (3). 50 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_setran.3: -------------------------------------------------------------------------------- 1 | .TH HPL_setran 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_setran \- Manage the random number generator. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_setran(\fR 9 | \fB\&const int\fR 10 | \fI\&OPTION\fR, 11 | \fB\&int *\fR 12 | \fI\&IRAN\fR 13 | \fB\&);\fR 14 | .SH DESCRIPTION 15 | \fB\&HPL_setran\fR 16 | initializes the random generator with the encoding of the 17 | first number X(0) in the sequence, and the constants a and c used to 18 | compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), 19 | a and c are stored in the static variables irand, ias and ics. When 20 | OPTION is 0 (resp. 1 and 2), irand (resp. ia and ic) is set to the 21 | values of the input array IRAN. When OPTION is 3, IRAN is set to the 22 | current value of irand, and irand is then incremented. 
23 | .SH ARGUMENTS 24 | .TP 8 25 | OPTION (local input) const int 26 | On entry, OPTION is an integer that specifies the operations 27 | to be performed on the random generator as specified above. 28 | .TP 8 29 | IRAN (local input/output) int * 30 | On entry, IRAN is an array of dimension 2, that contains the 31 | 16-lower and 15-higher bits of a random number. 32 | .SH SEE ALSO 33 | .BR HPL_ladd \ (3), 34 | .BR HPL_lmul \ (3), 35 | .BR HPL_xjumpm \ (3), 36 | .BR HPL_jumpit \ (3), 37 | .BR HPL_rand \ (3). 38 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_sum.3: -------------------------------------------------------------------------------- 1 | .TH HPL_sum 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_sum \- Combine (sum) two buffers. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_sum(\fR 9 | \fB\&const int\fR 10 | \fI\&N\fR, 11 | \fB\&const void *\fR 12 | \fI\&IN\fR, 13 | \fB\&void *\fR 14 | \fI\&INOUT\fR, 15 | \fB\&const HPL_T_TYPE\fR 16 | \fI\&DTYPE\fR 17 | \fB\&);\fR 18 | .SH DESCRIPTION 19 | \fB\&HPL_sum\fR 20 | combines (sum) two buffers. 21 | .SH ARGUMENTS 22 | .TP 8 23 | N (input) const int 24 | On entry, N specifies the length of the buffers to be 25 | combined. N must be at least zero. 26 | .TP 8 27 | IN (input) const void * 28 | On entry, IN points to the input-only buffer to be combined. 29 | .TP 8 30 | INOUT (input/output) void * 31 | On entry, INOUT points to the input-output buffer to be 32 | combined. On exit, the entries of this array contains the 33 | combined results. 34 | .TP 8 35 | DTYPE (input) const HPL_T_TYPE 36 | On entry, DTYPE specifies the type of the buffers operands. 37 | .SH SEE ALSO 38 | .BR HPL_broadcast \ (3), 39 | .BR HPL_reduce \ (3), 40 | .BR HPL_all_reduce \ (3), 41 | .BR HPL_barrier \ (3), 42 | .BR HPL_min \ (3), 43 | .BR HPL_max \ (3), 44 | .BR HPL_sum \ (3). 
45 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_timer.3: -------------------------------------------------------------------------------- 1 | .TH HPL_timer 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_timer \- Timer facility. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_timer(\fR 9 | \fB\&const int\fR 10 | \fI\&I\fR 11 | \fB\&);\fR 12 | .SH DESCRIPTION 13 | \fB\&HPL_timer\fR 14 | provides a "stopwatch" functionality cpu/wall timer in 15 | seconds. Up to 64 separate timers can be functioning at once. The 16 | first call starts the timer, and the second stops it. This routine 17 | can be disabled by calling HPL_timer_disable(), so that calls to 18 | the timer are ignored. This feature can be used to make sure certain 19 | sections of code do not affect timings, even if they call routines 20 | which have HPL_timer calls in them. HPL_timer_enable() will re-enable 21 | the timer functionality. One can retrieve the current value of a 22 | timer by calling 23 | 24 | t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) 25 | 26 | where I is the timer index in [0..64). To initialize the timer 27 | functionality, one must have called HPL_timer_boot() prior to any of 28 | the functions mentioned above. 29 | .SH ARGUMENTS 30 | .TP 8 31 | I (global input) const int 32 | On entry, I specifies the timer to stop/start. 33 | .SH SEE ALSO 34 | .BR HPL_timer_cputime \ (3), 35 | .BR HPL_timer_walltime \ (3). 36 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_timer_cputime.3: -------------------------------------------------------------------------------- 1 | .TH HPL_timer_cputime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_timer_cputime \- Return the CPU time. 
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&double\fR 8 | \fB\&HPL_timer_cputime();\fR 9 | .SH DESCRIPTION 10 | \fB\&HPL_timer_cputime\fR 11 | returns the cpu time. If HPL_USE_CLOCK is defined, 12 | the clock() function is used to return an approximation of processor 13 | time used by the program. The value returned is the CPU time used so 14 | far as a clock_t; to get the number of seconds used, the result is 15 | divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C 16 | standard library. If HPL_USE_TIMES is defined, the times() function 17 | is used instead. This function returns the current process times. 18 | times() returns the number of clock ticks that have elapsed since the 19 | system has been up. Otherwise and by default, the standard library 20 | function getrusage() is used. 21 | .SH SEE ALSO 22 | .BR HPL_timer_walltime \ (3), 23 | .BR HPL_timer \ (3). 24 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_timer_walltime.3: -------------------------------------------------------------------------------- 1 | .TH HPL_timer_walltime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_timer_walltime \- Return the elapsed (wall-clock) time. 4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&double\fR 8 | \fB\&HPL_timer_walltime();\fR 9 | .SH DESCRIPTION 10 | \fB\&HPL_timer_walltime\fR 11 | returns the elapsed (wall-clock) time. 12 | .SH SEE ALSO 13 | .BR HPL_timer_cputime \ (3), 14 | .BR HPL_timer \ (3). 15 | -------------------------------------------------------------------------------- /hpl/man/man3/HPL_warn.3: -------------------------------------------------------------------------------- 1 | .TH HPL_warn 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" 2 | .SH NAME 3 | HPL_warn \- displays an error message. 
4 | .SH SYNOPSIS 5 | \fB\&#include "hpl.h"\fR 6 | 7 | \fB\&void\fR 8 | \fB\&HPL_warn(\fR 9 | \fB\&FILE *\fR 10 | \fI\&STREAM\fR, 11 | \fB\&int\fR 12 | \fI\&LINE\fR, 13 | \fB\&const char *\fR 14 | \fI\&SRNAME\fR, 15 | \fB\&const char *\fR 16 | \fI\&FORM\fR, 17 | \fB\&...\fR 18 | \fB\&);\fR 19 | .SH DESCRIPTION 20 | \fB\&HPL_warn\fR 21 | displays an error message. 22 | .SH ARGUMENTS 23 | .TP 8 24 | STREAM (local input) FILE * 25 | On entry, STREAM specifies the output stream. 26 | .TP 8 27 | LINE (local input) int 28 | On entry, LINE specifies the line number in the file where 29 | the error has occurred. When LINE is not a positive line 30 | number, it is ignored. 31 | .TP 8 32 | SRNAME (local input) const char * 33 | On entry, SRNAME should be the name of the routine calling 34 | this error handler. 35 | .TP 8 36 | FORM (local input) const char * 37 | On entry, FORM specifies the format, i.e., how the subsequent 38 | arguments are converted for output. 39 | .TP 8 40 | (local input) ... 41 | On entry, ... is the list of arguments to be printed within 42 | the format string. 43 | .SH EXAMPLE 44 | \fI\&#include "hpl.h"\fR 45 | 46 | int main(int argc, char *argv[]) 47 | .br 48 | { 49 | .br 50 | HPL_warn( stderr, __LINE__, __FILE__, 51 | .br 52 | "Demo.\en" ); 53 | .br 54 | exit(0); return(0); 55 | .br 56 | } 57 | .SH SEE ALSO 58 | .BR HPL_abort \ (3), 59 | .BR HPL_fprintf \ (3). 
60 | -------------------------------------------------------------------------------- /hpl/testing/ptest/HPL.dat: -------------------------------------------------------------------------------- 1 | HPLinpack benchmark input file 2 | Innovative Computing Laboratory, University of Tennessee 3 | HPL.out output file name (if any) 4 | 6 device out (6=stdout,7=stderr,file) 5 | 4 # of problems sizes (N) 6 | 29 30 34 35 Ns 7 | 4 # of NBs 8 | 1 2 3 4 NBs 9 | 0 PMAP process mapping (0=Row-,1=Column-major) 10 | 3 # of process grids (P x Q) 11 | 2 1 4 Ps 12 | 2 4 1 Qs 13 | 16.0 threshold 14 | 3 # of panel fact 15 | 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) 16 | 2 # of recursive stopping criterium 17 | 2 4 NBMINs (>= 1) 18 | 1 # of panels in recursion 19 | 2 NDIVs 20 | 3 # of recursive panel fact. 21 | 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) 22 | 1 # of broadcast 23 | 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 24 | 1 # of lookahead depth 25 | 0 DEPTHs (>=0) 26 | 2 SWAP (0=bin-exch,1=long,2=mix) 27 | 64 swapping threshold 28 | 0 L1 in (0=transposed,1=no-transposed) form 29 | 0 U in (0=transposed,1=no-transposed) form 30 | 1 Equilibration (0=no,1=yes) 31 | 8 memory alignment in double (> 0) 32 | -------------------------------------------------------------------------------- /hpl/www/1rinM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/1rinM.jpg -------------------------------------------------------------------------------- /hpl/www/1ring.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/1ring.jpg -------------------------------------------------------------------------------- /hpl/www/2-273x48.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/2-273x48.jpg -------------------------------------------------------------------------------- /hpl/www/2rinM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/2rinM.jpg -------------------------------------------------------------------------------- /hpl/www/2ring.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/2ring.jpg -------------------------------------------------------------------------------- /hpl/www/HPL_abort.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_abort HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_abort halts execution. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_abort( 16 | int 17 | LINE, 18 | const char * 19 | SRNAME, 20 | const char * 21 | FORM, 22 | ... 23 | ); 24 | 25 |

Description

26 | HPL_abort 27 | displays an error message on stderr and halts execution. 28 | 29 |

Arguments

30 |
31 | LINE    (local input)                 int
32 |         On entry,  LINE  specifies the line  number in the file where
33 |         the  error  has occurred.  When  LINE  is not a positive line
34 |         number, it is ignored.
35 | 
36 |
37 | SRNAME  (local input)                 const char *
38 |         On entry, SRNAME  should  be the name of the routine  calling
39 |         this error handler.
40 | 
41 |
42 | FORM    (local input)                 const char *
43 |         On entry, FORM specifies the format, i.e., how the subsequent
44 |         arguments are converted for output.
45 | 
46 |
47 |         (local input)                 ...
48 |         On entry,  ...  is the list of arguments to be printed within
49 |         the format string.
50 | 
51 | 52 |

Example

53 | #include "hpl.h"

54 |
55 | int main(int argc, char *argv[])
56 | {
57 |    HPL_abort( __LINE__, __FILE__, "Halt.\n" );
58 |    exit(0); return(0);
59 | }
60 | 
61 | 62 |

See Also

63 | HPL_fprintf, 64 | HPL_warn. 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /hpl/www/HPL_all_reduce.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_all_reduce HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_all_reduce All reduce operation. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_all_reduce( 16 | void * 17 | BUFFER, 18 | const int 19 | COUNT, 20 | const HPL_T_TYPE 21 | DTYPE, 22 | const HPL_T_OP 23 | OP, 24 | MPI_Comm 25 | COMM 26 | ); 27 | 28 |

Description

29 | HPL_all_reduce 30 | performs a global reduce operation across all 31 | processes of a group leaving the results on all processes. 32 | 33 |

Arguments

34 |
35 | BUFFER  (local input/global output)   void *
36 |         On entry,  BUFFER  points to  the  buffer to be combined.  On
37 |         exit, this array contains the combined data and  is identical
38 |         on all processes in the group.
39 | 
40 |
41 | COUNT   (global input)                const int
42 |         On entry,  COUNT  indicates the number of entries in  BUFFER.
43 |         COUNT must be at least zero.
44 | 
45 |
46 | DTYPE   (global input)                const HPL_T_TYPE
47 |         On entry,  DTYPE  specifies the type of the buffers operands.
48 | 
49 |
50 | OP      (global input)                const HPL_T_OP 
51 |         On entry, OP is a pointer to the local combine function.
52 | 
53 |
54 | COMM    (global/local input)          MPI_Comm
55 |         The MPI communicator identifying the process collection.
56 | 
57 | 58 |

See Also

59 | HPL_broadcast, 60 | HPL_reduce, 61 | HPL_barrier, 62 | HPL_min, 63 | HPL_max, 64 | HPL_sum. 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /hpl/www/HPL_barrier.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_barrier HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_barrier Barrier operation. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_barrier( 16 | MPI_Comm 17 | COMM 18 | ); 19 | 20 |

Description

21 | HPL_barrier 22 | blocks the caller until all process members have called it. 23 | The call returns at any process only after all group members have 24 | entered the call. 25 | 26 |

Arguments

27 |
28 | COMM    (global/local input)          MPI_Comm
29 |         The MPI communicator identifying the process collection.
30 | 
31 | 32 |

See Also

33 | HPL_broadcast, 34 | HPL_reduce, 35 | HPL_all_reduce, 36 | HPL_min, 37 | HPL_max, 38 | HPL_sum. 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /hpl/www/HPL_bcast.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_bcast HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_bcast Perform the row broadcast. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_bcast( 16 | HPL_T_panel * 17 | PANEL, 18 | int * 19 | IFLAG 20 | ); 21 | 22 |

Description

23 | HPL_bcast 24 | broadcasts the current panel. Successful completion is 25 | indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to 26 | HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was 27 | not completed, in which case this function should be called again. 28 | 29 |

Arguments

30 |
31 | PANEL   (input/output)                HPL_T_panel *
32 |         On entry,  PANEL  points to the  current panel data structure
33 |         being broadcast.
34 | 
35 |
36 | IFLAG   (output)                      int *
37 |         On exit,  IFLAG  indicates  whether  or not the broadcast has
38 |         occurred.
39 | 
40 | 41 |

See Also

42 | HPL_binit, 43 | HPL_bwait. 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /hpl/www/HPL_binit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_binit HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_binit Initialize the row broadcast. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_binit( 16 | HPL_T_panel * 17 | PANEL 18 | ); 19 | 20 |

Description

21 | HPL_binit 22 | initializes a row broadcast. Successful completion is 23 | indicated by the returned error code HPL_SUCCESS. 24 | 25 |

Arguments

26 |
27 | PANEL   (input/output)                HPL_T_panel *
28 |         On entry,  PANEL  points to the  current panel data structure
29 |         being broadcast.
30 | 
31 | 32 |

See Also

33 | HPL_bcast, 34 | HPL_bwait. 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /hpl/www/HPL_broadcast.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_broadcast HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_broadcast Broadcast operation. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_broadcast( 16 | void * 17 | BUFFER, 18 | const int 19 | COUNT, 20 | const HPL_T_TYPE 21 | DTYPE, 22 | const int 23 | ROOT, 24 | MPI_Comm 25 | COMM 26 | ); 27 | 28 |

Description

29 | HPL_broadcast 30 | broadcasts a message from the process with rank ROOT to 31 | all processes in the group. 32 | 33 |

Arguments

34 |
35 | BUFFER  (local input/output)          void *
36 |         On entry,  BUFFER  points to  the  buffer to be broadcast. On
37 |         exit, this array contains the broadcast data and is identical
38 |         on all processes in the group.
39 | 
40 |
41 | COUNT   (global input)                const int
42 |         On entry,  COUNT  indicates the number of entries in  BUFFER.
43 |         COUNT must be at least zero.
44 | 
45 |
46 | DTYPE   (global input)                const HPL_T_TYPE
47 |         On entry,  DTYPE  specifies the type of the buffers operands.
48 | 
49 |
50 | ROOT    (global input)                const int
51 |         On entry, ROOT is the coordinate of the source process.
52 | 
53 |
54 | COMM    (global/local input)          MPI_Comm
55 |         The MPI communicator identifying the process collection.
56 | 
57 | 58 |

See Also

59 | HPL_reduce, 60 | HPL_all_reduce, 61 | HPL_barrier, 62 | HPL_min, 63 | HPL_max, 64 | HPL_sum. 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /hpl/www/HPL_bwait.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_bwait HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_bwait Finalize the row broadcast. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_bwait( 16 | HPL_T_panel * 17 | PANEL 18 | ); 19 | 20 |

Description

21 | HPL_bwait 22 | waits for the row broadcast of the current panel to 23 | terminate. Successful completion is indicated by the returned error 24 | code HPL_SUCCESS. 25 | 26 |

Arguments

27 |
28 | PANEL   (input/output)                HPL_T_panel *
29 |         On entry,  PANEL  points to the  current panel data structure
30 |         being broadcast.
31 | 
32 | 33 |

See Also

34 | HPL_binit, 35 | HPL_bcast. 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /hpl/www/HPL_copyL.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_copyL HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_copyL Copy the current panel into a contiguous workspace. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_copyL( 16 | HPL_T_panel * 17 | PANEL 18 | ); 19 | 20 |

Description

21 | HPL_copyL 22 | copies the panel of columns, the L1 replicated submatrix, 23 | the pivot array and the info scalar into a contiguous workspace for 24 | later broadcast. 25 | 26 | The copy of this panel into a contiguous buffer can be enforced by 27 | specifying -DHPL_COPY_L in the architecture specific Makefile. 28 | 29 |

Arguments

30 |
31 | PANEL   (input/output)                HPL_T_panel *
32 |         On entry,  PANEL  points to the  current panel data structure
33 |         being broadcast.
34 | 
35 | 36 |

See Also

37 | HPL_binit, 38 | HPL_bcast, 39 | HPL_bwait. 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /hpl/www/HPL_fprintf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_fprintf HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_fprintf fprintf + fflush wrapper. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_fprintf( 16 | FILE * 17 | STREAM, 18 | const char * 19 | FORM, 20 | ... 21 | ); 22 | 23 |

Description

24 | HPL_fprintf 25 | is a wrapper around fprintf flushing the output stream. 26 | 27 |

Arguments

28 |
29 | STREAM  (local input)                 FILE *
30 |         On entry, STREAM specifies the output stream.
31 | 
32 |
33 | FORM    (local input)                 const char *
34 |         On entry, FORM specifies the format, i.e., how the subsequent
35 |         arguments are converted for output.
36 | 
37 |
38 |         (local input)                 ...
39 |         On entry,  ...  is the list of arguments to be printed within
40 |         the format string.
41 | 
42 | 43 |

Example

44 | #include "hpl.h"

45 |
46 | int main(int argc, char *argv[])
47 | {
48 |    HPL_fprintf( stdout, "Hello World.\n" );
49 |    exit(0); return(0);
50 | }
51 | 
52 | 53 |

See Also

54 | HPL_abort, 55 | HPL_warn. 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /hpl/www/HPL_grid_exit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_grid_exit HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_grid_exit Exit process grid. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_grid_exit( 16 | HPL_T_grid * 17 | GRID 18 | ); 19 | 20 |

Description

21 | HPL_grid_exit 22 | marks the process grid object for deallocation. The 23 | returned error code MPI_SUCCESS indicates successful completion. 24 | Other error codes are (MPI) implementation dependent. 25 | 26 |

Arguments

27 |
28 | GRID    (local input/output)          HPL_T_grid *
29 |         On entry,  GRID  points  to the data structure containing the
30 |         process grid to be released.
31 | 
32 | 33 |

See Also

34 | HPL_pnum, 35 | HPL_grid_init, 36 | HPL_grid_info. 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /hpl/www/HPL_idamax.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_idamax HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_idamax 1st k s.t. |x_k| = max_i(|x_i|). 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_idamax( 16 | const int 17 | N, 18 | const double * 19 | X, 20 | const int 21 | INCX 22 | ); 23 | 24 |

Description

25 | HPL_idamax 26 | returns the index in an n-vector x of the first element 27 | having maximum absolute value. 28 | 29 |

Arguments

30 |
31 | N       (local input)                 const int
32 |         On entry, N specifies the length of the vector x. N  must  be
33 |         at least zero.
34 | 
35 |
36 | X       (local input)                 const double *
37 |         On entry,  X  is an incremented array of dimension  at  least
38 |         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
39 | 
40 |
41 | INCX    (local input)                 const int
42 |         On entry, INCX specifies the increment for the elements of X.
43 |         INCX must not be zero.
44 | 
45 | 46 |

Example

47 | #include "hpl.h"

48 |
49 | int main(int argc, char *argv[])
50 | {
51 |    double x[3];
52 |    int    imax;
53 |    x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
54 |    imax = HPL_idamax( 3, x, 1 );
55 |    printf("imax=%d\n", imax);
56 |    exit(0);
57 |    return(0);
58 | }
59 | 
60 | 61 |

See Also

62 | HPL_daxpy, 63 | HPL_dcopy, 64 | HPL_dscal, 65 | HPL_dswap. 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /hpl/www/HPL_jumpit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_jumpit HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_jumpit jump into the random sequence. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_jumpit( 16 | int * 17 | MULT, 18 | int * 19 | IADD, 20 | int * 21 | IRANN, 22 | int * 23 | IRANM 24 | ); 25 | 26 |

Description

27 | HPL_jumpit 28 | jumps in the random sequence from the number X(n) encoded 29 | in IRANN to the number X(m) encoded in IRANM using the constants A 30 | and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A 31 | and C obviously depend on m and n, see the function HPL_xjumpm in 32 | order to initialize them. 33 | 34 |

Arguments

35 |
36 | MULT    (local input)                 int *
37 |         On entry, MULT is an array of dimension 2, that contains the
38 |         16-lower and 15-higher bits of the constant A.
39 | 
40 |
41 | IADD    (local input)                 int *
42 |         On entry, IADD is an array of dimension 2, that contains the
43 |         16-lower and 15-higher bits of the constant C.
44 | 
45 |
46 | IRANN   (local input)                 int *
47 |         On entry,  IRANN  is an array of dimension 2,  that contains 
48 |         the 16-lower and 15-higher bits of the encoding of X(n).
49 | 
50 |
51 | IRANM   (local output)                int *
52 |         On entry,  IRANM  is an array of dimension 2.  On exit, this
53 |         array contains respectively the 16-lower and  15-higher bits
54 |         of the encoding of X(m).
55 | 
56 | 57 |

See Also

58 | HPL_ladd, 59 | HPL_lmul, 60 | HPL_setran, 61 | HPL_xjumpm, 62 | HPL_rand. 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /hpl/www/HPL_ladd.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_ladd HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_ladd Adds two long positive integers. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_ladd( 16 | int * 17 | J, 18 | int * 19 | K, 20 | int * 21 | I 22 | ); 23 | 24 |

Description

25 | HPL_ladd 26 | adds without carry two long positive integers K and J and 27 | puts the result into I. The long integers I, J, K are encoded on 64 28 | bits using an array of 2 integers. The 32-lower bits are stored in 29 | the first entry of each array, the 32-higher bits in the second 30 | entry. 31 | 32 |

Arguments

33 |
34 | J       (local input)                 int *
35 |         On entry, J is an integer array of dimension 2 containing the
36 |         encoded long integer J.
37 | 
38 |
39 | K       (local input)                 int *
40 |         On entry, K is an integer array of dimension 2 containing the
41 |         encoded long integer K.
42 | 
43 |
44 | I       (local output)                int *
45 |         On entry, I is an integer array of dimension 2. On exit, this
46 |         array contains the encoded long integer result.
47 | 
48 | 49 |

See Also

50 | HPL_lmul, 51 | HPL_setran, 52 | HPL_xjumpm, 53 | HPL_jumpit, 54 | HPL_rand. 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /hpl/www/HPL_lmul.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_lmul HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_lmul multiplies 2 long positive integers. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_lmul( 16 | int * 17 | K, 18 | int * 19 | J, 20 | int * 21 | I 22 | ); 23 | 24 |

Description

25 | HPL_lmul 26 | multiplies without carry two long positive integers K and J 27 | and puts the result into I. The long integers I, J, K are encoded on 28 | 64 bits using an array of 2 integers. The 32-lower bits are stored in 29 | the first entry of each array, the 32-higher bits in the second entry 30 | of each array. For efficiency purposes, the intrinsic modulo function 31 | is inlined. 32 | 33 |

Arguments

34 |
35 | K       (local input)                 int *
36 |         On entry, K is an integer array of dimension 2 containing the
37 |         encoded long integer K.
38 | 
39 |
40 | J       (local input)                 int *
41 |         On entry, J is an integer array of dimension 2 containing the
42 |         encoded long integer J.
43 | 
44 |
45 | I       (local output)                int *
46 |         On entry, I is an integer array of dimension 2. On exit, this
47 |         array contains the encoded long integer result.
48 | 
49 | 50 |

See Also

51 | HPL_ladd, 52 | HPL_setran, 53 | HPL_xjumpm, 54 | HPL_jumpit, 55 | HPL_rand. 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /hpl/www/HPL_max.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_max HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_max Combine (max) two buffers. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_max( 16 | const int 17 | N, 18 | const void * 19 | IN, 20 | void * 21 | INOUT, 22 | const HPL_T_TYPE 23 | DTYPE 24 | ); 25 | 26 |

Description

27 | HPL_max 28 | combines (max) two buffers. 29 | 30 |

Arguments

31 |
32 | N       (input)                       const int
33 |         On entry, N  specifies  the  length  of  the  buffers  to  be
34 |         combined. N must be at least zero.
35 | 
36 |
37 | IN      (input)                       const void *
38 |         On entry, IN points to the input-only buffer to be combined.
39 | 
40 |
41 | INOUT   (input/output)                void *
42 |         On entry, INOUT  points  to  the  input-output  buffer  to be
43 |         combined.  On exit,  the  entries of this array  contain  the
44 |         combined results.
45 | 
46 |
47 | DTYPE   (input)                       const HPL_T_TYPE
48 |         On entry,  DTYPE  specifies the type of the buffers operands.
49 | 
50 | 51 |

See Also

52 | HPL_broadcast, 53 | HPL_reduce, 54 | HPL_all_reduce, 55 | HPL_barrier, 56 | HPL_min, 57 | HPL_sum. 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /hpl/www/HPL_min.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_min HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_min Combine (min) two buffers. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_min( 16 | const int 17 | N, 18 | const void * 19 | IN, 20 | void * 21 | INOUT, 22 | const HPL_T_TYPE 23 | DTYPE 24 | ); 25 | 26 |

Description

27 | HPL_min 28 | combines (min) two buffers. 29 | 30 |

Arguments

31 |
32 | N       (input)                       const int
33 |         On entry, N  specifies  the  length  of  the  buffers  to  be
34 |         combined. N must be at least zero.
35 | 
36 |
37 | IN      (input)                       const void *
38 |         On entry, IN points to the input-only buffer to be combined.
39 | 
40 |
41 | INOUT   (input/output)                void *
42 |         On entry, INOUT  points  to  the  input-output  buffer  to be
43 |         combined.  On exit,  the  entries of this array  contain  the
44 |         combined results.
45 | 
46 |
47 | DTYPE   (input)                       const HPL_T_TYPE
48 |         On entry,  DTYPE  specifies the type of the buffers operands.
49 | 
50 | 51 |

See Also

52 | HPL_broadcast, 53 | HPL_reduce, 54 | HPL_all_reduce, 55 | HPL_barrier, 56 | HPL_max, 57 | HPL_sum. 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /hpl/www/HPL_pabort.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pabort HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pabort halts execution. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_pabort( 16 | int 17 | LINE, 18 | const char * 19 | SRNAME, 20 | const char * 21 | FORM, 22 | ... 23 | ); 24 | 25 |

Description

26 | HPL_pabort 27 | displays an error message on stderr and halts execution. 28 | 29 |

Arguments

30 |
31 | LINE    (local input)                 int
32 |         On entry,  LINE  specifies the line  number in the file where
33 |         the  error  has  occurred. When  LINE  is not a positive line
34 |         number, it is ignored.
35 | 
36 |
37 | SRNAME  (local input)                 const char *
38 |         On entry, SRNAME  should  be the name of the routine  calling
39 |         this error handler.
40 | 
41 |
42 | FORM    (local input)                 const char *
43 |         On entry, FORM specifies the format, i.e., how the subsequent
44 |         arguments are converted for output.
45 | 
46 |
47 |         (local input)                 ...
48 |         On entry,  ...  is the list of arguments to be printed within
49 |         the format string.
50 | 
51 | 52 |

See Also

53 | HPL_fprintf, 54 | HPL_pwarn. 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /hpl/www/HPL_packL.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_packL HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_packL Form the MPI structure for the row ring broadcasts. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_packL( 16 | HPL_T_panel * 17 | PANEL, 18 | const int 19 | INDEX, 20 | const int 21 | LEN, 22 | const int 23 | IBUF 24 | ); 25 | 26 |

Description

27 | HPL_packL 28 | forms the MPI data type for the panel to be broadcast. 29 | Successful completion is indicated by the returned error code 30 | MPI_SUCCESS. 31 | 32 |

Arguments

33 |
34 | PANEL   (input/output)                HPL_T_panel *
35 |         On entry,  PANEL  points to the  current panel data structure
36 |         being broadcast.
37 | 
38 |
39 | INDEX   (input)                       const int
40 |         On entry,  INDEX  points  to  the  first entry of the  packed
41 |         buffer being broadcast.
42 | 
43 |
44 | LEN     (input)                       const int
45 |         On entry, LEN is the length of the packed buffer.
46 | 
47 |
48 | IBUF    (input)                       const int
49 |         On entry, IBUF  specifies the panel buffer/count/type entries
50 |         that should be initialized.
51 | 
52 | 53 |

See Also

54 | HPL_binit, 55 | HPL_bcast, 56 | HPL_bwait. 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /hpl/www/HPL_pddriver.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | main HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | main HPL main timing program. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | main(); 16 | 17 |

Description

18 | main 19 | is the main driver program for testing the HPL routines. 20 | This program is driven by a short data file named "HPL.dat". 21 | 22 |

See Also

23 | HPL_pdinfo, 24 | HPL_pdtest. 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /hpl/www/HPL_pdgesv.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pdgesv HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pdgesv Solve A x = b. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_pdgesv( 16 | HPL_T_grid * 17 | GRID, 18 | HPL_T_palg * 19 | ALGO, 20 | HPL_T_pmat * 21 | A 22 | ); 23 | 24 |

Description

25 | HPL_pdgesv 26 | factors an N-by-N+1 matrix using LU factorization with row 27 | partial pivoting. The main algorithm is the "right looking" variant 28 | with or without look-ahead. The lower triangular factor is left 29 | unpivoted and the pivots are not returned. The right hand side is the 30 | N+1 column of the coefficient matrix. 31 | 32 |

Arguments

33 |
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | 
38 |
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | 
43 |
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | 
48 | 49 |

See Also

50 | HPL_pdgesv0, 51 | HPL_pdgesvK1, 52 | HPL_pdgesvK2, 53 | HPL_pdtrsv. 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /hpl/www/HPL_pdgesv0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pdgesv0 HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pdgesv0 Factor an N x N+1 matrix. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_pdgesv0( 16 | HPL_T_grid * 17 | GRID, 18 | HPL_T_palg * 19 | ALGO, 20 | HPL_T_pmat * 21 | A 22 | ); 23 | 24 |

Description

25 | HPL_pdgesv0 26 | factors an N-by-N+1 matrix using LU factorization with row 27 | partial pivoting. The main algorithm is the "right looking" variant 28 | without look-ahead. The lower triangular factor is left unpivoted and 29 | the pivots are not returned. The right hand side is the N+1 column of 30 | the coefficient matrix. 31 | 32 |

Arguments

33 |
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | 
38 |
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | 
43 |
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | 
48 | 49 |

See Also

50 | HPL_pdgesv, 51 | HPL_pdgesvK1, 52 | HPL_pdgesvK2, 53 | HPL_pdfact, 54 | HPL_binit, 55 | HPL_bcast, 56 | HPL_bwait, 57 | HPL_pdupdateNN, 58 | HPL_pdupdateNT, 59 | HPL_pdupdateTN, 60 | HPL_pdupdateTT. 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /hpl/www/HPL_pdgesvK1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pdgesvK1 HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pdgesvK1 Factor an N x N+1 matrix. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_pdgesvK1( 16 | HPL_T_grid * 17 | GRID, 18 | HPL_T_palg * 19 | ALGO, 20 | HPL_T_pmat * 21 | A 22 | ); 23 | 24 |

Description

25 | HPL_pdgesvK1 26 | factors an N-by-N+1 matrix using LU factorization with row 27 | partial pivoting. The main algorithm is the "right looking" variant 28 | with look-ahead. The lower triangular factor is left unpivoted and 29 | the pivots are not returned. The right hand side is the N+1 column of 30 | the coefficient matrix. 31 | 32 |

Arguments

33 |
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | 
38 |
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | 
43 |
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | 
48 | 49 |

See Also

50 | HPL_pdgesv, 51 | HPL_pdgesvK2, 52 | HPL_pdfact, 53 | HPL_binit, 54 | HPL_bcast, 55 | HPL_bwait, 56 | HPL_pdupdateNN, 57 | HPL_pdupdateNT, 58 | HPL_pdupdateTN, 59 | HPL_pdupdateTT. 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /hpl/www/HPL_pdgesvK2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pdgesvK2 HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pdgesvK2 Factor an N x N+1 matrix. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_pdgesvK2( 16 | HPL_T_grid * 17 | GRID, 18 | HPL_T_palg * 19 | ALGO, 20 | HPL_T_pmat * 21 | A 22 | ); 23 | 24 |

Description

25 | HPL_pdgesvK2 26 | factors an N-by-N+1 matrix using LU factorization with row 27 | partial pivoting. The main algorithm is the "right looking" variant 28 | with look-ahead. The lower triangular factor is left unpivoted and 29 | the pivots are not returned. The right hand side is the N+1 column of 30 | the coefficient matrix. 31 | 32 |

Arguments

33 |
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | 
38 |
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | 
43 |
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | 
48 | 49 |

See Also

50 | HPL_pdgesv, 51 | HPL_pdgesv0, 52 | HPL_pdgesvK1, 53 | HPL_pdfact, 54 | HPL_binit, 55 | HPL_bcast, 56 | HPL_bwait, 57 | HPL_pdupdateNN, 58 | HPL_pdupdateNT, 59 | HPL_pdupdateTN, 60 | HPL_pdupdateTT. 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /hpl/www/HPL_pdpanel_disp.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pdpanel_disp HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pdpanel_disp Deallocate a panel data structure. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_pdpanel_disp( 16 | HPL_T_panel * * 17 | PANEL 18 | ); 19 | 20 |

Description

21 | HPL_pdpanel_disp 22 | deallocates the panel structure and resources and 23 | stores the error code returned by the panel factorization. 24 | 25 |

Arguments

26 |
27 | PANEL   (local input/output)          HPL_T_panel * *
28 |         On entry,  PANEL  points  to  the  address  of the panel data
29 |         structure to be deallocated.
30 | 
31 | 32 |

See Also

33 | HPL_pdpanel_new, 34 | HPL_pdpanel_init, 35 | HPL_pdpanel_free. 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /hpl/www/HPL_pdpanel_free.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pdpanel_free HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pdpanel_free Deallocate the panel resources. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_pdpanel_free( 16 | HPL_T_panel * 17 | PANEL 18 | ); 19 | 20 |

Description

21 | HPL_pdpanel_free 22 | deallocates the panel resources and stores the error 23 | code returned by the panel factorization. 24 | 25 |

Arguments

26 |
27 | PANEL   (local input/output)          HPL_T_panel *
28 |         On entry,  PANEL  points  to  the  panel data  structure from
29 |         which the resources should be deallocated.
30 | 
31 | 32 |

See Also

33 | HPL_pdpanel_new, 34 | HPL_pdpanel_init, 35 | HPL_pdpanel_disp. 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /hpl/www/HPL_pnum.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pnum HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pnum Rank determination. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_pnum( 16 | const HPL_T_grid * 17 | GRID, 18 | const int 19 | MYROW, 20 | const int 21 | MYCOL 22 | ); 23 | 24 |

Description

25 | HPL_pnum 26 | determines the rank of a process as a function of its 27 | coordinates in the grid. 28 | 29 |

Arguments

30 |
31 | GRID    (local input)                 const HPL_T_grid *
32 |         On entry,  GRID  points  to the data structure containing the
33 |         process grid information.
34 | 
35 |
36 | MYROW   (local input)                 const int
37 |         On entry,  MYROW  specifies the row coordinate of the process
38 |         whose rank is to be determined. MYROW must be greater than or
39 |         equal to zero and less than NPROW.
40 | 
41 |
42 | MYCOL   (local input)                 const int
43 |         On entry,  MYCOL  specifies  the  column  coordinate  of  the
44 |         process whose rank is to be determined. MYCOL must be greater
45 |         than or equal to zero and less than NPCOL.
46 | 
47 | 48 |

See Also

49 | HPL_grid_init, 50 | HPL_grid_info, 51 | HPL_grid_exit. 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /hpl/www/HPL_ptimer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_ptimer HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_ptimer Timer facility. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_ptimer( 16 | const int 17 | I 18 | ); 19 | 20 |

Description

21 | HPL_ptimer 22 | provides a "stopwatch" functionality cpu/wall timer in 23 | seconds. Up to 64 separate timers can be functioning at once. The 24 | first call starts the timer, and the second stops it. This routine 25 | can be disabled by calling HPL_ptimer_disable(), so that calls to 26 | the timer are ignored. This feature can be used to make sure certain 27 | sections of code do not affect timings, even if they call routines 28 | which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable 29 | the timer functionality. One can retrieve the current value of a 30 | timer by calling 31 | 32 | t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) 33 | 34 | where I is the timer index in [0..64). To initialize the timer 35 | functionality, one must have called HPL_ptimer_boot() prior to any of 36 | the functions mentioned above. 37 | 38 |

Arguments

39 |
40 | I       (global input)                const int
41 |         On entry, I specifies the timer to stop/start.
42 | 
43 | 44 |

See Also

45 | HPL_ptimer_cputime, 46 | HPL_ptimer_walltime. 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /hpl/www/HPL_ptimer_cputime.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_ptimer_cputime HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_ptimer_cputime Return the CPU time. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | double 15 | HPL_ptimer_cputime(); 16 | 17 |

Description

18 | HPL_ptimer_cputime 19 | returns the cpu time. If HPL_USE_CLOCK is defined, 20 | the clock() function is used to return an approximation of processor 21 | time used by the program. The value returned is the CPU time used so 22 | far as a clock_t; to get the number of seconds used, the result is 23 | divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C 24 | standard library. If HPL_USE_TIMES is defined, the times() function 25 | is used instead. This function returns the current process times. 26 | times() returns the number of clock ticks that have elapsed since the 27 | system has been up. Otherwise and by default, the standard library 28 | function getrusage() is used. 29 | 30 |

See Also

31 | HPL_ptimer_walltime, 32 | HPL_ptimer. 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /hpl/www/HPL_ptimer_walltime.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_ptimer_walltime HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_ptimer_walltime Return the elapsed (wall-clock) time. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | double 15 | HPL_ptimer_walltime(); 16 | 17 |

Description

18 | HPL_ptimer_walltime 19 | returns the elapsed (wall-clock) time. 20 | 21 |

See Also

22 | HPL_ptimer_cputime, 23 | HPL_ptimer. 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /hpl/www/HPL_pwarn.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_pwarn HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_pwarn displays an error message. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_pwarn( 16 | FILE * 17 | STREAM, 18 | int 19 | LINE, 20 | const char * 21 | SRNAME, 22 | const char * 23 | FORM, 24 | ... 25 | ); 26 | 27 |

Description

28 | HPL_pwarn 29 | displays an error message. 30 | 31 |

Arguments

32 |
33 | STREAM  (local input)                 FILE *
34 |         On entry, STREAM specifies the output stream.
35 | 
36 |
37 | LINE    (local input)                 int
38 |         On entry,  LINE  specifies the line  number in the file where
39 |         the  error  has  occurred. When  LINE  is not a positive line
40 |         number, it is ignored.
41 | 
42 |
43 | SRNAME  (local input)                 const char *
44 |         On entry, SRNAME  should  be the name of the routine  calling
45 |         this error handler.
46 | 
47 |
48 | FORM    (local input)                 const char *
49 |         On entry, FORM specifies the format, i.e., how the subsequent
50 |         arguments are converted for output.
51 | 
52 |
53 |         (local input)                 ...
54 |         On entry,  ...  is the list of arguments to be printed within
55 |         the format string.
56 | 
57 | 58 |

See Also

59 | HPL_pabort, 60 | HPL_fprintf. 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /hpl/www/HPL_rand.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_rand HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_rand random number generator. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | double 15 | HPL_rand(); 16 | 17 |

Description

18 | HPL_rand 19 | generates the next number in the random sequence. This 20 | function ensures that this number lies in the interval (-0.5, 0.5]. 21 | 22 | The static array irand contains the information (2 integers) required 23 | to generate the next number in the sequence X(n). This number is 24 | computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the 25 | constant d is the largest 64 bit positive integer. The array irand is 26 | then updated for the generation of the next number X(n+1) in the 27 | random sequence as follows X(n+1) = a * X(n) + c. The constants a and 28 | c should have been preliminarily stored in the arrays ias and ics as 29 | 2 pairs of integers. The initialization of ias, ics and irand is 30 | performed by the function HPL_setran. 31 | 32 |

See Also

33 | HPL_ladd, 34 | HPL_lmul, 35 | HPL_setran, 36 | HPL_xjumpm, 37 | HPL_jumpit. 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /hpl/www/HPL_recv.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_recv HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_recv Receive a message. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_recv( 16 | double * 17 | RBUF, 18 | int 19 | RCOUNT, 20 | int 21 | SRC, 22 | int 23 | RTAG, 24 | MPI_Comm 25 | COMM 26 | ); 27 | 28 |

Description

29 | HPL_recv 30 | is a simple wrapper around MPI_Recv. Its main purpose is 31 | to allow for some experimentation / tuning of this simple routine. 32 | Successful completion is indicated by the returned error code 33 | HPL_SUCCESS. In the case of messages of length less than or equal to 34 | zero, this function returns immediately. 35 | 36 |

Arguments

37 |
38 | RBUF    (local output)                double *
39 |         On entry, RBUF specifies the starting address of buffer to be
40 |         received.
41 | 
42 |
43 | RCOUNT  (local input)                 int
44 |         On entry,  RCOUNT  specifies  the number  of double precision
45 |         entries in RBUF. RCOUNT must be at least zero.
46 | 
47 |
48 | SRC     (local input)                 int
49 |         On entry, SRC  specifies the rank of the  sending  process in
50 |         the communication space defined by COMM.
51 | 
52 |
53 | RTAG    (local input)                 int
54 |         On entry,  RTAG specifies the message tag to be used for this
55 |         communication operation.
56 | 
57 |
58 | COMM    (local input)                 MPI_Comm
59 |         The MPI communicator identifying the communication space.
60 | 
61 | 62 |

See Also

63 | HPL_send, 64 | HPL_sendrecv. 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /hpl/www/HPL_send.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_send HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_send Send a message. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | int 15 | HPL_send( 16 | double * 17 | SBUF, 18 | int 19 | SCOUNT, 20 | int 21 | DEST, 22 | int 23 | STAG, 24 | MPI_Comm 25 | COMM 26 | ); 27 | 28 |

Description

29 | HPL_send 30 | is a simple wrapper around MPI_Send. Its main purpose is 31 | to allow for some experimentation / tuning of this simple routine. 32 | Successful completion is indicated by the returned error code 33 | MPI_SUCCESS. In the case of messages of length less than or equal to 34 | zero, this function returns immediately. 35 | 36 |

Arguments

37 |
38 | SBUF    (local input)                 double *
39 |         On entry, SBUF specifies the starting address of buffer to be
40 |         sent.
41 | 
42 |
43 | SCOUNT  (local input)                 int
44 |         On entry,  SCOUNT  specifies  the number of  double precision
45 |         entries in SBUF. SCOUNT must be at least zero.
46 | 
47 |
48 | DEST    (local input)                 int
49 |         On entry, DEST specifies the rank of the receiving process in
50 |         the communication space defined by COMM.
51 | 
52 |
53 | STAG    (local input)                 int
54 |         On entry,  STAG specifies the message tag to be used for this
55 |         communication operation.
56 | 
57 |
58 | COMM    (local input)                 MPI_Comm
59 |         The MPI communicator identifying the communication space.
60 | 
61 | 62 |

See Also

63 | HPL_recv, 64 | HPL_sendrecv. 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /hpl/www/HPL_setran.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_setran HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_setran Manage the random number generator. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_setran( 16 | const int 17 | OPTION, 18 | int * 19 | IRAN 20 | ); 21 | 22 |

Description

23 | HPL_setran 24 | initializes the random generator with the encoding of the 25 | first number X(0) in the sequence, and the constants a and c used to 26 | compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), 27 | a and c are stored in the static variables irand, ias and ics. When 28 | OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the 29 | values of the input array IRAN. When OPTION is 3, IRAN is set to the 30 | current value of irand, and irand is then incremented. 31 | 32 |

Arguments

33 |
34 | OPTION  (local input)                 const int
35 |         On entry, OPTION  is an integer that specifies the operations
36 |         to be performed on the random generator as specified above.
37 | 
38 |
39 | IRAN    (local input/output)          int *
40 |         On entry,  IRAN is an array of dimension 2, that contains the
41 |         16-lower and 15-higher bits of a random number.
42 | 
43 | 44 |

See Also

45 | HPL_ladd, 46 | HPL_lmul, 47 | HPL_xjumpm, 48 | HPL_jumpit, 49 | HPL_rand. 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /hpl/www/HPL_sum.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_sum HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_sum Combine (sum) two buffers. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_sum( 16 | const int 17 | N, 18 | const void * 19 | IN, 20 | void * 21 | INOUT, 22 | const HPL_T_TYPE 23 | DTYPE 24 | ); 25 | 26 |

Description

27 | HPL_sum 28 | combines (sum) two buffers. 29 | 30 |

Arguments

31 |
32 | N       (input)                       const int
33 |         On entry, N  specifies  the  length  of  the  buffers  to  be
34 |         combined. N must be at least zero.
35 | 
36 |
37 | IN      (input)                       const void *
38 |         On entry, IN points to the input-only buffer to be combined.
39 | 
40 |
41 | INOUT   (input/output)                void *
42 |         On entry, INOUT  points  to  the  input-output  buffer  to be
43 |         combined.  On exit,  the  entries of this array  contain  the
44 |         combined results.
45 | 
46 |
47 | DTYPE   (input)                       const HPL_T_TYPE
48 |         On entry,  DTYPE  specifies the type of the  buffer operands.
49 | 
50 | 51 |

See Also

52 | HPL_broadcast, 53 | HPL_reduce, 54 | HPL_all_reduce, 55 | HPL_barrier, 56 | HPL_min, 57 | HPL_max, 58 | HPL_sum. 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /hpl/www/HPL_timer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_timer HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_timer Timer facility. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_timer( 16 | const int 17 | I 18 | ); 19 | 20 |

Description

21 | HPL_timer 22 | provides a "stopwatch" functionality cpu/wall timer in 23 | seconds. Up to 64 separate timers can be functioning at once. The 24 | first call starts the timer, and the second stops it. This routine 25 | can be disabled by calling HPL_timer_disable(), so that calls to 26 | the timer are ignored. This feature can be used to make sure certain 27 | sections of code do not affect timings, even if they call routines 28 | which have HPL_timer calls in them. HPL_timer_enable() will re-enable 29 | the timer functionality. One can retrieve the current value of a 30 | timer by calling 31 | 32 | t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) 33 | 34 | where I is the timer index in [0..64). To initialize the timer 35 | functionality, one must have called HPL_timer_boot() prior to any of 36 | the functions mentioned above. 37 | 38 |

Arguments

39 |
40 | I       (global input)                const int
41 |         On entry, I specifies the timer to stop/start.
42 | 
43 | 44 |

See Also

45 | HPL_timer_cputime, 46 | HPL_timer_walltime. 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /hpl/www/HPL_timer_cputime.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_timer_cputime HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_timer_cputime Return the CPU time. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | double 15 | HPL_timer_cputime(); 16 | 17 |

Description

18 | HPL_timer_cputime 19 | returns the cpu time. If HPL_USE_CLOCK is defined, 20 | the clock() function is used to return an approximation of processor 21 | time used by the program. The value returned is the CPU time used so 22 | far as a clock_t; to get the number of seconds used, the result is 23 | divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C 24 | standard library. If HPL_USE_TIMES is defined, the times() function 25 | is used instead. This function returns the current process times. 26 | times() returns the number of clock ticks that have elapsed since the 27 | system has been up. Otherwise and by default, the standard library 28 | function getrusage() is used. 29 | 30 |

See Also

31 | HPL_timer_walltime, 32 | HPL_timer. 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /hpl/www/HPL_timer_walltime.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_timer_walltime HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_timer_walltime Return the elapsed (wall-clock) time. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | double 15 | HPL_timer_walltime(); 16 | 17 |

Description

18 | HPL_timer_walltime 19 | returns the elapsed (wall-clock) time. 20 | 21 |

See Also

22 | HPL_timer_cputime, 23 | HPL_timer. 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /hpl/www/HPL_warn.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | HPL_warn HPL 2.0 Library Functions September 10, 2008 4 | 5 | 6 | 8 | 9 |

Name

10 | HPL_warn displays an error message. 11 | 12 |

Synopsis

13 | #include "hpl.h"

14 | void 15 | HPL_warn( 16 | FILE * 17 | STREAM, 18 | int 19 | LINE, 20 | const char * 21 | SRNAME, 22 | const char * 23 | FORM, 24 | ... 25 | ); 26 | 27 |

Description

28 | HPL_warn 29 | displays an error message. 30 | 31 |

Arguments

32 |
33 | STREAM  (local input)                 FILE *
34 |         On entry, STREAM specifies the output stream.
35 | 
36 |
37 | LINE    (local input)                 int
38 |         On entry,  LINE  specifies the line  number in the file where
39 |         the  error  has occurred.  When  LINE  is not a positive line
40 |         number, it is ignored.
41 | 
42 |
43 | SRNAME  (local input)                 const char *
44 |         On entry, SRNAME  should  be the name of the routine  calling
45 |         this error handler.
46 | 
47 |
48 | FORM    (local input)                 const char *
49 |         On entry, FORM specifies the format, i.e., how the subsequent
50 |         arguments are converted for output.
51 | 
52 |
53 |         (local input)                 ...
54 |         On entry,  ...  is the list of arguments to be printed within
55 |         the format string.
56 | 
57 | 58 |

Example

59 | #include "hpl.h"

60 |
61 | int main(int argc, char *argv[])
62 | {
63 |    HPL_warn( stderr, __LINE__, __FILE__,
64 |              "Demo.\n" );
65 |    exit(0); return(0);
66 | }
67 | 
68 | 69 |

See Also

70 | HPL_abort, 71 | HPL_fprintf. 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /hpl/www/aprunner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/aprunner.gif -------------------------------------------------------------------------------- /hpl/www/main.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/main.jpg -------------------------------------------------------------------------------- /hpl/www/mat2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/mat2.jpg -------------------------------------------------------------------------------- /hpl/www/pfact.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/pfact.jpg -------------------------------------------------------------------------------- /hpl/www/roll.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/roll.jpg -------------------------------------------------------------------------------- /hpl/www/rollM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/rollM.jpg -------------------------------------------------------------------------------- /hpl/www/spread.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/spread.jpg -------------------------------------------------------------------------------- /hpl/www/spreadM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/spreadM.jpg -------------------------------------------------------------------------------- /include/hpccver.h: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ 2 | 3 | /* 4 | Version has four components: MAJOR, MINOR, MICRO and RELEASE. 5 | RELEASE is a, b, c, f (alpha, beta, candidate, and final). 6 | */ 7 | #define HPCC_VERSION_MAJOR 1 8 | #define HPCC_VERSION_MINOR 5 9 | #define HPCC_VERSION_MICRO 0 10 | #define HPCC_VERSION_RELEASE 'f' 11 | -------------------------------------------------------------------------------- /python/Makefile: -------------------------------------------------------------------------------- 1 | # -*- Makefile -*- 2 | # 3 | 4 | PYTHON = python 5 | RM = rm -rf 6 | PYREX = $(HOME)/build/Pyrex-0.9.3/pyrexc.py 7 | 8 | all: mpi.so 9 | 10 | mpi.c: mpi.pyx 11 | $(PYTHON) $(PYREX) mpi.pyx 12 | 13 | mpi.so: mpi.c 14 | $(PYTHON) setup.py build_ext --inplace 15 | 16 | .PHONY: clean dist dist-clean 17 | 18 | clean: 19 | $(RM) mpi.c mpi.so *.o *.a *.pxi 20 | 21 | dist: 22 | cd ../.. 
; tar cvf hpcc.tar hpcc/python/mpi.pyx 23 | gzip --best ../hpcc.tar 24 | 25 | dist-clean: clean 26 | $(RM) build *.pyc 27 | -------------------------------------------------------------------------------- /python/Makefile-grig: -------------------------------------------------------------------------------- 1 | # -*- Makefile -*- 2 | 3 | PYTHON_PREFIX=/home/luszczek/install/Python-2.4.1 4 | 5 | CC = /opt/mpich-gm/bin/mpicc 6 | CC = mpicc 7 | CFLAGS = -I$(PYTHON_PREFIX)/include/python2.4 8 | BLACS = /opt/lib/blacsCinit_MPI-LINUX-0.a /opt/lib/blacsF77init_MPI-LINUX-0.a /opt/lib/blacs_MPI-LINUX-0.a 9 | LDFLAGS = -L$(PYTHON_PREFIX)/lib/python2.4/config -L/opt/lib -Xlinker -export-dynamic 10 | LOADLIBES = -lpython2.4 -lpthread -ldl -lutil -lscalapack /opt/lib/lapack_LINUX.a -lf77blas -latlas $(BLACS) -lg2c -lm 11 | LOADLIBES = -lpython2.4 -lpthread -ldl -lutil -lg2c -lm 12 | 13 | hpcc: hpcc.o mpi.o 14 | -------------------------------------------------------------------------------- /python/grig-data/README: -------------------------------------------------------------------------------- 1 | Tests were performed on two dual Intel Xeon EMT64 3.2 GHz computers with 2 GiBytes per processor. 2 | Inteconnect was Myrnet2000 and Lam 7.1.1 over TCP/IP was used as messaging layer. 
3 | -------------------------------------------------------------------------------- /python/grig-data/hpccoutf.txt-1procs: -------------------------------------------------------------------------------- 1 | ep_stream_add=2.444186 2 | ep_stream_copy=2.114080 3 | ep_stream_scale=2.091390 4 | ep_stream_triad=0.876273 5 | ep_stream_vector_size=5592405.000000 6 | mpira_errors=0.000000 7 | mpira_gups=0.000010 8 | mpira_time=6770.738119 9 | mpira_updates=67108864.000000 10 | mpira_vtime=44837.195384 11 | -------------------------------------------------------------------------------- /python/grig-data/hpccoutf.txt-2procs: -------------------------------------------------------------------------------- 1 | ep_stream_add=1.161543 2 | ep_stream_copy=1.099663 3 | ep_stream_scale=1.066691 4 | ep_stream_triad=0.487169 5 | ep_stream_vector_size=5592405.000000 6 | mpira_errors=0.000000 7 | mpira_gups=0.000002 8 | mpira_time=38408.924820 9 | mpira_updates=67108864.000000 10 | mpira_vtime=45160.052983 11 | -------------------------------------------------------------------------------- /python/grig-data/hpccoutf.txt-3procs: -------------------------------------------------------------------------------- 1 | ep_stream_add=1.512155 2 | ep_stream_copy=1.310381 3 | ep_stream_scale=1.300129 4 | ep_stream_triad=0.604198 5 | ep_stream_vector_size=5592405.000000 6 | mpira_errors=0.000000 7 | mpira_gups=0.000002 8 | mpira_time=26591.705770 9 | mpira_updates=44739244.000000 10 | mpira_vtime=31486.854541 11 | -------------------------------------------------------------------------------- /python/grig-data/hpccoutf.txt-4procs: -------------------------------------------------------------------------------- 1 | ep_stream_add=1.079364 2 | ep_stream_copy=1.041416 3 | ep_stream_scale=1.013868 4 | ep_stream_triad=0.458820 5 | ep_stream_vector_size=5592405.000000 6 | mpira_errors=0.000000 7 | mpira_gups=0.000002 8 | mpira_time=44599.139596 9 | mpira_updates=67108864.000000 10 | 
mpira_vtime=48049.464034 11 | -------------------------------------------------------------------------------- /python/hpcc.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ 2 | 3 | #include 4 | 5 | DL_EXPORT(void) init_netlib(void); /*proto*/ 6 | 7 | int 8 | main(int argc, char *argv[]) { 9 | PyObject *pName, *pModule, *pDict, *pFunc; 10 | 11 | Py_Initialize(); 12 | 13 | if (argc < 2) { 14 | fprintf( stderr, "%s file.py\n", argv[0] ); 15 | return 0; 16 | } 17 | 18 | PyRun_SimpleString( "import sys; sys.argv = ['hpcc.py']" ); 19 | initmpi(); 20 | 21 | /* 22 | pName = PyString_FromString("mpi"); 23 | pModule = PyImport_Import(pName); 24 | if (!pModule) { 25 | PyErr_Print(); 26 | return 0; 27 | } 28 | */ 29 | 30 | PyRun_SimpleString( "import sys; sys.path.append('.')" ); 31 | PyRun_SimpleString( "import hpcc; hpcc.main('hpcc.py')" ); 32 | /* 33 | PyRun_SimpleString( "import sys; print sys.path" ); 34 | PyRun_SimpleString( "execfile(\"server.py\")" ); 35 | PyRun_SimpleFile( stderr, argv[1] ); 36 | */ 37 | 38 | Py_Finalize(); 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /python/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Sample HPCC Awards Class 2 Submission 4 | 5 | 6 |

Sample HPCC Awards Class 2 Submission

7 |

Overview

8 |

9 | The language for implementation is Python. The messaging used is 10 | MPI. The implemented tests are EP-STREAM and RandomAccess. 11 |

12 |

Files

13 |

14 | The complete implementation of the benchmark tests is in file 15 | hpcc.py (raw source code 16 | is available as well). 17 |

18 |

19 | There is no default (out-of-the-box) interface to MPI in Python 20 | so the submission includes one written in 21 | Pyrex. 22 | It's in the file mpi.pyx 23 |

24 |

25 | Installation procedure should be self-explanatory after reading 26 | a make(1) file. 27 |

28 |

Results

29 | The tests were performed on two dual Intel Xeon EMT64 3.2 GHz computers with 30 | 2 GiBytes per processor. Interconnect was Myrinet2000 and LAM 7.1.1 over TCP/IP 31 | was used as the messaging layer. The output files are: 32 |
    33 |
  1. One MPI process
  2. 34 |
  3. Two MPI processes
  4. 35 |
  5. Three MPI processes
  6. 36 |
  7. Four MPI processes
  8. 37 |
38 | 39 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- mode: Python; tab-width: 4; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- 3 | # 4 | 5 | import os, sys 6 | 7 | from distutils.core import setup, Extension 8 | 9 | module = Extension("mpi", 10 | libraries = ["mpi", "lam"], 11 | sources = ["mpi.c"]) 12 | setup (name = "mpi", 13 | version = "0.1", 14 | description = "MPI binding", 15 | author = "Piotr Luszczek", 16 | author_email = "luszczek __at__ cs __dot__ utk __dot__ edu", 17 | url = "http://icl.cs.utk.edu/hpcc/", 18 | long_description = """MPI Python binding using numarray.""", 19 | ext_modules = [module]) 20 | -------------------------------------------------------------------------------- /src/extfinalize.c: -------------------------------------------------------------------------------- 1 | /* 2 | This routine is called right before MPI_Finalize() and allows finalization 3 | of external software and hardware components. It can be replaced 4 | at the time of installation. A sample implemenation may finialize 5 | proprietary computational and communication libraries. 6 | The parameter "extdata" points to an object of size of a pointer. 7 | "extdata" comes from HPCC_external_init(). 8 | Upon success, the function should return 0. 9 | */ 10 | int 11 | HPCC_external_finalize(int argc, char *argv[], void *extdata) { 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /src/extinit.c: -------------------------------------------------------------------------------- 1 | /* 2 | This routine is called right after MPI_Init() and allows initialization 3 | of external software and hardware components. It can be replaced 4 | at the time of installation. 
A sample implemenation may initialize 5 | proprietary computational and communication libraries. 6 | The parameter "extdata" points to an object of size of a pointer. 7 | The function may choose to store a pointer to its internal data 8 | and it will be passed to the finalization routine HPCC_external_finalize(). 9 | Upon success, the function should return 0. 10 | */ 11 | int 12 | HPCC_external_init(int argc, char *argv[], void *extdata) { 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /src/noopt.c: -------------------------------------------------------------------------------- 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ 2 | 3 | double 4 | HPCC_dweps() { 5 | double eps, one, half; 6 | 7 | one = 1.0; 8 | half = one / 2.0; 9 | eps = one; 10 | 11 | while (one + eps != one) 12 | eps *= half; 13 | 14 | return eps; 15 | } 16 | 17 | float 18 | HPCC_sweps() { 19 | float eps, one, half; 20 | 21 | one = 1.0f; 22 | half = one / 2.0f; 23 | eps = one; 24 | 25 | while (one + eps != one) 26 | eps *= half; 27 | 28 | return eps; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /tools/hpccoutf.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | import re 4 | import sys 5 | 6 | HPCC_out = dict(re.findall(r"^(\w+)=(\d.*)$", sys.stdin.read(), re.MULTILINE)) 7 | 8 | Walk_Order = ( 9 | "HPL_Tflops", "PTRANS_GBs", 10 | "MPIRandomAccess_GUPs", "MPIFFT_Gflops", 11 | "StarSTREAM_Triad*CommWorldProcs", "StarSTREAM_Triad", 12 | "StarDGEMM_Gflops", "RandomlyOrderedRingBandwidth_GBytes", 13 | "RandomlyOrderedRingLatency_usec" ) 14 | 15 | Walk_Units = ( 16 | "Tera Flops per Second", "Tera Bytes per Second", 17 | "Giga Updates per Second", "Tera Flops per Second", 18 | "Tera Bytes per Second", "Giga Bytes per Second", 19 | "Giga Flops per Second", "Giga Bytes per second", 20 | "micro-seconds"); 21 | 22 | Cross_Walk = { 23 | "HPL_Tflops" : "G-HPL", 24 | "PTRANS_GBs" : "G-PTRANS", 25 | "MPIRandomAccess_GUPs" : "G-RandomAccess", 26 | "MPIFFT_Gflops" : "G-FFT", 27 | "StarSTREAM_Triad*CommWorldProcs" : "EP-STREAM Sys", 28 | "CommWorldProcs" : "MPI Processes", 29 | # StarSTREAM_Triad * CommWorldProcs : EP-STREAM Sys 30 | "StarSTREAM_Triad" : "EP-STREAM Triad", 31 | "StarDGEMM_Gflops" : "EP-DGEMM", 32 | "RandomlyOrderedRingBandwidth_GBytes" : "RandomRing Bandwidth", 33 | "RandomlyOrderedRingLatency_usec" : "RandomRing Latency", 34 | } 35 | 36 | def show_all(): 37 | for key in sorted(HPCC_out.keys()): 38 | print key +":", HPCC_out[key] 39 | 40 | def show_web(): 41 | count = 0 42 | for key in Walk_Order: 43 | if key == "StarSTREAM_Triad*CommWorldProcs": 44 | print key, Cross_Walk[key], float(HPCC_out["StarSTREAM_Triad"]) * int(HPCC_out["CommWorldProcs"]), Walk_Units[count] 45 | else: 46 | print key, Cross_Walk[key], HPCC_out[key], Walk_Units[count] 47 | count += 1 48 | 49 | Show_all = 1 50 | Show_web = 0 51 | 52 | if Show_all: 53 | show_all() 54 | 55 | if Show_web: 56 | show_web() 57 | -------------------------------------------------------------------------------- /tools/hpccxml.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import 
sys 3 | 4 | from xml.dom.minidom import parse 5 | 6 | class XMLNODE: 7 | prefix_el = "HPCC:" 8 | 9 | def __init__(self, node): 10 | self.node = node 11 | 12 | def __getattr__(self, name): 13 | prfx = self.prefix_el 14 | if not name.startswith(prfx): 15 | name = prfx + name 16 | 17 | name = name.replace("_", "-") 18 | 19 | for n in self.node.childNodes: 20 | #print "N", name, n.nodeName, n.attributes, n.nodeValue, len(n.childNodes) 21 | if len(n.childNodes) > 0 and n.nodeName == name: 22 | return n.childNodes[0].nodeValue 23 | 24 | class XML: 25 | site_el = "HPCC:Site" 26 | id_el = "HPCC:ID" 27 | 28 | def __init__(self, filename_or_file): 29 | self.dom = parse(filename_or_file) 30 | 31 | def __getitem__(self, idx): 32 | sidx = "%d" % idx 33 | if idx < 1 or idx > 500: 34 | raise ValueError, sidx 35 | for n in self.dom.childNodes[0].childNodes: 36 | if n.ELEMENT_NODE == n.nodeType and n.nodeName == self.site_el: 37 | name = self.id_el 38 | for nn in n.childNodes: 39 | if nn.nodeName == name: 40 | if nn.childNodes[0].nodeValue == sidx: 41 | return XMLNODE(n) 42 | 43 | def min_id(self): return self.minmax_id()[0] 44 | def max_id(self): return self.minmax_id()[1] 45 | def minmax_id(self): 46 | min_idx = 391 47 | max_idx = 1 48 | for n in self.dom.childNodes[0].childNodes: 49 | if n.ELEMENT_NODE == n.nodeType and n.nodeName == self.site_el: 50 | name = self.id_el 51 | for nn in n.childNodes: 52 | if nn.nodeName == name: 53 | idx = int(nn.childNodes[0].nodeValue) 54 | if idx < min_idx: 55 | min_idx = idx 56 | if idx > max_idx: 57 | max_idx = idx 58 | return min_idx, max_idx 59 | 60 | def main(argv): 61 | fname = argv[1] 62 | d = XML(fname) 63 | for idx in range(d.min_id(), d.max_id()+1): 64 | nde = d[idx] 65 | if nde is None: continue 66 | print idx, nde.HPL, nde.SingleMPIProcessDGEMM, nde.HPLNodes 67 | 68 | if "__main__" == __name__: 69 | sys.exit(main(sys.argv)) 70 | -------------------------------------------------------------------------------- /tools/readme.py: 
-------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | 4 | if len(sys.argv) > 1: 5 | filename = sys.argv[1] 6 | else: 7 | filename = "README.html" 8 | 9 | d=open(filename).read().replace("","tt {color:navy}\nh2,h3,h4 {color:#527bbd;}\nh2 {border-bottom: 2px solid silver;}\n") 10 | open(filename,"w").write(re.sub("^[.]c000.*",".c000{font-family:monospace;color:navy;}", d, flags=re.MULTILINE)) 11 | -------------------------------------------------------------------------------- /work/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC = g++ 3 | CFLAGS = -pipe -g -W -Wall 4 | CXXFLAGS = -pipe -g -W -Wall 5 | LDFLAGS = -g 6 | 7 | mem4fft: mem4fft.o fft235.o zfft1d.o 8 | 9 | clean: 10 | rm -f *.o mem4fft 11 | -------------------------------------------------------------------------------- /work/conly/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CFLAGS = -g -pipe -W -Wall 3 | LDFLAGS = -g 4 | 5 | mem4fft: fft235.o zfft1d.o 6 | 7 | clean: 8 | rm -f *.o mem4fft 9 | -------------------------------------------------------------------------------- /work/conly/c.gpt: -------------------------------------------------------------------------------- 1 | 2 | plot \ 3 | "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^c'|sort -n -k 2" using 2:3 title "c", \ 4 | sqrt(x)*16 5 | -------------------------------------------------------------------------------- /work/conly/d.gpt: -------------------------------------------------------------------------------- 1 | 2 | plot \ 3 | "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^d'|sort -n -k 2" using 2:3 title "d", \ 4 | sqrt(x) 5 | -------------------------------------------------------------------------------- /work/conly/results.txt: -------------------------------------------------------------------------------- 1 | INPUT[1048576] 1048575 2 | OUTPUT[1048576] 1048575 3 | 
w1[1048576] 127 4 | w2[1048576] 127 5 | ww[1048576] 1023 6 | ww2[1048576] 1023 7 | ww3[1048576] 1023 8 | ww4[1048576] 1023 9 | c[1048576] 16503 10 | d[1048576] 1023 11 | INPUT[33554432] 33554431 12 | OUTPUT[33554432] 33554431 13 | w1[33554432] 1023 14 | w2[33554432] 511 15 | ww[33554432] 4095 16 | ww2[33554432] 4095 17 | ww3[33554432] 8191 18 | ww4[33554432] 8191 19 | c[33554432] 65655 20 | d[33554432] 4095 21 | INPUT[67108864] 67108863 22 | OUTPUT[67108864] 67108863 23 | w1[67108864] 1023 24 | w2[67108864] 1023 25 | ww[67108864] 4095 26 | ww2[67108864] 8191 27 | ww3[67108864] 8191 28 | ww4[67108864] 16383 29 | c[67108864] 131191 30 | d[67108864] 8191 31 | INPUT[134217728] 134217727 32 | OUTPUT[134217728] 134217727 33 | w1[134217728] 2047 34 | w2[134217728] 1023 35 | ww[134217728] 8191 36 | ww2[134217728] 16383 37 | ww3[134217728] 8191 38 | ww4[134217728] 16383 39 | c[134217728] 131191 40 | d[134217728] 8191 41 | INPUT[268435456] 268435455 42 | OUTPUT[268435456] 268435455 43 | w1[268435456] 2047 44 | w2[268435456] 2047 45 | ww[268435456] 16383 46 | ww2[268435456] 16383 47 | ww3[268435456] 16383 48 | ww4[268435456] 16383 49 | c[268435456] 262263 50 | d[268435456] 16383 51 | INPUT[536870912] 536870911 52 | OUTPUT[536870912] 536870911 53 | w1[536870912] 4095 54 | w2[536870912] 2047 55 | ww[536870912] 16383 56 | ww2[536870912] 16383 57 | ww3[536870912] 32767 58 | ww4[536870912] 32767 59 | c[536870912] 262263 60 | d[536870912] 16383 61 | INPUT[1073741824] 1073741823 62 | OUTPUT[1073741824] 1073741823 63 | w1[1073741824] 4095 64 | w2[1073741824] 4095 65 | ww[1073741824] 16383 66 | ww2[1073741824] 32767 67 | ww3[1073741824] 32767 68 | ww4[1073741824] 65535 69 | c[1073741824] 524407 70 | d[1073741824] 32767 71 | -------------------------------------------------------------------------------- /work/conly/w1.gpt: -------------------------------------------------------------------------------- 1 | 2 | plot \ 3 | "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w1|sort -n -k 2" 
using 2:3 title "w1", \ 4 | 1.1*sqrt(x) 5 | -------------------------------------------------------------------------------- /work/conly/w2.gpt: -------------------------------------------------------------------------------- 1 | 2 | plot \ 3 | "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:3 title "w2", \ 4 | sqrt(x)*0.375 5 | 6 | # exp(log(x)*0.50)/sqrt(2*3*5), \ 7 | # exp(log(x)*0.33) 8 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:($3/sqrt($2)/0.375) title "w2"#, \ 9 | -------------------------------------------------------------------------------- /work/conly/ww.gpt: -------------------------------------------------------------------------------- 1 | 2 | plot \ 3 | "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww[^0-9]'|sort -n -k 2" using 2:3 title "ww", \ 4 | sqrt(x) 5 | 6 | # exp(log(x)*0.50)/sqrt(2*3*5), \ 7 | # exp(log(x)*0.33) 8 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:($3/sqrt($2)/0.375) title "w2"#, \ 9 | -------------------------------------------------------------------------------- /work/conly/ww2.gpt: -------------------------------------------------------------------------------- 1 | 2 | plot \ 3 | "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww2'|sort -n -k 2" using 2:3 title "ww2", \ 4 | sqrt(x)*3.9 5 | 6 | # exp(log(x)*0.50)/sqrt(2*3*5), \ 7 | # exp(log(x)*0.33) 8 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:($3/sqrt($2)/0.375) title "w2"#, \ 9 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww2'|sort -n -k 2" using 2:(sqrt($2)/$3/6) title "ww2"#, \ 10 | -------------------------------------------------------------------------------- /work/conly/ww3.gpt: -------------------------------------------------------------------------------- 1 | 2 | plot \ 3 | "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww3'|sort -n -k 2" using 2:3 title "ww3", \ 4 | sqrt(x)*5.4772 5 | 
"""
Preprocess C files.

Change c_re() c_im() under some circumstances.

When the argument of a ``c_re(...)`` / ``c_im(...)`` use is a plain
subscript expression such as ``a[i]``, the subscript is rewritten to
``a->sqbracket(i)``.  Everything else is copied through unchanged.
"""

import sys


def errlog(msg):
    """Write ``msg`` (converted with ``str``) and a newline to stderr."""
    sys.stderr.write(str(msg))
    sys.stderr.write("\n")


def proc_c_xx(oldline):
    """Rewrite ``x[i]`` to ``x->sqbracket(i)`` inside c_re()/c_im() uses.

    Parameters:
        oldline: one line of C source text (normally ending in a newline).

    Returns:
        The line with the first subscript of each qualifying c_re/c_im
        argument rewritten; the input is returned unchanged when nothing
        qualifies.

    A use qualifies when, scanning from the first non-blank character after
    the opening parenthesis, a '[' occurs before the next ')' and no '('
    occurs before that '['.
    """
    # Pass 1: collect the offset of the first non-blank character that
    # follows the '(' of every c_re / c_im occurrence.
    arg_starts = []
    idx = -1
    while True:
        idx = oldline.find("c_", idx + 1)
        if idx < 0:
            break
        if not oldline.startswith(("re", "im"), idx + 2):
            continue
        paren = oldline.find("(", idx)
        if paren < 0:
            # No call on this line (e.g. the name appears in a comment).
            # Keep scanning forward from the current match; falling back to
            # offset 0 here would rescan the same text forever.
            continue
        idx = paren + 1
        # Bounded skip: '(' may be the last non-blank character of the line.
        while idx < len(oldline) and oldline[idx].isspace():
            idx += 1
        if idx < len(oldline):
            arg_starts.append(idx)

    # Pass 2: decide, on the untouched line, which '[' ... ']' pairs to
    # rewrite.
    spans = []
    for start in arg_starts:
        oparen = oldline.find("(", start)
        cparen = oldline.find(")", start)
        sqobrkt = oldline.find("[", start)

        if sqobrkt < 0 or sqobrkt > cparen:  # '[' is not there or is beyond ')'
            continue

        if 0 <= oparen < sqobrkt:  # '(' is there and it's before '['
            continue

        sqcbrkt = oldline.find("]", sqobrkt)
        if sqcbrkt < 0:  # unbalanced subscript: leave the text alone
            continue

        spans.append((sqobrkt, sqcbrkt))

    # Pass 3: splice from right to left so that the offsets computed on
    # oldline stay valid for the part of the line not yet rewritten.
    newline = oldline
    limit = len(oldline)
    for sqobrkt, sqcbrkt in sorted(set(spans), reverse=True):
        if sqcbrkt >= limit:
            # Overlaps a span that was already rewritten; its offsets are
            # no longer meaningful.
            continue
        newline = (newline[:sqobrkt] + "->sqbracket(" +
                   newline[sqobrkt + 1:sqcbrkt] + ")" +
                   newline[sqcbrkt + 1:])
        limit = sqobrkt

    return newline


def cpp(fname):
    """Copy file ``fname`` to stdout, rewriting lines that use c_re/c_im.

    Raises whatever ``open`` raises when the file cannot be read.
    """
    with open(fname) as src:
        for fline in src:
            # Substring test, not the truthiness of str.find(): find()
            # returns -1 (true) when absent and 0 (false) at column 0.
            if "c_re" in fline or "c_im" in fline:
                newline = proc_c_xx(fline)
            else:
                newline = fline
            # Only the last line of a file can lack a newline; terminate it
            # so the banner of the next file starts on its own line.
            if not newline.endswith("\n"):
                newline += "\n"
            sys.stdout.write(newline)


def main(argv):
    """Process every file named in ``argv[1:]``, each preceded by a banner."""
    for a in argv[1:]:
        sys.stdout.write("/**** %s ****/\n" % a)
        cpp(a)


if "__main__" == __name__:
    sys.exit(main(sys.argv))
-------------------------------------------------------------------------------- /work/fft.txt: -------------------------------------------------------------------------------- 1 | 2 | settbls(w1, w2, w3, w4, n1, n2, m1, m2) { 3 | w1[m1, m2] 4 | w3[m2, n1/m1] 5 | w2[m1,n2/m2] 6 | w4[n1/m1,n2/m2] 7 | n1*n2=N 8 | n2*2*3*5 >= n1 >=n2 9 | } 10 | -------------------------------------------------------------------------------- /work/fftbug.txt: -------------------------------------------------------------------------------- 1 | On 7/2/2011 11:05 AM, Piotr Luszczek wrote: 2 | John, 3 | 4 | thank you for the report. I would definitely like to see 5 | more details on this. My access to large machines 6 | is limited to few architectures. Maybe that's why 7 | I'm not seeing this on my side. 8 | 9 | Well, this one was very strange. I've been running HPCC 1.4.1 with FFTE for a while on a wide number of systems without seeing problems. But recently on SGI UV systems I started seeing glibc heap corruption errors in StarFFT. The same binary, MPI library, and input libraries that ran successfully on one of our clusters gave the heap errors on UV. I can reproduce the problem with only a single MPI rank. And using Totalview, the memory checker found the array bounds overrun. 10 | 11 | Hopefully you'll be able to reproduce what I'm seeing under the following conditions: 12 | 13 | src/io.c modified to run only StarFFT 14 | P = Q = 1 15 | N = 20120 16 | 17 | Compile with -DHPCC_FFT_235 (I also use -DHPCC_MEMALLCTR -DRA_SANDIA_OPT2, not sure if the former plays a role). 18 | I use -g optimization so I don't think it's a compiler issue. 19 | 20 | For this case when HPCC_fftw_create_plan() is called, p->ww has 262184 elements. This routine calls HPCC_zfft1d(), from which settbls() is called with 4th argument w4 = ww+nw4, where nw4 is 41752. 
21 | 22 | In settbls() w4 is initialized in a double loop with loop variable 'is' ranging up to n2/m2-1 = 161 and 'ir' ranging up to n1/m1-1 = 2429, with ldw4 = 2430. With ARR2D(w4,ir,is,ldw4) expanding to w4[ir+is*ldw4], the maximum array element is 2429+161*2430 = 393659, which is well past the allocated end index 262184-41752 = 220432. 23 | 24 | Totalview dies when the index hits 220432. When I run outside of Totalview (on UV), the job doesn't die until HPCC_fftw_destroy_plan() tries to free p->ww. And as I said on some systems the job doesn't die at all and the results produced are correct. 25 | 26 | thanks, 27 | John 28 | -------------------------------------------------------------------------------- /www/hpcchallenge.org/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: index.html 3 | 4 | index.html: pre_all.html Home.html poste_all.html 5 | cat pre_all.html Home.html poste_all.html > index.html 6 | 7 | .PHONY: all 8 | -------------------------------------------------------------------------------- /www/hpcchallenge.org/poste_all.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /www/hpcchallenge.org/pre_all.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 11 | 12 | 13 | 14 | 15 |
16 | --------------------------------------------------------------------------------