├── .gitignore
├── DGEMM
    ├── onecpu.c
    └── tstdgemm.c
├── FFT
    ├── bcnrand.c
    ├── fft235.c
    ├── hpccfft.h
    ├── mpifft.c
    ├── onecpu.c
    ├── pzfft1d.c
    ├── tstfft.c
    ├── wrapfftw.c
    ├── wrapfftw.h
    ├── wrapmpifftw.c
    ├── wrapmpifftw.h
    └── zfft1d.c
├── Makefile
├── PTRANS
    ├── cblacslt.c
    ├── cblacslt.h
    ├── mem.c
    ├── pdmatcmp.c
    ├── pdmatgen.c
    ├── pdtrans.c
    ├── pdtransdriver.c
    ├── pmatgeninc.c
    └── sclapack.c
├── README.tex
├── README.xml
├── RandomAccess
    ├── MPIRandomAccess.c
    ├── MPIRandomAccessLCG.c
    ├── MPIRandomAccessLCG_opt.c
    ├── MPIRandomAccessLCG_vanilla.c
    ├── MPIRandomAccess_opt.c
    ├── MPIRandomAccess_vanilla.c
    ├── RandomAccess.h
    ├── buckets.c
    ├── buckets.h
    ├── core_single_cpu.c
    ├── core_single_cpu_lcg.c
    ├── heap.c
    ├── heap.h
    ├── pool.c
    ├── pool.h
    ├── single_cpu.c
    ├── single_cpu_lcg.c
    ├── star_single_cpu.c
    ├── star_single_cpu_lcg.c
    ├── time_bound.c
    ├── time_bound.h
    ├── time_bound_lcg.c
    ├── utility.c
    ├── verification.c
    └── verification_lcg.c
├── STREAM
    ├── onecpu.c
    ├── split_stream_funcs.py
    ├── stream.c
    └── stream_mpi.c
├── TEST
├── _hpccinf.txt
├── doc
    ├── class2specs.tex
    ├── hpccimpg.tex
    └── hpccusrg.tex
├── hpl
    ├── BUGS
    ├── COPYRIGHT
    ├── HISTORY
    ├── HPL.build.log.220120040613
    ├── INSTALL
    ├── Make.MacOSX
    ├── Make.UNKNOWN
    ├── Make.top
    ├── Makefile
    ├── README
    ├── TODO
    ├── TUNING
    ├── include
    │   ├── hpccmema.h
    │   ├── hpl.h
    │   ├── hpl_auxil.h
    │   ├── hpl_blas.h
    │   ├── hpl_comm.h
    │   ├── hpl_gesv.h
    │   ├── hpl_grid.h
    │   ├── hpl_matgen.h
    │   ├── hpl_misc.h
    │   ├── hpl_panel.h
    │   ├── hpl_pauxil.h
    │   ├── hpl_pfact.h
    │   ├── hpl_pgesv.h
    │   ├── hpl_pmatgen.h
    │   ├── hpl_pmisc.h
    │   ├── hpl_ptest.h
    │   ├── hpl_ptimer.h
    │   ├── hpl_test.h
    │   ├── hpl_timer.h
    │   └── hpl_units.h
    ├── lib
    │   └── arch
    │   │   └── build
    │   │       └── Makefile.hpcc
    ├── makes
    │   ├── Make.auxil
    │   ├── Make.blas
    │   ├── Make.comm
    │   ├── Make.gesv
    │   ├── Make.grid
    │   ├── Make.matgen
    │   ├── Make.panel
    │   ├── Make.pauxil
    │   ├── Make.pfact
    │   ├── Make.pgesv
    │   ├── Make.pmatgen
    │   ├── Make.ptest
    │   ├── Make.ptimer
    │   ├── Make.test
    │   ├── Make.timer
    │   └── Make.units
    ├── man
    │   └── man3
    │   │   ├── HPL_abort.3
    │   │   ├── HPL_all_reduce.3
    │   │   ├── HPL_barrier.3
    │   │   ├── HPL_bcast.3
    │   │   ├── HPL_binit.3
    │   │   ├── HPL_broadcast.3
    │   │   ├── HPL_bwait.3
    │   │   ├── HPL_copyL.3
    │   │   ├── HPL_daxpy.3
    │   │   ├── HPL_dcopy.3
    │   │   ├── HPL_dgemm.3
    │   │   ├── HPL_dgemv.3
    │   │   ├── HPL_dger.3
    │   │   ├── HPL_dlacpy.3
    │   │   ├── HPL_dlamch.3
    │   │   ├── HPL_dlange.3
    │   │   ├── HPL_dlaprnt.3
    │   │   ├── HPL_dlaswp00N.3
    │   │   ├── HPL_dlaswp01N.3
    │   │   ├── HPL_dlaswp01T.3
    │   │   ├── HPL_dlaswp02N.3
    │   │   ├── HPL_dlaswp03N.3
    │   │   ├── HPL_dlaswp03T.3
    │   │   ├── HPL_dlaswp04N.3
    │   │   ├── HPL_dlaswp04T.3
    │   │   ├── HPL_dlaswp05N.3
    │   │   ├── HPL_dlaswp05T.3
    │   │   ├── HPL_dlaswp06N.3
    │   │   ├── HPL_dlaswp06T.3
    │   │   ├── HPL_dlaswp10N.3
    │   │   ├── HPL_dlatcpy.3
    │   │   ├── HPL_dlocmax.3
    │   │   ├── HPL_dlocswpN.3
    │   │   ├── HPL_dlocswpT.3
    │   │   ├── HPL_dmatgen.3
    │   │   ├── HPL_dscal.3
    │   │   ├── HPL_dswap.3
    │   │   ├── HPL_dtrsm.3
    │   │   ├── HPL_dtrsv.3
    │   │   ├── HPL_equil.3
    │   │   ├── HPL_fprintf.3
    │   │   ├── HPL_grid_exit.3
    │   │   ├── HPL_grid_info.3
    │   │   ├── HPL_grid_init.3
    │   │   ├── HPL_idamax.3
    │   │   ├── HPL_indxg2l.3
    │   │   ├── HPL_indxg2lp.3
    │   │   ├── HPL_indxg2p.3
    │   │   ├── HPL_indxl2g.3
    │   │   ├── HPL_infog2l.3
    │   │   ├── HPL_jumpit.3
    │   │   ├── HPL_ladd.3
    │   │   ├── HPL_lmul.3
    │   │   ├── HPL_logsort.3
    │   │   ├── HPL_max.3
    │   │   ├── HPL_min.3
    │   │   ├── HPL_numroc.3
    │   │   ├── HPL_numrocI.3
    │   │   ├── HPL_pabort.3
    │   │   ├── HPL_packL.3
    │   │   ├── HPL_pddriver.3
    │   │   ├── HPL_pdfact.3
    │   │   ├── HPL_pdgesv.3
    │   │   ├── HPL_pdgesv0.3
    │   │   ├── HPL_pdgesvK1.3
    │   │   ├── HPL_pdgesvK2.3
    │   │   ├── HPL_pdinfo.3
    │   │   ├── HPL_pdlamch.3
    │   │   ├── HPL_pdlange.3
    │   │   ├── HPL_pdlaprnt.3
    │   │   ├── HPL_pdlaswp00N.3
    │   │   ├── HPL_pdlaswp00T.3
    │   │   ├── HPL_pdlaswp01N.3
    │   │   ├── HPL_pdlaswp01T.3
    │   │   ├── HPL_pdmatgen.3
    │   │   ├── HPL_pdmxswp.3
    │   │   ├── HPL_pdpancrN.3
    │   │   ├── HPL_pdpancrT.3
    │   │   ├── HPL_pdpanel_disp.3
    │   │   ├── HPL_pdpanel_free.3
    │   │   ├── HPL_pdpanel_init.3
    │   │   ├── HPL_pdpanel_new.3
    │   │   ├── HPL_pdpanllN.3
    │   │   ├── HPL_pdpanllT.3
    │   │   ├── HPL_pdpanrlN.3
    │   │   ├── HPL_pdpanrlT.3
    │   │   ├── HPL_pdrpancrN.3
    │   │   ├── HPL_pdrpancrT.3
    │   │   ├── HPL_pdrpanllN.3
    │   │   ├── HPL_pdrpanllT.3
    │   │   ├── HPL_pdrpanrlN.3
    │   │   ├── HPL_pdrpanrlT.3
    │   │   ├── HPL_pdtest.3
    │   │   ├── HPL_pdtrsv.3
    │   │   ├── HPL_pdupdateNN.3
    │   │   ├── HPL_pdupdateNT.3
    │   │   ├── HPL_pdupdateTN.3
    │   │   ├── HPL_pdupdateTT.3
    │   │   ├── HPL_perm.3
    │   │   ├── HPL_pipid.3
    │   │   ├── HPL_plindx0.3
    │   │   ├── HPL_plindx1.3
    │   │   ├── HPL_plindx10.3
    │   │   ├── HPL_pnum.3
    │   │   ├── HPL_ptimer.3
    │   │   ├── HPL_ptimer_cputime.3
    │   │   ├── HPL_ptimer_walltime.3
    │   │   ├── HPL_pwarn.3
    │   │   ├── HPL_rand.3
    │   │   ├── HPL_recv.3
    │   │   ├── HPL_reduce.3
    │   │   ├── HPL_rollN.3
    │   │   ├── HPL_rollT.3
    │   │   ├── HPL_sdrv.3
    │   │   ├── HPL_send.3
    │   │   ├── HPL_setran.3
    │   │   ├── HPL_spreadN.3
    │   │   ├── HPL_spreadT.3
    │   │   ├── HPL_sum.3
    │   │   ├── HPL_timer.3
    │   │   ├── HPL_timer_cputime.3
    │   │   ├── HPL_timer_walltime.3
    │   │   ├── HPL_warn.3
    │   │   └── HPL_xjumpm.3
    ├── setup
    │   ├── Make.BGP
    │   ├── Make.CrayX1
    │   ├── Make.FreeBSD_PIV_CBLAS
    │   ├── Make.HPUX_FBLAS
    │   ├── Make.I860_FBLAS
    │   ├── Make.IRIX_FBLAS
    │   ├── Make.Linux-x86_64-OpenBLAS-FFTW3
    │   ├── Make.LinuxIntelIA64Itan2_eccMKL
    │   ├── Make.Linux_ATHLON_CBLAS
    │   ├── Make.Linux_ATHLON_FBLAS
    │   ├── Make.Linux_ATHLON_VSIPL
    │   ├── Make.Linux_AtlasCBLAS_Lam
    │   ├── Make.Linux_AtlasFBLAS_Lam
    │   ├── Make.Linux_PII_CBLAS
    │   ├── Make.Linux_PII_CBLAS_gm
    │   ├── Make.Linux_PII_FBLAS
    │   ├── Make.Linux_PII_FBLAS_gm
    │   ├── Make.Linux_PII_VSIPL
    │   ├── Make.Linux_PII_VSIPL_gm
    │   ├── Make.Linux_SGI_AltixIA64_Goto
    │   ├── Make.Linux_SGI_AltixIA64_SCSL
    │   ├── Make.PWR2_FBLAS
    │   ├── Make.PWR3_FBLAS
    │   ├── Make.PWRPC_FBLAS
    │   ├── Make.Power4_ESSL
    │   ├── Make.Power4_ESSLSMP
    │   ├── Make.Power4_ESSL_r
    │   ├── Make.SUN4SOL2-g_FBLAS
    │   ├── Make.SUN4SOL2-g_VSIPL
    │   ├── Make.SUN4SOL2_FBLAS
    │   ├── Make.Sun
    │   ├── Make.T3E_FBLAS
    │   ├── Make.Tru64_FBLAS
    │   ├── Make.Tru64_FBLAS_MPI
    │   ├── Make.Tru64_FBLAS_elan
    │   ├── Make.UNKNOWN.in
    │   ├── Make.cygwin
    │   ├── Make.macports_openmpi
    │   └── make_generic
    ├── src
    │   ├── auxil
    │   │   ├── HPL_abort.c
    │   │   ├── HPL_dlacpy.c
    │   │   ├── HPL_dlamch.c
    │   │   ├── HPL_dlange.c
    │   │   ├── HPL_dlaprnt.c
    │   │   ├── HPL_dlatcpy.c
    │   │   ├── HPL_fprintf.c
    │   │   └── HPL_warn.c
    │   ├── blas
    │   │   ├── HPL_daxpy.c
    │   │   ├── HPL_dcopy.c
    │   │   ├── HPL_dgemm.c
    │   │   ├── HPL_dgemv.c
    │   │   ├── HPL_dger.c
    │   │   ├── HPL_dscal.c
    │   │   ├── HPL_dswap.c
    │   │   ├── HPL_dtrsm.c
    │   │   ├── HPL_dtrsv.c
    │   │   └── HPL_idamax.c
    │   ├── comm
    │   │   ├── HPL_1rinM.c
    │   │   ├── HPL_1ring.c
    │   │   ├── HPL_2rinM.c
    │   │   ├── HPL_2ring.c
    │   │   ├── HPL_bcast.c
    │   │   ├── HPL_binit.c
    │   │   ├── HPL_blonM.c
    │   │   ├── HPL_blong.c
    │   │   ├── HPL_bwait.c
    │   │   ├── HPL_copyL.c
    │   │   ├── HPL_packL.c
    │   │   ├── HPL_recv.c
    │   │   ├── HPL_sdrv.c
    │   │   └── HPL_send.c
    │   ├── grid
    │   │   ├── HPL_all_reduce.c
    │   │   ├── HPL_barrier.c
    │   │   ├── HPL_broadcast.c
    │   │   ├── HPL_grid_exit.c
    │   │   ├── HPL_grid_info.c
    │   │   ├── HPL_grid_init.c
    │   │   ├── HPL_max.c
    │   │   ├── HPL_min.c
    │   │   ├── HPL_pnum.c
    │   │   ├── HPL_reduce.c
    │   │   └── HPL_sum.c
    │   ├── panel
    │   │   ├── HPL_pdpanel_disp.c
    │   │   ├── HPL_pdpanel_free.c
    │   │   ├── HPL_pdpanel_init.c
    │   │   └── HPL_pdpanel_new.c
    │   ├── pauxil
    │   │   ├── HPL_dlaswp00N.c
    │   │   ├── HPL_dlaswp01N.c
    │   │   ├── HPL_dlaswp01T.c
    │   │   ├── HPL_dlaswp02N.c
    │   │   ├── HPL_dlaswp03N.c
    │   │   ├── HPL_dlaswp03T.c
    │   │   ├── HPL_dlaswp04N.c
    │   │   ├── HPL_dlaswp04T.c
    │   │   ├── HPL_dlaswp05N.c
    │   │   ├── HPL_dlaswp05T.c
    │   │   ├── HPL_dlaswp06N.c
    │   │   ├── HPL_dlaswp06T.c
    │   │   ├── HPL_dlaswp10N.c
    │   │   ├── HPL_indxg2l.c
    │   │   ├── HPL_indxg2lp.c
    │   │   ├── HPL_indxg2p.c
    │   │   ├── HPL_indxl2g.c
    │   │   ├── HPL_infog2l.c
    │   │   ├── HPL_numroc.c
    │   │   ├── HPL_numrocI.c
    │   │   ├── HPL_pabort.c
    │   │   ├── HPL_pdlamch.c
    │   │   ├── HPL_pdlange.c
    │   │   ├── HPL_pdlaprnt.c
    │   │   └── HPL_pwarn.c
    │   ├── pfact
    │   │   ├── HPL_dlocmax.c
    │   │   ├── HPL_dlocswpN.c
    │   │   ├── HPL_dlocswpT.c
    │   │   ├── HPL_pdfact.c
    │   │   ├── HPL_pdmxswp.c
    │   │   ├── HPL_pdpancrN.c
    │   │   ├── HPL_pdpancrT.c
    │   │   ├── HPL_pdpanllN.c
    │   │   ├── HPL_pdpanllT.c
    │   │   ├── HPL_pdpanrlN.c
    │   │   ├── HPL_pdpanrlT.c
    │   │   ├── HPL_pdrpancrN.c
    │   │   ├── HPL_pdrpancrT.c
    │   │   ├── HPL_pdrpanllN.c
    │   │   ├── HPL_pdrpanllT.c
    │   │   ├── HPL_pdrpanrlN.c
    │   │   └── HPL_pdrpanrlT.c
    │   └── pgesv
    │   │   ├── HPL_equil.c
    │   │   ├── HPL_logsort.c
    │   │   ├── HPL_pdgesv.c
    │   │   ├── HPL_pdgesv0.c
    │   │   ├── HPL_pdgesvK1.c
    │   │   ├── HPL_pdgesvK2.c
    │   │   ├── HPL_pdlaswp00N.c
    │   │   ├── HPL_pdlaswp00T.c
    │   │   ├── HPL_pdlaswp01N.c
    │   │   ├── HPL_pdlaswp01T.c
    │   │   ├── HPL_pdtrsv.c
    │   │   ├── HPL_pdupdateNN.c
    │   │   ├── HPL_pdupdateNT.c
    │   │   ├── HPL_pdupdateTN.c
    │   │   ├── HPL_pdupdateTT.c
    │   │   ├── HPL_perm.c
    │   │   ├── HPL_pipid.c
    │   │   ├── HPL_plindx0.c
    │   │   ├── HPL_plindx1.c
    │   │   ├── HPL_plindx10.c
    │   │   ├── HPL_rollN.c
    │   │   ├── HPL_rollT.c
    │   │   ├── HPL_spreadN.c
    │   │   └── HPL_spreadT.c
    ├── testing
    │   ├── matgen
    │   │   ├── HPL_dmatgen.c
    │   │   ├── HPL_jumpit.c
    │   │   ├── HPL_ladd.c
    │   │   ├── HPL_lmul.c
    │   │   ├── HPL_rand.c
    │   │   ├── HPL_setran.c
    │   │   └── HPL_xjumpm.c
    │   ├── pmatgen
    │   │   └── HPL_pdmatgen.c
    │   ├── ptest
    │   │   ├── HPL.dat
    │   │   ├── HPL_pddriver.c
    │   │   ├── HPL_pdinfo.c
    │   │   └── HPL_pdtest.c
    │   ├── ptimer
    │   │   ├── HPL_ptimer.c
    │   │   ├── HPL_ptimer_cputime.c
    │   │   └── HPL_ptimer_walltime.c
    │   └── timer
    │   │   ├── HPL_timer.c
    │   │   ├── HPL_timer_cputime.c
    │   │   └── HPL_timer_walltime.c
    └── www
    │   ├── 1rinM.jpg
    │   ├── 1ring.jpg
    │   ├── 2-273x48.jpg
    │   ├── 2rinM.jpg
    │   ├── 2ring.jpg
    │   ├── HPL_abort.html
    │   ├── HPL_all_reduce.html
    │   ├── HPL_barrier.html
    │   ├── HPL_bcast.html
    │   ├── HPL_binit.html
    │   ├── HPL_broadcast.html
    │   ├── HPL_bwait.html
    │   ├── HPL_copyL.html
    │   ├── HPL_daxpy.html
    │   ├── HPL_dcopy.html
    │   ├── HPL_dgemm.html
    │   ├── HPL_dgemv.html
    │   ├── HPL_dger.html
    │   ├── HPL_dlacpy.html
    │   ├── HPL_dlamch.html
    │   ├── HPL_dlange.html
    │   ├── HPL_dlaprnt.html
    │   ├── HPL_dlaswp00N.html
    │   ├── HPL_dlaswp01N.html
    │   ├── HPL_dlaswp01T.html
    │   ├── HPL_dlaswp02N.html
    │   ├── HPL_dlaswp03N.html
    │   ├── HPL_dlaswp03T.html
    │   ├── HPL_dlaswp04N.html
    │   ├── HPL_dlaswp04T.html
    │   ├── HPL_dlaswp05N.html
    │   ├── HPL_dlaswp05T.html
    │   ├── HPL_dlaswp06N.html
    │   ├── HPL_dlaswp06T.html
    │   ├── HPL_dlaswp10N.html
    │   ├── HPL_dlatcpy.html
    │   ├── HPL_dlocmax.html
    │   ├── HPL_dlocswpN.html
    │   ├── HPL_dlocswpT.html
    │   ├── HPL_dmatgen.html
    │   ├── HPL_dscal.html
    │   ├── HPL_dswap.html
    │   ├── HPL_dtrsm.html
    │   ├── HPL_dtrsv.html
    │   ├── HPL_equil.html
    │   ├── HPL_fprintf.html
    │   ├── HPL_grid_exit.html
    │   ├── HPL_grid_info.html
    │   ├── HPL_grid_init.html
    │   ├── HPL_idamax.html
    │   ├── HPL_indxg2l.html
    │   ├── HPL_indxg2lp.html
    │   ├── HPL_indxg2p.html
    │   ├── HPL_indxl2g.html
    │   ├── HPL_infog2l.html
    │   ├── HPL_jumpit.html
    │   ├── HPL_ladd.html
    │   ├── HPL_lmul.html
    │   ├── HPL_logsort.html
    │   ├── HPL_max.html
    │   ├── HPL_min.html
    │   ├── HPL_numroc.html
    │   ├── HPL_numrocI.html
    │   ├── HPL_pabort.html
    │   ├── HPL_packL.html
    │   ├── HPL_pddriver.html
    │   ├── HPL_pdfact.html
    │   ├── HPL_pdgesv.html
    │   ├── HPL_pdgesv0.html
    │   ├── HPL_pdgesvK1.html
    │   ├── HPL_pdgesvK2.html
    │   ├── HPL_pdinfo.html
    │   ├── HPL_pdlamch.html
    │   ├── HPL_pdlange.html
    │   ├── HPL_pdlaprnt.html
    │   ├── HPL_pdlaswp00N.html
    │   ├── HPL_pdlaswp00T.html
    │   ├── HPL_pdlaswp01N.html
    │   ├── HPL_pdlaswp01T.html
    │   ├── HPL_pdmatgen.html
    │   ├── HPL_pdmxswp.html
    │   ├── HPL_pdpancrN.html
    │   ├── HPL_pdpancrT.html
    │   ├── HPL_pdpanel_disp.html
    │   ├── HPL_pdpanel_free.html
    │   ├── HPL_pdpanel_init.html
    │   ├── HPL_pdpanel_new.html
    │   ├── HPL_pdpanllN.html
    │   ├── HPL_pdpanllT.html
    │   ├── HPL_pdpanrlN.html
    │   ├── HPL_pdpanrlT.html
    │   ├── HPL_pdrpancrN.html
    │   ├── HPL_pdrpancrT.html
    │   ├── HPL_pdrpanllN.html
    │   ├── HPL_pdrpanllT.html
    │   ├── HPL_pdrpanrlN.html
    │   ├── HPL_pdrpanrlT.html
    │   ├── HPL_pdtest.html
    │   ├── HPL_pdtrsv.html
    │   ├── HPL_pdupdateNN.html
    │   ├── HPL_pdupdateNT.html
    │   ├── HPL_pdupdateTN.html
    │   ├── HPL_pdupdateTT.html
    │   ├── HPL_perm.html
    │   ├── HPL_pipid.html
    │   ├── HPL_plindx0.html
    │   ├── HPL_plindx1.html
    │   ├── HPL_plindx10.html
    │   ├── HPL_pnum.html
    │   ├── HPL_ptimer.html
    │   ├── HPL_ptimer_cputime.html
    │   ├── HPL_ptimer_walltime.html
    │   ├── HPL_pwarn.html
    │   ├── HPL_rand.html
    │   ├── HPL_recv.html
    │   ├── HPL_reduce.html
    │   ├── HPL_rollN.html
    │   ├── HPL_rollT.html
    │   ├── HPL_sdrv.html
    │   ├── HPL_send.html
    │   ├── HPL_setran.html
    │   ├── HPL_spreadN.html
    │   ├── HPL_spreadT.html
    │   ├── HPL_sum.html
    │   ├── HPL_timer.html
    │   ├── HPL_timer_cputime.html
    │   ├── HPL_timer_walltime.html
    │   ├── HPL_warn.html
    │   ├── HPL_xjumpm.html
    │   ├── algorithm.html
    │   ├── aprunner.gif
    │   ├── copyright.html
    │   ├── documentation.html
    │   ├── errata.html
    │   ├── faqs.html
    │   ├── index.html
    │   ├── links.html
    │   ├── main.jpg
    │   ├── mat2.jpg
    │   ├── pfact.jpg
    │   ├── references.html
    │   ├── results.html
    │   ├── roll.jpg
    │   ├── rollM.jpg
    │   ├── scalability.html
    │   ├── software.html
    │   ├── spread.jpg
    │   ├── spreadM.jpg
    │   └── tuning.html
├── include
    ├── hpcc.h
    └── hpccver.h
├── python
    ├── Makefile
    ├── Makefile-grig
    ├── default.css
    ├── grig-data
    │   ├── README
    │   ├── hpccoutf.txt-1procs
    │   ├── hpccoutf.txt-2procs
    │   ├── hpccoutf.txt-3procs
    │   └── hpccoutf.txt-4procs
    ├── hpcc.c
    ├── hpcc.py
    ├── index.html
    ├── mpi.pyx
    ├── pyxutil.h
    ├── sequential.py
    └── setup.py
├── setup.py
├── src
    ├── HPL_slamch.c
    ├── bench_lat_bw_1.3.c
    ├── bench_lat_bw_1.5.1.c
    ├── bench_lat_bw_1.5.2.c
    ├── extfinalize.c
    ├── extinit.c
    ├── hpcc.c
    ├── io.c
    └── noopt.c
├── tools
    ├── README.txt
    ├── hpccoutf.pl
    ├── hpccoutf.py
    ├── hpccxml.py
    ├── makefile.py
    ├── readme.py
    ├── setup.py
    └── todo.txt
├── work
    ├── Makefile
    ├── conly
    │   ├── Makefile
    │   ├── c.gpt
    │   ├── d.gpt
    │   ├── enumerate_all.txt
    │   ├── enumfft.py
    │   ├── fft235.c
    │   ├── hpccfft.h
    │   ├── mem4fft.c
    │   ├── results.txt
    │   ├── w1.gpt
    │   ├── w2.gpt
    │   ├── ww.gpt
    │   ├── ww2.gpt
    │   ├── ww3.gpt
    │   ├── ww4.gpt
    │   └── zfft1d.c
    ├── cpp.py
    ├── fft.txt
    ├── fft235.cc
    ├── fftbug.txt
    ├── mem4fft.cc
    ├── mem4fft.h
    ├── tpdtrans.c
    └── zfft1d.cc
└── www
    ├── hpcchallenge.org
        ├── Home.html
        ├── Makefile
        ├── poste_all.html
        └── pre_all.html
    └── icl_hpcc
        ├── orig_theoretical_peak.html
        └── theoretical_peak.html


/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | /hpcc
3 | /.cproject
4 | /.project
5 | /.settings
6 | /hpccinf.txt
7 | /hpccoutf.txt
8 | 
9 | 


--------------------------------------------------------------------------------
/FFT/hpccfft.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <math.h>
 3 | 
 4 | #define FFTE_NDA2 65536
 5 | #define FFTE_NDA3 4096
 6 | #define FFTE_NDA4 256
 7 | 
 8 | /* Parameters that affect performance */
 9 | 
10 | /*
11 |   Blocking parameter. Suggested values:
12 |    8 for Pentium III and Athlon
13 |   16 for Pentium4, Athlon XP, Opteron, Itanium and Itanium2
14 | */
15 | #ifndef FFTE_NBLK
16 | #define FFTE_NBLK 16
17 | #endif
18 | 
19 | /*
20 |   Padding parameter to avoid cache conflicts.
21 |   Suggested values:
22 |   2 for Pentium III
23 |   4 for Athlon, Athlon XP, Opteron, Itanium
24 |   8 for Pentium4 and Itanium2
25 | */
26 | #ifndef FFTE_NP
27 | #define FFTE_NP 8
28 | #endif
29 | 
30 | /* Size of Level 2 cache */
31 | #ifndef FFTE_L2SIZE
32 | #define FFTE_L2SIZE 1048576
33 | #endif
34 | 
35 | #ifdef LONG_IS_64BITS
36 | typedef unsigned long u64Int_t;
37 | typedef long s64Int_t;
38 | #else
39 | typedef unsigned long long u64Int_t;
40 | typedef long long s64Int_t;
41 | #endif
42 | 
43 | #include "wrapfftw.h"
44 | 
45 | extern int HPCC_ipow(int x, int p);
46 | 
47 | extern int HPCC_zfft1d(int n, fftw_complex *a, fftw_complex *b, int iopt, hpcc_fftw_plan p);
48 | extern int HPCC_fft235(fftw_complex *a, fftw_complex *b, fftw_complex *w, int n, const int *ip);
49 | extern int HPCC_settbl(fftw_complex *w, int n);
50 | extern int HPCC_factor235(int n, int *ip);
51 | extern int HPCC_factor235_8(s64Int_t n, int *ip);
52 | 
53 | extern int HPCC_bcnrand(u64Int_t n, u64Int_t a, void *x);
54 | /* Index helpers: the first index varies fastest (ARR2D element (i,j) is a[i + j*lda]). c_mul3v, c_assgn, V3MIN and EMAX each expand to TWO statements, so brace them under if/else/loops. c_mul3v assigns c_re(v) before it re-reads v1 and v2, so v must not alias v1 or v2. */
55 | #define ARR2D(a, i, j, lda) a[(i)+(j)*(lda)]
56 | #define PTR2D(a, i, j, lda) (a+(i)+(j)*(lda))
57 | #define ARR3D(a, i, j, k, lda1, lda2) a[(i)+(lda1)*((j)+(k)*(lda2))]
58 | #define PTR3D(a, i, j, k, lda1, lda2) (a+(i)+(lda1)*((j)+(k)*(lda2)))
59 | #define ARR4D(a, i, j, k, l, lda1, lda2, lda3) a[(i)+(lda1)*((j)+(lda2)*((k)+(lda3)*(l)))]
60 | #define c_mul3v(v, v1, v2) c_re(v) = c_re(v1) * c_re(v2) - c_im(v1) * c_im(v2); c_im(v) = c_re(v1) * c_im(v2) + c_im(v1) * c_re(v2)
61 | #define c_assgn(d, s) c_re(d)=c_re(s);c_im(d)=c_im(s)
62 | #define V3MIN(r, e, v) r = (e); V2MIN(r, v)
63 | #define V2MIN(r, v) r = (v) < r ? (v) : r
64 | #define EMAX(d, v, e) d=(e); d=d>(v)?d:(v)
65 | 
66 | #define    Mmax( a_, b_ )      ( ( (a_) > (b_) ) ?  (a_) : (b_) )
67 | 


--------------------------------------------------------------------------------
/FFT/wrapfftw.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #if defined(USING_FFTW)
 3 | 
 4 | #include <fftw.h>
 5 | 
 6 | #elif defined(USING_FFTW3)
 7 | 
 8 | #include <fftw3.h>
 9 | 
10 | typedef int fftw_direction;
11 | 
12 | #define c_re(c)  ((c)[0])
13 | #define c_im(c)  ((c)[1])
14 | 
15 | #else
16 | 
17 | typedef double fftw_real;
18 | typedef struct {
19 |      fftw_real re, im;
20 | } fftw_complex_orig;
21 | typedef fftw_real HPCC_Complex[2];
22 | typedef HPCC_Complex fftw_complex;
23 | 
24 | typedef enum {
25 |      FFTW_FORWARD = -1, FFTW_BACKWARD = 1
26 | } fftw_direction;
27 | #endif
28 | 
29 | struct hpcc_fftw_plan_struct {
30 |   fftw_complex *w1, *w2, *ww1, *ww2, *ww3, *ww4, *c, *d;
31 |   int n, c_size, d_size;
32 |   int flags;
33 |   fftw_direction dir;
34 | };
35 | typedef struct hpcc_fftw_plan_struct *hpcc_fftw_plan;
36 | 
37 | extern hpcc_fftw_plan HPCC_fftw_create_plan(int n, fftw_direction dir, int flags);
38 | extern void HPCC_fftw_destroy_plan(hpcc_fftw_plan plan);
39 | extern void HPCC_fftw_one(hpcc_fftw_plan plan, fftw_complex *in, fftw_complex *out);
40 | 
41 | #if !defined(USING_FFTW) && !defined(USING_FFTW3)
42 | 
43 | typedef struct hpcc_fftw_plan_struct *fftw_plan;
44 | 
45 | #define c_re(c)  ((c)[0])
46 | #define c_im(c)  ((c)[1])
47 | 
48 | #define fftw_malloc malloc
49 | #define fftw_free free
50 | /* flags for the planner */
51 | #define  FFTW_ESTIMATE (0)
52 | #define  FFTW_MEASURE  (1)
53 | 
54 | #define FFTW_OUT_OF_PLACE (0)
55 | #define FFTW_IN_PLACE (8)
56 | #define FFTW_USE_WISDOM (16)
57 | 
58 | #define fftw_create_plan HPCC_fftw_create_plan
59 | #define fftw_destroy_plan HPCC_fftw_destroy_plan
60 | #define fftw_one HPCC_fftw_one
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/FFT/wrapmpifftw.h:
--------------------------------------------------------------------------------
 1 | #if defined(USING_FFTW)
 2 | 
 3 | #include <fftw_mpi.h>
 4 | 
 5 | #elif defined(USING_FFTW3)
 6 | 
 7 | #include <fftw3-mpi.h>
 8 | 
 9 | typedef int fftw_direction;
10 | 
11 | #define c_re(c)  ((c)[0])
12 | #define c_im(c)  ((c)[1])
13 | 
14 | #else
15 | #include <mpi.h>
16 | typedef struct hpcc_fftw_mpi_plan_struct *fftw_mpi_plan;
17 | #define fftw_mpi_create_plan  HPCC_fftw_mpi_create_plan
18 | #define fftw_mpi_destroy_plan HPCC_fftw_mpi_destroy_plan
19 | #define fftw_mpi HPCC_fftw_mpi
20 | #define fftw_mpi_local_sizes HPCC_fftw_mpi_local_sizes
21 | #endif
22 | /* Plan record passed to the HPCC_fftw_mpi_* functions declared below. NOTE: this header uses s64Int_t and fftw_complex but includes no declaration for them; hpccfft.h (which typedefs s64Int_t and includes wrapfftw.h) has to be included before this file. */
23 | struct hpcc_fftw_mpi_plan_struct {
24 |   MPI_Comm comm;
25 |   MPI_Datatype cmplx;
26 |   fftw_complex *wx, *wy, *wz, *c, *work;
27 |   s64Int_t n;
28 |   int flags, c_size;
29 |   fftw_direction dir;
30 |   double *timings;
31 | };
32 | typedef struct hpcc_fftw_mpi_plan_struct *hpcc_fftw_mpi_plan;
33 | 
34 | extern hpcc_fftw_mpi_plan
35 | HPCC_fftw_mpi_create_plan(MPI_Comm comm, s64Int_t n, fftw_direction dir, int flags);
36 | extern void HPCC_fftw_mpi_destroy_plan(hpcc_fftw_mpi_plan plan);
37 | extern void HPCC_fftw_mpi(hpcc_fftw_mpi_plan p, int n_fields, fftw_complex *local_data,
38 |                      fftw_complex *work);
39 | extern void HPCC_fftw_mpi_local_sizes(hpcc_fftw_mpi_plan p, s64Int_t *local_n,
40 |               s64Int_t *local_start, s64Int_t *local_n_after_transform,
41 |               s64Int_t *local_start_after_transform, s64Int_t *total_local_size);
42 | 
43 | extern int
44 | HPCC_pzfft1d(s64Int_t n, fftw_complex *a, fftw_complex *b, fftw_complex *w, int me, int npu, int iopt,
45 |   hpcc_fftw_mpi_plan p);
46 | 
47 | extern double *HPCC_fft_timings_forward, *HPCC_fft_timings_backward;
48 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # -*- Makefile -*-
 2 | 
 3 | arch = UNKNOWN
 4 | include hpl/Make.$(arch)
 5 | 
 6 | all:
 7 | 	- $(MKDIR) hpl/lib/$(arch)
 8 | 	( $(CD) hpl/lib/arch/build ; $(MAKE) arch=$(arch) -f Makefile.hpcc )
 9 | 
10 | clean:
11 | 	- $(MKDIR) hpl/lib/$(arch)
12 | 	( $(CD) hpl/lib/arch/build ; $(MAKE) arch=$(arch) -f Makefile.hpcc clean )
13 | 
14 | readme: README.html README.txt
15 | 
16 | README.html: README.tex
17 | 	hevea -fix -O README.tex
18 | 	python tools/readme.py README.html
19 | 
20 | README.info: README.tex
21 | 	hevea -fix -info README.tex
22 | 
23 | README.txt: README.tex
24 | 	hevea -fix -text README.tex
25 | 
26 | .PHONY: all clean readme
27 | 


--------------------------------------------------------------------------------
/PTRANS/cblacslt.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #define SGET_SYSCONTXT    0
 3 | #define SGET_BLACSCONTXT 10
 4 | 
 5 | extern double dcputime00(void);
 6 | extern double dwalltime00(void);
 7 | extern void Cblacs_abort(int ConTxt, int ErrNo);
 8 | extern void Cblacs_barrier(int ConTxt, char *scope);
 9 | extern void Cblacs_exit(int NotDone);
10 | extern void Cblacs_get(int ConTxt, int what, int *val);
11 | extern void Cblacs_gridexit(int ConTxt);
12 | extern void Cblacs_gridinfo(int ConTxt, int *nprow, int *npcol, int *myrow, int *mycol);
13 | extern void Cblacs_gridinit(int *ConTxt, char *order, int nprow, int npcol);
14 | extern void Cblacs_gridmap(int *ConTxt, int *umap, int ldumap, int nprow, int npcol);
15 | extern void Cblacs_pinfo(int *mypnum, int *nprocs);
16 | extern void Cdgamn2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int *rA,
17 |   int *cA, int ldia, int rdest, int cdest);
18 | extern void Cdgamx2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda,
19 |   int *rA, int *cA, int ldia, int rdest, int cdest);
20 | extern void Cdgebr2d(int ConTxt, char *scope, char *top, int m, int n, double *A,
21 |   int lda, int rsrc, int csrc);
22 | extern void Cdgebs2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda);
23 | extern void Cdgerv2d(int ConTxt, int m, int n, double *A, int lda, int rsrc, int csrc);
24 | extern void Cdgesd2d(int ConTxt, int m, int n, double *A, int lda, int rdest, int cdest);
25 | extern void Cdgsum2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda,
26 |   int rdest, int cdest);
27 | extern void Cigebr2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rsrc,
28 |   int csrc);
29 | extern void Cigebs2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda);
30 | extern void Cigsum2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda,
31 |   int rdest, int cdest);
32 | extern void Cblacs_dSendrecv(int ctxt, int mSrc, int nSrc, double *Asrc, int ldaSrc, int rdest,
33 |   int cdest, int mDest, int nDest, double *Adest, int ldaDest, int rsrc, int csrc);
34 | 


--------------------------------------------------------------------------------
/RandomAccess/buckets.h:
--------------------------------------------------------------------------------
 1 | 
 2 | typedef struct update_s {
 3 |   char  *poolNext;              /* pointer for memory pool */
 4 |   u64Int value;
 5 |   struct update_s *forward;
 6 | } Update_T, *Update_Ptr;
 7 | 
 8 | #define NULL_UPDATE_PTR ((Update_Ptr) NULL)
 9 | 
10 | typedef struct pe_bucket_s {
11 |   int numUpdates;
12 |   Update_Ptr updateList;
13 | } Bucket_T, *Bucket_Ptr;
14 | 
15 | #define NULL_BUCKET_PTR ((Bucket_Ptr) NULL)
16 | 
17 | extern Bucket_Ptr HPCC_InitBuckets(int numPEs, int maxNumUpdates);
18 | extern void HPCC_FreeBuckets(Bucket_Ptr buckets, int numPEs);
19 | extern void HPCC_InsertUpdate(u64Int ran, int pe,  Bucket_Ptr buckets);
20 | extern int HPCC_GetUpdates(Bucket_Ptr buckets,  u64Int *buffer, int bufferSize, int *peUpdates);
21 | 


--------------------------------------------------------------------------------
/RandomAccess/heap.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #define HEAP_ROOT   0
 3 | #define NOT_A_NODE  (-1)
 4 | 
 5 | typedef struct heap_record {
 6 |   char*  poolNext;              /* pointer for memory pool */
 7 |   int   index;
 8 |   int   key;
 9 | } Heap_Record, *Heap_Record_Ptr;
10 | 
11 | extern void HPCC_ra_Heap_Init (int size);
12 | extern void HPCC_ra_Heap_Insert (int index, int key);
13 | extern void HPCC_ra_Heap_ExtractMax (int *index, int *key);
14 | extern void HPCC_ra_Heap_IncrementKey (int index);
15 | extern void HPCC_ra_Heapify (int node);
16 | extern void HPCC_ra_Heap_Free (void); /* takes no arguments; explicit (void) makes this a real prototype so calls with arguments are rejected at compile time */
17 | 


--------------------------------------------------------------------------------
/RandomAccess/pool.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #define HPCC_NULL_PTR ((char *)0)
 3 | 
 4 | typedef struct Pool_s {        /* used to minimize the use of malloc */
 5 |   char *head;                  /* pointer to the first element of the pool */
 6 |   char *tail;                  /* pointer to the last element of the pool */
 7 |   int   numObjs;               /* number of objects to malloc */
 8 |   int   objSize;               /* size of objects in bytes */
 9 |   char *poolBase;              /* pointer to block of memory allocated for pool */
10 | } POOL;
11 | 
12 | extern POOL* HPCC_PoolInit(int numObjs, int objSize);
13 | extern char *HPCC_PoolGetObj(POOL *poolPtr);
14 | extern void HPCC_PoolReturnObj(POOL *poolPtr, void *objPtr);
15 | extern void HPCC_PoolFree(POOL *poolPtr);
16 | 
17 | 


--------------------------------------------------------------------------------
/RandomAccess/single_cpu.c:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*-
 2 |  *
 3 |  * This file contains the interface for the single cpu RandomAccess test.  The
 4 |  * test is only run on a single (random) node in the MPI universe, with all
 5 |  * other CPUs stuck (in theory, idle) in an MPI_Bcast waiting for the selected
 6 |  * CPU to finish the RandomAccess test.
 7 |  *
 8 |  * This test uses the computational core found in core_single_cpu.c
 9 |  */
10 | 
11 | #include <hpcc.h>
12 | #include "RandomAccess.h"
13 | /* Runs HPCC_RandomAccess on one randomly chosen MPI rank, broadcasts that rank's results to every rank, stores them in params and writes a three-line summary; always returns 0. */
14 | int
15 | HPCC_SingleRandomAccess(HPCC_Params *params)
16 | {
17 |   int myRank, commSize;
18 |   int rv, errCount, rank, failure = 0; /* rv is assigned only on the selected rank; every other rank receives it through the MPI_Bcast below */
19 |   double localGUPs;
20 |   double scl = 1.0 / RAND_MAX; /* after "scl *= commSize" below, scl * rand() lies in [0, commSize] */
21 |   FILE *outputFile = NULL;
22 |   MPI_Comm comm = MPI_COMM_WORLD;
23 | 
24 |   localGUPs = 0.0;
25 | 
26 |   MPI_Comm_size( comm, &commSize );
27 |   MPI_Comm_rank( comm, &myRank );
28 | 
29 |   srand(time(NULL)); /* every rank seeds and draws its own candidate; only rank 0's pick survives the broadcast below */
30 |   scl *= commSize;
31 | 
32 |   /* select a node at random, but not node 0 (unless there is just one node) */
33 |   if (1 == commSize)
34 |     rank = 0;
35 |   else
36 |     for (rank = 0; ; rank = (int)(scl * rand())) { /* starts at 0, which fails the test below, so at least one draw is made */
37 |       if (rank > 0 && rank < commSize) /* rejects 0 and also commSize, which occurs when rand() == RAND_MAX */
38 |         break;
39 |     }
40 | 
41 |   MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */
42 | 
43 |   if (myRank == rank) /* if this node has been selected */
44 |     rv = HPCC_RandomAccess( params, 0 == myRank, &localGUPs, &failure ); /* 2nd argument is true only in the single-process case; NOTE(review): presumably an "do I/O" flag - confirm in core_single_cpu.c */
45 | 
46 |   MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */
47 |   MPI_Bcast( &localGUPs, 1, MPI_DOUBLE, rank, comm ); /* broadcast GUPs */
48 |   MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */
49 |   errCount = rv;
50 |   params->SingleGUPs = localGUPs;
51 |   if (failure) params->Failure = 1; /* only ever sets the flag; an earlier failure recorded in params is not cleared */
52 | 
53 |   BEGIN_IO( myRank, params->outFname, outputFile); /* macro defined elsewhere; NOTE(review): presumably opens the output file on one rank only - confirm in hpcc.h */
54 |   fprintf( outputFile, "Node(s) with error %d\n", errCount );
55 |   fprintf( outputFile, "Node selected %d\n", rank );
56 |   fprintf( outputFile, "Single GUP/s %.6f\n", localGUPs );
57 |   END_IO( myRank, outputFile );
58 | 
59 |   return 0; /* the error code from the test is reported in the output only, not through the return value */
60 | }
61 | 


--------------------------------------------------------------------------------
/RandomAccess/single_cpu_lcg.c:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*-
 2 |  *
 3 |  * This file contains the interface for the single cpu RandomAccess test.  The
 4 |  * test is only run on a single (random) node in the MPI universe, with all
 5 |  * other CPUs stuck (in theory, idle) in an MPI_Bcast waiting for the selected
 6 |  * CPU to finish the RandomAccess test.
 7 |  *
 8 |  * This test uses the computational core found in core_single_cpu_lcg.c
 9 |  */
10 | 
11 | #include <hpcc.h>
12 | #include "RandomAccess.h"
13 | 
14 | int
15 | HPCC_SingleRandomAccess_LCG(HPCC_Params *params)
16 | {
17 |   /* Entry point of the single-process LCG test: one rank runs the kernel and
18 |    * its error code, GUP/s figure and failure flag are broadcast to all. */
19 |   MPI_Comm comm = MPI_COMM_WORLD;
20 |   FILE *outputFile = NULL;
21 |   int myRank, commSize;
22 |   int chosen, status, failure = 0;
23 |   double scale, gups = 0.0;
24 | 
25 |   MPI_Comm_size( comm, &commSize );
26 |   MPI_Comm_rank( comm, &myRank );
27 | 
28 |   srand(time(NULL));
29 |   scale = (1.0 / RAND_MAX) * commSize;
30 | 
31 |   /* With several ranks, draw until the candidate is in 1..commSize-1 so that
32 |    * rank 0 is never picked; a lone rank has to pick itself. */
33 |   chosen = 0;
34 |   if (1 != commSize) {
35 |     do {
36 |       chosen = (int)(scale * rand());
37 |     } while (chosen <= 0 || chosen >= commSize);
38 |   }
39 | 
40 |   /* every rank drew its own candidate; the one from rank 0 is used */
41 |   MPI_Bcast( &chosen, 1, MPI_INT, 0, comm );
42 | 
43 |   if (chosen == myRank) /* only the selected rank runs the kernel */
44 |     status = HPCC_RandomAccess_LCG( params, 0 == myRank, &gups, &failure );
45 | 
46 |   /* the selected rank is the root for all three result broadcasts */
47 |   MPI_Bcast( &status, 1, MPI_INT, chosen, comm );
48 |   MPI_Bcast( &gups, 1, MPI_DOUBLE, chosen, comm );
49 |   MPI_Bcast( &failure, 1, MPI_INT, chosen, comm );
50 |   params->Single_LCG_GUPs = gups;
51 |   if (failure) params->Failure = 1;
52 | 
53 |   BEGIN_IO( myRank, params->outFname, outputFile);
54 |   fprintf( outputFile, "Node(s) with error %d\n", status );
55 |   fprintf( outputFile, "Node selected %d\n", chosen );
56 |   fprintf( outputFile, "Single GUP/s %.6f\n", gups );
57 |   END_IO( myRank, outputFile );
58 | 
59 |   return 0;
60 | }
61 | 


--------------------------------------------------------------------------------
/RandomAccess/star_single_cpu.c:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*-
 2 |  *
 3 |  * This file contains the interface for the star single cpu RandomAccess test.
 4 |  * The test runs on all cpus in the MPI universe, but there is no communication
 5 |  * between cpus during the process (each cpu runs its own version of the
 6 |  * single_cpu test).  The final result is the average of the entire system.
 7 |  *
 8 |  * This test uses the computational core found in core_single_cpu.c
 9 |  */
10 | 
11 | #include <hpcc.h>
12 | #include "RandomAccess.h"
13 | 
14 | int
15 | HPCC_StarRandomAccess(HPCC_Params *params)
16 | {
17 |   /* Every rank runs the kernel; rank 0 collects min, mean and max GUP/s. */
18 |   MPI_Comm comm = MPI_COMM_WORLD;
19 |   FILE *outputFile = NULL;
20 |   double mine = 0.0, lowest = 0.0, mean = 0.0, highest = 0.0;
21 |   int status, errSum, failed = 0, anyFailed = 0;
22 |   int myRank, commSize;
23 | 
24 |   MPI_Comm_size( comm, &commSize );
25 |   MPI_Comm_rank( comm, &myRank );
26 | 
27 |   status = HPCC_RandomAccess( params, 0 == myRank, &mine, &failed );
28 |   /* sum of the return codes (on rank 0), then the largest failure flag */
29 |   MPI_Reduce( &status, &errSum, 1, MPI_INT, MPI_SUM, 0, comm );
30 |   MPI_Allreduce( &failed, &anyFailed, 1, MPI_INT, MPI_MAX, comm );
31 |   if (anyFailed) params->Failure = 1;
32 | 
33 |   MPI_Reduce( &mine, &lowest, 1, MPI_DOUBLE, MPI_MIN, 0, comm );
34 |   MPI_Reduce( &mine, &mean, 1, MPI_DOUBLE, MPI_SUM, 0, comm );
35 |   MPI_Reduce( &mine, &highest, 1, MPI_DOUBLE, MPI_MAX, 0, comm );
36 | 
37 |   /* rank 0 holds the reduced sum; its quotient is broadcast to every rank */
38 |   mean = mean / commSize;
39 |   MPI_Bcast( &mean, 1, MPI_DOUBLE, 0, comm );
40 |   params->StarGUPs = mean;
41 | 
42 |   BEGIN_IO( myRank, params->outFname, outputFile);
43 |   fprintf( outputFile, "Node(s) with error %d\n", errSum );
44 |   fprintf( outputFile, "Minimum GUP/s %.6f\n", lowest );
45 |   fprintf( outputFile, "Average GUP/s %.6f\n", mean );
46 |   fprintf( outputFile, "Maximum GUP/s %.6f\n", highest );
47 |   END_IO( myRank, outputFile );
48 | 
49 |   return 0;
50 | }
51 | 


--------------------------------------------------------------------------------
/RandomAccess/star_single_cpu_lcg.c:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*-
 2 |  *
 3 |  * This file contains the interface for the star single cpu RandomAccess test.
 4 |  * The test runs on all cpus in the MPI universe, but there is no communication
 5 |  * between cpus during the process (each cpu runs its own version of the
 6 |  * single_cpu test).  The final result is the average of the entire system.
 7 |  *
 8 |  * This test uses the computational core found in core_single_cpu_lcg.c
 9 |  */
10 | 
11 | #include <hpcc.h>
12 | #include "RandomAccess.h"
13 | 
14 | int
15 | HPCC_StarRandomAccess_LCG(HPCC_Params *params)
16 | {
17 |   /* Every rank runs the LCG kernel; rank 0 collects min, mean and max GUP/s. */
18 |   MPI_Comm comm = MPI_COMM_WORLD;
19 |   FILE *outputFile = NULL;
20 |   double mine = 0.0, lowest = 0.0, mean = 0.0, highest = 0.0;
21 |   int status, errSum, failed = 0, anyFailed = 0;
22 |   int myRank, commSize;
23 | 
24 |   MPI_Comm_size( comm, &commSize );
25 |   MPI_Comm_rank( comm, &myRank );
26 | 
27 |   status = HPCC_RandomAccess_LCG( params, 0 == myRank, &mine, &failed );
28 |   /* sum of the return codes (on rank 0), then the largest failure flag */
29 |   MPI_Reduce( &status, &errSum, 1, MPI_INT, MPI_SUM, 0, comm );
30 |   MPI_Allreduce( &failed, &anyFailed, 1, MPI_INT, MPI_MAX, comm );
31 |   if (anyFailed) params->Failure = 1;
32 | 
33 |   MPI_Reduce( &mine, &lowest, 1, MPI_DOUBLE, MPI_MIN, 0, comm );
34 |   MPI_Reduce( &mine, &mean, 1, MPI_DOUBLE, MPI_SUM, 0, comm );
35 |   MPI_Reduce( &mine, &highest, 1, MPI_DOUBLE, MPI_MAX, 0, comm );
36 | 
37 |   /* rank 0 holds the reduced sum; its quotient is broadcast to every rank */
38 |   mean = mean / commSize;
39 |   MPI_Bcast( &mean, 1, MPI_DOUBLE, 0, comm );
40 |   params->Star_LCG_GUPs = mean;
41 | 
42 |   BEGIN_IO( myRank, params->outFname, outputFile);
43 |   fprintf( outputFile, "Node(s) with error %d\n", errSum );
44 |   fprintf( outputFile, "Minimum GUP/s %.6f\n", lowest );
45 |   fprintf( outputFile, "Average GUP/s %.6f\n", mean );
46 |   fprintf( outputFile, "Maximum GUP/s %.6f\n", highest );
47 |   END_IO( myRank, outputFile );
48 | 
49 |   return 0;
50 | }
51 | 


--------------------------------------------------------------------------------
/RandomAccess/time_bound.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #if defined( HPCC_RA_STDALG )
 3 | #if ! defined(RA_TIME_BOUND_DISABLE)
 4 | #define RA_TIME_BOUND 1
 5 | #endif
 6 | #endif
 7 | 
 8 | /* time bound in seconds */
 9 | #define TIME_BOUND 60
10 | 
11 | 
12 | /* RA_SAMPLE_FACTOR determines the fraction of the total number
13 |  * of updates used (in time_bound.c) to empirically derive an
14 |  * upper bound for the number of updates executed by the benchmark.
15 |  * This upper bound must be such that the total execution time of the
16 |  * benchmark does not exceed a specified time bound.
17 |  * RA_SAMPLE_FACTOR may need to be adjusted for each architecture
18 |  * since the default number of updates depends on the total
19 |  * memory size.
20 |  */
21 | /* 1% of total number of updates */
22 | #define RA_SAMPLE_FACTOR 100
23 | 
24 | extern void HPCC_Power2NodesTime(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter);
25 | 
26 | extern void HPCC_AnyNodesTime(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter);
27 | 
28 | extern void HPCC_Power2NodesTimeLCG(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter);
29 | 
30 | extern void HPCC_AnyNodesTimeLCG(HPCC_RandomAccess_tabparams_t tparams, double timeBound, u64Int *numIter);
31 | 


--------------------------------------------------------------------------------
/RandomAccess/utility.c:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*-
 2 |  *
 3 |  * This file provides utility functions for the RandomAccess benchmark suite.
 4 |  */
 5 | 
 6 | #include <hpcc.h>
 7 | #include "RandomAccess.h"
 8 | 
 9 | 
10 | /* Utility routine to start random number generator at Nth step: returns the value for step n */
11 | u64Int
12 | HPCC_starts(s64Int n)
13 | {
14 |   int i, j;
15 |   u64Int m2[64];
16 |   u64Int temp, ran;
17 |   /* bring n into 0..PERIOD by whole periods (n == PERIOD is left unchanged) */
18 |   while (n < 0) n += PERIOD;
19 |   while (n > PERIOD) n -= PERIOD;
20 |   if (n == 0) return 0x1;
21 |   /* m2[i] = value reached from 1 after 2*i single steps (shift left, XOR POLY if the sign test fires) */
22 |   temp = 0x1;
23 |   for (i=0; i<64; i++) {
24 |     m2[i] = temp;
25 |     temp = (temp << 1) ^ ((s64Int) temp < 0 ? POLY : 0);
26 |     temp = (temp << 1) ^ ((s64Int) temp < 0 ? POLY : 0);
27 |   }
28 |   /* i = index of the highest set bit of n, searched from bit 62 downwards */
29 |   for (i=62; i>=0; i--)
30 |     if ((n >> i) & 1)
31 |       break;
32 |   /* consume the remaining bits of n from high to low, starting from the value 2 */
33 |   ran = 0x2;
34 |   while (i > 0) {
35 |     temp = 0;
36 |     for (j=0; j<64; j++)
37 |       if ((ran >> j) & 1)
38 |         temp ^= m2[j]; /* XOR of m2[j] over the set bits of ran; NOTE(review): looks like a GF(2) squaring - confirm */
39 |     ran = temp;
40 |     i -= 1;
41 |     if ((n >> i) & 1) /* bit i of n set: apply one more single step */
42 |       ran = (ran << 1) ^ ((s64Int) ran < 0 ? POLY : 0);
43 |   }
44 | 
45 |   return ran;
46 | }
47 | 
48 | /* Jump the LCG x -> LCG_MUL64*x + LCG_ADD64 ahead from 1 by n steps (n read as unsigned) */
49 | u64Int
50 | HPCC_starts_LCG(s64Int n)
51 | {
52 |   u64Int mult = LCG_MUL64;  /* multiplier of the current jump */
53 |   u64Int incr = LCG_ADD64;  /* increment of the current jump */
54 |   u64Int bits = (u64Int)n;  /* bits of n not yet consumed, lowest first */
55 |   u64Int state = 1;
56 | 
57 |   while (bits != 0) {
58 |     if ((bits & 1) != 0)
59 |       state = mult * state + incr;
60 |     /* compose the jump with itself: m*(m*x + a) + a = m*m*x + a*(m + 1) */
61 |     incr = incr * (mult + 1);
62 |     mult = mult * mult;
63 |     bits = bits >> 1;
64 |   }
65 |   return state;
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/STREAM/split_stream_funcs.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | # One entry per input file: (source file, ((marker text, piece name), ...), output directory).
 4 | Tuple = (
 5 | ("stream.c", (
 6 |   ("HPCC_Stream(", "main"),
 7 |   ("checkSTREAMresults(FILE", "checkres"),
 8 |   ("checktick() {", "checktick"),
 9 |   ("void tuned_STREAM_Copy(", "copy"),
10 |   ("tuned_STREAM_Scale(double", "scale"),
11 |   ("void tuned_STREAM_Add(", "add"),
12 |   ("tuned_STREAM_Triad(double", "triad"),
13 |   ("void computeSTREAMerrors(", "checkerr"),
14 | ), "hpcc"),
15 | # A marker that starts with "^" is matched at the beginning of a line only.
16 | ("stream_mpi.c", (
17 | ("main()" , "main"),
18 | ("^checktick()", "checktick"),
19 | ("computeSTREAMerrors(STREAM", "checkerr"),
20 | ("checkSTREAMresults (STREAM", "checkres"),
21 | ("void tuned_STREAM_Copy(", "copy"),
22 | ("tuned_STREAM_Scale(STREAM", "scale"),
23 | ("void tuned_STREAM_Add(", "add"),
24 | ("tuned_STREAM_Triad(STREAM", "triad"),
25 | ), "tstrm"),
26 | )
27 | 
28 | 
29 | def swap_fd(fd, fname, prefix):
30 |   fd.close()
31 | 
32 |   if not os.path.exists(prefix):
33 |     os.mkdir(prefix)
34 | 
35 |   name = os.path.join(prefix, fname +".c")
36 |   if fname.startswith("/dev"):
37 |     name = fname
38 |   fd = open(name, "w")
39 |   return fd
40 | 
41 |         
42 | for tup in Tuple:
43 |   fd = open("/dev/null", "w")
44 | 
45 |   prefix = tup[2]
46 |   
47 |   for line in open(tup[0]):
48 |     for m in tup[1]:
49 |       if m[0].startswith("^"):
50 |         if line.startswith(m[0][1:]):
51 |           fd = swap_fd(fd, m[1], prefix)
52 | 
53 |       elif line.find(m[0]) != -1:
54 |         fd = swap_fd(fd, m[1], prefix)
55 | 
56 |         break
57 | 
58 |     fd.write(line)
59 | 
60 |   fd.close()
61 | 
62 | Replacements = (
63 | ("STREAM_TYPE", "double"),
64 | ("MAX", "Mmax"),
65 | ("MIN", "Mmin"),
66 | ("ssize_t", "int"),
67 | ("abs", "fabs"),
68 | )
69 | 
70 | for tup in Tuple:
71 |   prefix = tup[2]
72 |   for m in tup[1]:
73 |     name = m[1]
74 |     if name.startswith("/dev"):
75 |       continue
76 |     fname = os.path.join(prefix, name + ".c")
77 |     code = open(fname).read()
78 |     for rt in Replacements:
79 |       code = code.replace(rt[0], rt[1])
80 |     open(fname, "w").write(code)
81 | 


--------------------------------------------------------------------------------
/TEST:
--------------------------------------------------------------------------------
1 | hg push test
2 | 


--------------------------------------------------------------------------------
/_hpccinf.txt:
--------------------------------------------------------------------------------
 1 | HPLinpack benchmark input file
 2 | Innovative Computing Laboratory, University of Tennessee
 3 | HPL.out      output file name (if any)
 4 | 8            device out (6=stdout,7=stderr,file)
 5 | 1            # of problems sizes (N)
 6 | 1000         Ns
 7 | 1            # of NBs
 8 | 80           NBs
 9 | 0            PMAP process mapping (0=Row-,1=Column-major)
10 | 1            # of process grids (P x Q)
11 | 2            Ps
12 | 2            Qs
13 | 16.0         threshold
14 | 1            # of panel fact
15 | 2            PFACTs (0=left, 1=Crout, 2=Right)
16 | 1            # of recursive stopping criterium
17 | 4            NBMINs (>= 1)
18 | 1            # of panels in recursion
19 | 2            NDIVs
20 | 1            # of recursive panel fact.
21 | 1            RFACTs (0=left, 1=Crout, 2=Right)
22 | 1            # of broadcast
23 | 1            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
24 | 1            # of lookahead depth
25 | 1            DEPTHs (>=0)
26 | 2            SWAP (0=bin-exch,1=long,2=mix)
27 | 64           swapping threshold
28 | 0            L1 in (0=transposed,1=no-transposed) form
29 | 0            U  in (0=transposed,1=no-transposed) form
30 | 1            Equilibration (0=no,1=yes)
31 | 8            memory alignment in double (> 0)
32 | ##### This line (no. 32) is ignored (it serves as a separator). ######
33 | 0                      		Number of additional problem sizes for PTRANS
34 | 1200 10000 30000        	values of N
35 | 0                       	number of additional blocking sizes for PTRANS
36 | 40 9 8 13 13 20 16 32 64       	values of NB
37 | 


--------------------------------------------------------------------------------
/doc/hpccusrg.tex:
--------------------------------------------------------------------------------
 1 | % -*- LaTeX -*-
 2 | \documentclass[twocolumn]{report}
 3 | 
 4 | \usepackage{xspace}
 5 | 
 6 | \newcommand{\STREAM}{\textsf{STREAM}\xspace}
 7 | \newcommand{\RANDA}{\textsf{RandomAccess}\xspace}
 8 | \newcommand{\PTRANS}{\textsf{PTRANS}\xspace}
 9 | 
10 | \begin{document}
11 | 
12 | \title{HPC Challenge User Guide}
13 | \author{Piotr Luszczek}
14 | \maketitle
15 | 
16 | \chapter{Performance Considerations}
17 | 
18 | As mentioned earlier, we try to operate on large data objects. The
19 | size of these objects is determined at runtime which contrasts with
20 | the original version of the \STREAM benchmark which uses static
21 | storage~(determined at compile time) and size. The original benchmark
22 | gives the compiler more information~(and control) over data alignment,
23 | loop trip counts, etc.
24 | 
25 | \RANDA is by design heavy in misses that occur at various levels of
26 | memory hierarchy. The Translation Look-aside Buffer~(TLB) is also
27 | stressed. It might be the TLB pressure that comes into play in the SMP
28 | and multicore setting. If TLB is shared between multiple processing
29 | elements it will become the bottleneck. It's because TLBs do not have
30 | prefetch functionality nor any other latency hiding mechanism.
31 | 
32 | \section{Tuning}
33 | 
34 | \PTRANS
35 | 
36 | \end{document}
37 | 
38 | Rules: avoid, reference web page
39 | definitions: CPU, chip, core, process, node
40 | optimizations: FFTE parameters, using FFTW 2 by modifying FFTW code to
41 | accommodate large vector sizes
42 | OpenMP: IBM xlc_r version 6 doesn't define _OPENMP, -qsmp, -qsmp=noauto
43 | Total,process,thread memory file
44 | Appendix with units
45 | 


--------------------------------------------------------------------------------
/hpl/BUGS:
--------------------------------------------------------------------------------
 1 | ==============================================================
 2 |  List of the known problems with the HPL software
 3 | 
 4 |  Current as of release 2.0 - September 10, 2008
 5 | ==============================================================
 6 | 
 7 | ==============================================================
 8 |  
 9 | ==============================================================
10 | 


--------------------------------------------------------------------------------
/hpl/Make.UNKNOWN:
--------------------------------------------------------------------------------
 1 | # -*- Makefile -*-
 2 | 
 3 | RM = exit
 4 | CD = exit
 5 | arch=UNKNOWN
 6 | 
 7 | UNKNOWN:
 8 | 	@echo
 9 | 	@echo Please specify "'"arch"'" variable, for example:
10 | 	@echo 1. Create file Make.Unix in "'"hpl"'" directory
11 | 	@echo 2. Type: make arch=Unix
12 | 	@echo
13 | 
14 | .PHONY: UNKNOWN
15 | 


--------------------------------------------------------------------------------
/hpl/README:
--------------------------------------------------------------------------------
 1 | ==============================================================
 2 |  High Performance Computing Linpack Benchmark (HPL)
 3 |  HPL 2.0 - September 10, 2008
 4 | ==============================================================
 5 | 
 6 |  HPL is a software package that solves a (random) dense linear
 7 |  system  in   double  precision  (64   bits)   arithmetic   on 
 8 |  distributed-memory  computers.   It can thus be regarded as a
 9 |  portable as well as  freely  available implementation  of the
10 |  High Performance Computing Linpack Benchmark.
11 | 
12 |  The  HPL  software  package requires the availability on your
13 |  system of an implementation of the  Message Passing Interface
14 |  MPI  (1.1 compliant).  An  implementation of either the Basic
15 |  Linear Algebra Subprograms  BLAS  or the  Vector Signal Image
16 |  Processing Library VSIPL is also needed.  Machine-specific as
17 |  well as generic implementations of MPI, the  BLAS  and  VSIPL
18 |  are available for a large variety of systems.
19 | 
20 |  Install See the file INSTALL in this directory.
21 |  -------
22 | 
23 |  Tuning  See the file TUNING in this directory.
24 |  ------
25 | 
26 |  Bugs  Known  problems and bugs with this release are documen-
27 |  ----  ted in the file hpl/BUGS.
28 | 
29 |  Check out  the website  www.netlib.org/benchmark/hpl  for the
30 |  latest information.
31 | 
32 | ==============================================================
33 | 


--------------------------------------------------------------------------------
/hpl/TODO:
--------------------------------------------------------------------------------
 1 | ==============================================================
 2 |  High Performance Computing Linpack Benchmark (HPL)
 3 |  HPL 2.0 - September 10, 2008
 4 | ==============================================================
 5 | 
 6 |  Done list in version 1.0b, December 15th, 2004
 7 |  - Fixed problem with 32-bit integer overflow.
 8 |    Thanks to John Baron.
 9 | 
10 |  Done list in version 1.0a, January 1st, 2004
11 |  - Added Row- or Column-major process mapping in data file
12 |  - Fixed compilation error for gcc 3.3 in walltime.
13 |  - Fixed building problems on the T3E;
14 |    Thanks to Edward Anderson.
15 | 
16 | ==============================================================
17 | 


--------------------------------------------------------------------------------
/hpl/include/hpccmema.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */
 2 | 
 3 | #ifndef HPCCMEMA_H
 4 | #define HPCCMEMA_H 1
 5 | 
 6 | #ifdef HPCC_MEMALLCTR
 7 | extern int HPCC_alloc_init(size_t total_size);
 8 | extern int HPCC_alloc_finalize();
 9 | extern void *HPCC_malloc(size_t size);
10 | extern void HPCC_free(void *ptr);
11 | #define HPCC_fftw_malloc HPCC_malloc
12 | #define HPCC_fftw_free HPCC_free
13 | #define HPCC_XMALLOC(t,s) ((t*)HPCC_malloc(sizeof(t)*(s)))
14 | #else
15 | #define HPCC_malloc malloc
16 | #define HPCC_free free
17 | #define HPCC_fftw_malloc fftw_malloc
18 | #define HPCC_fftw_free fftw_free
19 | #define HPCC_XMALLOC(t,s) XMALLOC(t,s)
20 | #endif
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_abort.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_abort 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_abort \- halts execution.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_abort(\fR
 9 | \fB\&int\fR
10 | \fI\&LINE\fR,
11 | \fB\&const char *\fR
12 | \fI\&SRNAME\fR,
13 | \fB\&const char *\fR
14 | \fI\&FORM\fR,
15 | \fB\&...\fR
16 | \fB\&);\fR
17 | .SH DESCRIPTION
18 | \fB\&HPL_abort\fR
19 | displays an error message on stderr and halts execution.
20 | .SH ARGUMENTS
21 | .TP 8
22 | LINE    (local input)           int
23 | On entry,  LINE  specifies the line  number in the file where
24 | the  error  has occurred.  When  LINE  is not a positive line
25 | number, it is ignored.
26 | .TP 8
27 | SRNAME  (local input)           const char *
28 | On entry, SRNAME  should  be the name of the routine  calling
29 | this error handler.
30 | .TP 8
31 | FORM    (local input)           const char *
32 | On entry, FORM specifies the format, i.e., how the subsequent
33 | arguments are converted for output.
34 | .TP 8
35 |         (local input)           ...
36 | On entry,  ...  is the list of arguments to be printed within
37 | the format string.
38 | .SH EXAMPLE
39 | \fI\&#include "hpl.h"\fR
40 |  
41 | int main(int argc, char *argv[])
42 | .br
43 | {
44 | .br
45 |    HPL_abort( __LINE__, __FILE__, "Halt.\en" );
46 | .br
47 |    exit(0); return(0);
48 | .br
49 | }
50 | .SH SEE ALSO
51 | .BR HPL_fprintf \ (3),
52 | .BR HPL_warn \ (3).
53 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_all_reduce.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_all_reduce 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_all_reduce \- All reduce operation.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_all_reduce(\fR
 9 | \fB\&void *\fR
10 | \fI\&BUFFER\fR,
11 | \fB\&const int\fR
12 | \fI\&COUNT\fR,
13 | \fB\&const HPL_T_TYPE\fR
14 | \fI\&DTYPE\fR,
15 | \fB\&const HPL_T_OP \fR
16 | \fI\&OP\fR,
17 | \fB\&MPI_Comm\fR
18 | \fI\&COMM\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_all_reduce\fR
22 | performs   a   global   reduce  operation  across  all
23 | processes of a group leaving the results on all processes.
24 | .SH ARGUMENTS
25 | .TP 8
26 | BUFFER  (local input/global output) void *
27 | On entry,  BUFFER  points to  the  buffer to be combined.  On
28 | exit, this array contains the combined data and  is identical
29 | on all processes in the group.
30 | .TP 8
31 | COUNT   (global input)          const int
32 | On entry,  COUNT  indicates the number of entries in  BUFFER.
33 | COUNT must be at least zero.
34 | .TP 8
35 | DTYPE   (global input)          const HPL_T_TYPE
36 | On entry,  DTYPE  specifies the type of the buffers operands.
37 | .TP 8
38 | OP      (global input)          const HPL_T_OP 
39 | On entry, OP is a pointer to the local combine function.
40 | .TP 8
41 | COMM    (global/local input)    MPI_Comm
42 | The MPI communicator identifying the process collection.
43 | .SH SEE ALSO
44 | .BR HPL_broadcast \ (3),
45 | .BR HPL_reduce \ (3),
46 | .BR HPL_barrier \ (3),
47 | .BR HPL_min \ (3),
48 | .BR HPL_max \ (3),
49 | .BR HPL_sum \ (3).
50 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_barrier.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_barrier 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_barrier \- Barrier operation.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_barrier(\fR
 9 | \fB\&MPI_Comm\fR
10 | \fI\&COMM\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_barrier\fR
14 | blocks the caller until all process members have called it.
15 | The  call  returns  at any process  only after all group members have
16 | entered the call.
17 | .SH ARGUMENTS
18 | .TP 8
19 | COMM    (global/local input)    MPI_Comm
20 | The MPI communicator identifying the process collection.
21 | .SH SEE ALSO
22 | .BR HPL_broadcast \ (3),
23 | .BR HPL_reduce \ (3),
24 | .BR HPL_all_reduce \ (3),
25 | .BR HPL_min \ (3),
26 | .BR HPL_max \ (3),
27 | .BR HPL_sum \ (3).
28 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_bcast.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_bcast 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_bcast \- Perform the row broadcast.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_bcast(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PANEL\fR,
11 | \fB\&int *\fR
12 | \fI\&IFLAG\fR
13 | \fB\&);\fR
14 | .SH DESCRIPTION
15 | \fB\&HPL_bcast\fR
16 | broadcasts  the  current  panel.  Successful  completion is
17 | indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
18 | HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
19 | not completed, in which case this function should be called again.
20 | .SH ARGUMENTS
21 | .TP 8
22 | PANEL   (input/output)          HPL_T_panel *
23 | On entry,  PANEL  points to the  current panel data structure
24 | being broadcast.
25 | .TP 8
26 | IFLAG   (output)                int *
27 | On exit,  IFLAG  indicates  whether  or not the broadcast has
28 | occurred.
29 | .SH SEE ALSO
30 | .BR HPL_binit \ (3),
31 | .BR HPL_bwait \ (3).
32 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_binit.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_binit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_binit \- Initialize the row broadcast.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_binit(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PANEL\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_binit\fR
14 | initializes  a  row  broadcast.  Successful  completion  is
15 | indicated by the returned error code HPL_SUCCESS.
16 | .SH ARGUMENTS
17 | .TP 8
18 | PANEL   (input/output)          HPL_T_panel *
19 | On entry,  PANEL  points to the  current panel data structure
20 | being broadcast.
21 | .SH SEE ALSO
22 | .BR HPL_bcast \ (3),
23 | .BR HPL_bwait \ (3).
24 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_broadcast.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_broadcast 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_broadcast \- Broadcast operation.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_broadcast(\fR
 9 | \fB\&void *\fR
10 | \fI\&BUFFER\fR,
11 | \fB\&const int\fR
12 | \fI\&COUNT\fR,
13 | \fB\&const HPL_T_TYPE\fR
14 | \fI\&DTYPE\fR,
15 | \fB\&const int\fR
16 | \fI\&ROOT\fR,
17 | \fB\&MPI_Comm\fR
18 | \fI\&COMM\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_broadcast\fR
22 | broadcasts  a message from the process with rank ROOT to
23 | all processes in the group.
24 | .SH ARGUMENTS
25 | .TP 8
26 | BUFFER  (local input/output)    void *
27 | On entry,  BUFFER  points to  the  buffer to be broadcast. On
28 | exit, this array contains the broadcast data and is identical
29 | on all processes in the group.
30 | .TP 8
31 | COUNT   (global input)          const int
32 | On entry,  COUNT  indicates the number of entries in  BUFFER.
33 | COUNT must be at least zero.
34 | .TP 8
35 | DTYPE   (global input)          const HPL_T_TYPE
36 | On entry,  DTYPE  specifies the type of the buffers operands.
37 | .TP 8
38 | ROOT    (global input)          const int
39 | On entry, ROOT is the coordinate of the source process.
40 | .TP 8
41 | COMM    (global/local input)    MPI_Comm
42 | The MPI communicator identifying the process collection.
43 | .SH SEE ALSO
44 | .BR HPL_reduce \ (3),
45 | .BR HPL_all_reduce \ (3),
46 | .BR HPL_barrier \ (3),
47 | .BR HPL_min \ (3),
48 | .BR HPL_max \ (3),
49 | .BR HPL_sum \ (3).
50 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_bwait.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_bwait 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_bwait \- Finalize the row broadcast.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_bwait(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PANEL\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_bwait\fR
14 | waits  for  the  row  broadcast  of  the  current  panel  to
15 | terminate.  Successful completion is indicated by the returned  error
16 | code HPL_SUCCESS.
17 | .SH ARGUMENTS
18 | .TP 8
19 | PANEL   (input/output)          HPL_T_panel *
20 | On entry,  PANEL  points to the  current panel data structure
21 | being broadcast.
22 | .SH SEE ALSO
23 | .BR HPL_binit \ (3),
24 | .BR HPL_bcast \ (3).
25 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_copyL.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_copyL 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_copyL \- Copy the current panel into a contiguous workspace.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_copyL(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PANEL\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_copyL\fR
14 | copies  the  panel of columns, the L1 replicated submatrix,
15 | the pivot array  and  the info scalar into a contiguous workspace for
16 | later broadcast.
17 |  
18 | The copy of this panel  into  a contiguous buffer  can be enforced by
19 | specifying -DHPL_COPY_L in the architecture specific Makefile.
20 | .SH ARGUMENTS
21 | .TP 8
22 | PANEL   (input/output)          HPL_T_panel *
23 | On entry,  PANEL  points to the  current panel data structure
24 | being broadcast.
25 | .SH SEE ALSO
26 | .BR HPL_binit \ (3),
27 | .BR HPL_bcast \ (3),
28 | .BR HPL_bwait \ (3).
29 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dcopy.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dcopy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dcopy \- y := x.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dcopy(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&const double *\fR
12 | \fI\&X\fR,
13 | \fB\&const int\fR
14 | \fI\&INCX\fR,
15 | \fB\&double *\fR
16 | \fI\&Y\fR,
17 | \fB\&const int\fR
18 | \fI\&INCY\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_dcopy\fR
22 | copies the vector x into the vector y.
23 | .SH ARGUMENTS
24 | .TP 8
25 | N       (local input)           const int
26 | On entry, N specifies the length of the vectors  x  and  y. N
27 | must be at least zero.
28 | .TP 8
29 | X       (local input)           const double *
30 | On entry,  X  is an incremented array of dimension  at  least
31 | ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
32 | .TP 8
33 | INCX    (local input)           const int
34 | On entry, INCX specifies the increment for the elements of X.
35 | INCX must not be zero.
36 | .TP 8
37 | Y       (local input/output)    double *
38 | On entry,  Y  is an incremented array of dimension  at  least
39 | ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
40 | On exit, the entries of the incremented array  Y  are updated
41 | with the entries of the incremented array X.
42 | .TP 8
43 | INCY    (local input)           const int
44 | On entry, INCY specifies the increment for the elements of Y.
45 | INCY must not be zero.
46 | .SH EXAMPLE
47 | \fI\&#include "hpl.h"\fR
48 |  
49 | int main(int argc, char *argv[])
50 | .br
51 | {
52 | .br
53 |    double x[3], y[3];
54 | .br
55 |    x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
56 | .br
57 |    y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
58 | .br
59 |    HPL_dcopy( 3, x, 1, y, 1 );
60 | .br
61 |    printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
62 | .br
63 |    exit(0); return(0);
64 | .br
65 | }
66 | .SH SEE ALSO
67 | .BR HPL_daxpy \ (3),
68 | .BR HPL_dscal \ (3),
69 | .BR HPL_dswap \ (3).
70 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dlacpy.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dlacpy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dlacpy \- B := A.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dlacpy(\fR
 9 | \fB\&const int\fR
10 | \fI\&M\fR,
11 | \fB\&const int\fR
12 | \fI\&N\fR,
13 | \fB\&const double *\fR
14 | \fI\&A\fR,
15 | \fB\&const int\fR
16 | \fI\&LDA\fR,
17 | \fB\&double *\fR
18 | \fI\&B\fR,
19 | \fB\&const int\fR
20 | \fI\&LDB\fR
21 | \fB\&);\fR
22 | .SH DESCRIPTION
23 | \fB\&HPL_dlacpy\fR
24 | copies an array A into an array B.
25 | .SH ARGUMENTS
26 | .TP 8
27 | M       (local input)           const int
28 | On entry,  M specifies the number of rows of the arrays A and
29 | B. M must be at least zero.
30 | .TP 8
31 | N       (local input)           const int
32 | On entry,  N specifies  the number of columns of the arrays A
33 | and B. N must be at least zero.
34 | .TP 8
35 | A       (local input)           const double *
36 | On entry, A points to an array of dimension (LDA,N).
37 | .TP 8
38 | LDA     (local input)           const int
39 | On entry, LDA specifies the leading dimension of the array A.
40 | LDA must be at least MAX(1,M).
41 | .TP 8
42 | B       (local output)          double *
43 | On entry, B points to an array of dimension (LDB,N). On exit,
44 | B is overwritten with A.
45 | .TP 8
46 | LDB     (local input)           const int
47 | On entry, LDB specifies the leading dimension of the array B.
48 | LDB must be at least MAX(1,M).
49 | .SH EXAMPLE
50 | \fI\&#include "hpl.h"\fR
51 |  
52 | int main(int argc, char *argv[])
53 | .br
54 | {
55 | .br
56 |    double a[2*2], b[2*2];
57 | .br
58 |    a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
59 | .br
60 |    HPL_dlacpy( 2, 2, a, 2, b, 2 );
61 | .br
62 |    printf("  [%f,%f]\en", b[0], b[2]);
63 | .br
64 |    printf("b=[%f,%f]\en", b[1], b[3]);
65 | .br
66 |    exit(0);
67 | .br
68 |    return(0);
69 | .br
70 | }
71 | .SH SEE ALSO
72 | .BR HPL_dlatcpy \ (3).
73 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dlaprnt.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dlaprnt 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dlaprnt \- Print the matrix A.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dlaprnt(\fR
 9 | \fB\&const int\fR
10 | \fI\&M\fR,
11 | \fB\&const int\fR
12 | \fI\&N\fR,
13 | \fB\&double *\fR
14 | \fI\&A\fR,
15 | \fB\&const int\fR
16 | \fI\&IA\fR,
17 | \fB\&const int\fR
18 | \fI\&JA\fR,
19 | \fB\&const int\fR
20 | \fI\&LDA\fR,
21 | \fB\&const char *\fR
22 | \fI\&CMATNM\fR
23 | \fB\&);\fR
24 | .SH DESCRIPTION
25 | \fB\&HPL_dlaprnt\fR
26 | prints to standard error an M-by-N matrix A.
27 | .SH ARGUMENTS
28 | .TP 8
29 | M       (local input)           const int
30 | On entry,  M  specifies the number of rows of A. M must be at
31 | least zero.
32 | .TP 8
33 | N       (local input)           const int
34 | On entry,  N  specifies the number of columns of A. N must be
35 | at least zero.
36 | .TP 8
37 | A       (local input)           double *
38 | On entry, A  points to an array of dimension (LDA,N).
39 | .TP 8
40 | IA      (local input)           const int
41 | On entry, IA specifies the starting row index to be printed.
42 | .TP 8
43 | JA      (local input)           const int
44 | On entry,  JA  specifies  the  starting  column index  to be
45 | printed.
46 | .TP 8
47 | LDA     (local input)           const int
48 | On entry, LDA specifies the leading dimension of the array A.
49 | LDA must be at least max(1,M).
50 | .TP 8
51 | CMATNM  (local input)           const char *
52 | On entry, CMATNM is the name of the matrix to be printed.
53 | .SH EXAMPLE
54 | \fI\&#include "hpl.h"\fR
55 |  
56 | int main(int argc, char *argv[])
57 | .br
58 | {
59 | .br
60 |    double a[2*2];
61 | .br
62 |    a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
63 | .br
64 |    HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
65 | .br
66 |    exit(0); return(0);
67 | .br
68 | }
69 | .SH SEE ALSO
70 | .BR HPL_fprintf \ (3).
71 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dlaswp00N.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dlaswp00N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dlaswp00N \- performs a series of row interchanges.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dlaswp00N(\fR
 9 | \fB\&const int\fR
10 | \fI\&M\fR,
11 | \fB\&const int\fR
12 | \fI\&N\fR,
13 | \fB\&double *\fR
14 | \fI\&A\fR,
15 | \fB\&const int\fR
16 | \fI\&LDA\fR,
17 | \fB\&const int *\fR
18 | \fI\&IPIV\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_dlaswp00N\fR
22 | performs a series of local row interchanges on a matrix
23 | A. One row interchange is initiated for rows 0 through M-1 of A.
24 | .SH ARGUMENTS
25 | .TP 8
26 | M       (local input)           const int
27 | On entry, M specifies the number of rows of the array A to be
28 | interchanged. M must be at least zero.
29 | .TP 8
30 | N       (local input)           const int
31 | On entry, N  specifies  the number of columns of the array A.
32 | N must be at least zero.
33 | .TP 8
34 | A       (local input/output)    double *
35 | On entry, A  points to an array of dimension (LDA,N) to which
36 | the row interchanges will be  applied.  On exit, the permuted
37 | matrix.
38 | .TP 8
39 | LDA     (local input)           const int
40 | On entry, LDA specifies the leading dimension of the array A.
41 | LDA must be at least MAX(1,M).
42 | .TP 8
43 | IPIV    (local input)           const int *
44 | On entry,  IPIV  is  an  array of size  M  that  contains the
45 | pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
46 | implies that local rows k and l are to be interchanged.
47 | .SH SEE ALSO
48 | .BR HPL_dlaswp00N \ (3),
49 | .BR HPL_dlaswp10N \ (3),
50 | .BR HPL_dlaswp01N \ (3),
51 | .BR HPL_dlaswp01T \ (3),
52 | .BR HPL_dlaswp02N \ (3),
53 | .BR HPL_dlaswp03N \ (3),
54 | .BR HPL_dlaswp03T \ (3),
55 | .BR HPL_dlaswp04N \ (3),
56 | .BR HPL_dlaswp04T \ (3),
57 | .BR HPL_dlaswp05N \ (3),
58 | .BR HPL_dlaswp05T \ (3),
59 | .BR HPL_dlaswp06N \ (3),
60 | .BR HPL_dlaswp06T \ (3).
61 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dlaswp10N.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dlaswp10N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dlaswp10N \- performs a series of column interchanges.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dlaswp10N(\fR
 9 | \fB\&const int\fR
10 | \fI\&M\fR,
11 | \fB\&const int\fR
12 | \fI\&N\fR,
13 | \fB\&double *\fR
14 | \fI\&A\fR,
15 | \fB\&const int\fR
16 | \fI\&LDA\fR,
17 | \fB\&const int *\fR
18 | \fI\&IPIV\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_dlaswp10N\fR
22 | performs a sequence  of  local column interchanges on a
23 | matrix A.  One column interchange is initiated  for columns 0 through
24 | N-1 of A.
25 | .SH ARGUMENTS
26 | .TP 8
27 | M       (local input)           const int
28 | On entry, M specifies the number of rows of the array A. M must be at least zero.
29 | .TP 8
30 | N       (local input)           const int
31 | On entry, N specifies the number of columns of the array A. N must be at least zero.
32 | .TP 8
33 | A       (local input/output)    double *
34 | On entry, A  points to an  array of  dimension (LDA,N).  This
35 | array contains the columns onto which the interchanges should
36 | be applied. On exit, A contains the permuted matrix.
37 | .TP 8
38 | LDA     (local input)           const int
39 | On entry, LDA specifies the leading dimension of the array A.
40 | LDA must be at least MAX(1,M).
41 | .TP 8
42 | IPIV    (local input)           const int *
43 | On entry,  IPIV  is  an  array of size  N  that  contains the
44 | pivoting  information.  For  k  in [0..N),  IPIV[k]=IROFF + l
45 | implies that local columns k and l are to be interchanged.
46 | .SH SEE ALSO
47 | .BR HPL_dlaswp00N \ (3),
48 | .BR HPL_dlaswp10N \ (3),
49 | .BR HPL_dlaswp01N \ (3),
50 | .BR HPL_dlaswp01T \ (3),
51 | .BR HPL_dlaswp02N \ (3),
52 | .BR HPL_dlaswp03N \ (3),
53 | .BR HPL_dlaswp03T \ (3),
54 | .BR HPL_dlaswp04N \ (3),
55 | .BR HPL_dlaswp04T \ (3),
56 | .BR HPL_dlaswp05N \ (3),
57 | .BR HPL_dlaswp05T \ (3),
58 | .BR HPL_dlaswp06N \ (3),
59 | .BR HPL_dlaswp06T \ (3).
60 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dlatcpy.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dlatcpy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dlatcpy \- B := A^T
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dlatcpy(\fR
 9 | \fB\&const int\fR
10 | \fI\&M\fR,
11 | \fB\&const int\fR
12 | \fI\&N\fR,
13 | \fB\&const double *\fR
14 | \fI\&A\fR,
15 | \fB\&const int\fR
16 | \fI\&LDA\fR,
17 | \fB\&double *\fR
18 | \fI\&B\fR,
19 | \fB\&const int\fR
20 | \fI\&LDB\fR
21 | \fB\&);\fR
22 | .SH DESCRIPTION
23 | \fB\&HPL_dlatcpy\fR
24 | copies the transpose of an array A into an array B.
25 | .SH ARGUMENTS
26 | .TP 8
27 | M       (local input)           const int
28 | On entry,  M specifies the number of  rows of the array B and
29 | the number of columns of A. M must be at least zero.
30 | .TP 8
31 | N       (local input)           const int
32 | On entry,  N specifies the number of  rows of the array A and
33 | the number of columns of B. N must be at least zero.
34 | .TP 8
35 | A       (local input)           const double *
36 | On entry, A points to an array of dimension (LDA,M).
37 | .TP 8
38 | LDA     (local input)           const int
39 | On entry, LDA specifies the leading dimension of the array A.
40 | LDA must be at least MAX(1,N).
41 | .TP 8
42 | B       (local output)          double *
43 | On entry, B points to an array of dimension (LDB,N). On exit,
44 | B is overwritten with the transpose of A.
45 | .TP 8
46 | LDB     (local input)           const int
47 | On entry, LDB specifies the leading dimension of the array B.
48 | LDB must be at least MAX(1,M).
49 | .SH EXAMPLE
50 | \fI\&#include "hpl.h"\fR
51 |  
52 | int main(int argc, char *argv[])
53 | .br
54 | {
55 | .br
56 |    double a[2*2], b[2*2];
57 | .br
58 |    a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
59 | .br
60 |    HPL_dlatcpy( 2, 2, a, 2, b, 2 );
61 | .br
62 |    printf("  [%f,%f]\en", b[0], b[2]);
63 | .br
64 |    printf("b=[%f,%f]\en", b[1], b[3]);
65 | .br
66 |    exit(0); return(0);
67 | .br
68 | }
69 | .SH SEE ALSO
70 | .BR HPL_dlacpy \ (3).
71 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dlocswpT.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dlocswpT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dlocswpT \- locally swaps rows within panel.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dlocswpT(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PANEL\fR,
11 | \fB\&const int\fR
12 | \fI\&II\fR,
13 | \fB\&const int\fR
14 | \fI\&JJ\fR,
15 | \fB\&double *\fR
16 | \fI\&WORK\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_dlocswpT\fR
20 | performs  the local swapping operations  within a panel.
21 | The lower triangular  N0-by-N0  upper block of the panel is stored in
22 | transpose form.
23 | .SH ARGUMENTS
24 | .TP 8
25 | PANEL   (local input/output)    HPL_T_panel *
26 | On entry,  PANEL  points to the data structure containing the
27 | panel information.
28 | .TP 8
29 | II      (local input)           const int
30 | On entry, II  specifies the row offset where the column to be
31 | operated on starts with respect to the panel.
32 | .TP 8
33 | JJ      (local input)           const int
34 | On entry, JJ  specifies the column offset where the column to
35 | be operated on starts with respect to the panel.
36 | .TP 8
37 | WORK    (local workspace)       double *
38 | On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
39 | WORK[0] contains  the  local  maximum  absolute value scalar,
40 | WORK[1] contains  the corresponding local row index,  WORK[2]
41 | contains the corresponding global row index, and  WORK[3]  is
42 | the coordinate of process owning this max.  The N0 length max
43 | row is stored in WORK[4:4+N0-1];  Note  that this is also the
44 | JJth row  (or column) of L1. The remaining part of this array
45 | is used as workspace.
46 | .SH SEE ALSO
47 | .BR HPL_dlocmax \ (3),
48 | .BR HPL_dlocswpN \ (3),
49 | .BR HPL_pdmxswp \ (3),
50 | .BR HPL_pdpancrN \ (3),
51 | .BR HPL_pdpancrT \ (3),
52 | .BR HPL_pdpanllN \ (3),
53 | .BR HPL_pdpanllT \ (3),
54 | .BR HPL_pdpanrlN \ (3),
55 | .BR HPL_pdpanrlT \ (3),
56 | .BR HPL_pdrpancrN \ (3),
57 | .BR HPL_pdrpancrT \ (3),
58 | .BR HPL_pdrpanllN \ (3),
59 | .BR HPL_pdrpanllT \ (3),
60 | .BR HPL_pdrpanrlN \ (3),
61 | .BR HPL_pdrpanrlT \ (3),
62 | .BR HPL_pdfact \ (3).
63 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dmatgen.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dmatgen 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dmatgen \- random matrix generator.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dmatgen(\fR
 9 | \fB\&const int\fR
10 | \fI\&M\fR,
11 | \fB\&const int\fR
12 | \fI\&N\fR,
13 | \fB\&double *\fR
14 | \fI\&A\fR,
15 | \fB\&const int\fR
16 | \fI\&LDA\fR,
17 | \fB\&const int\fR
18 | \fI\&ISEED\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_dmatgen\fR
22 | generates (or regenerates) a random matrix A.
23 |  
24 | The  pseudo-random  generator uses the linear congruential algorithm:
25 | X(n+1) = (a * X(n) + c) mod m  as  described  in the  Art of Computer
26 | Programming, Knuth 1973, Vol. 2.
27 | .SH ARGUMENTS
28 | .TP 8
29 | M       (input)                 const int
30 | On entry,  M  specifies  the number  of rows of the matrix A.
31 | M must be at least zero.
32 | .TP 8
33 | N       (input)                 const int
34 | On entry,  N specifies the number of columns of the matrix A.
35 | N must be at least zero.
36 | .TP 8
37 | A       (output)                double *
38 | On entry, A points to an array of dimension (LDA,N). On exit,
39 | this  array  contains   the   coefficients  of  the  randomly
40 | generated matrix.
41 | .TP 8
42 | LDA     (input)                 const int
43 | On entry, LDA specifies the leading dimension of the array A.
44 | LDA must be at least max(1,M).
45 | .TP 8
46 | ISEED   (input)                 const int
47 | On entry, ISEED  specifies  the  seed  number to generate the
48 | matrix A. ISEED must be at least zero.
49 | .SH SEE ALSO
50 | .BR HPL_ladd \ (3),
51 | .BR HPL_lmul \ (3),
52 | .BR HPL_setran \ (3),
53 | .BR HPL_xjumpm \ (3),
54 | .BR HPL_jumpit \ (3),
55 | .BR HPL_rand \ (3).
56 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dscal.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dscal 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dscal \- x = alpha * x.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dscal(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&const double\fR
12 | \fI\&ALPHA\fR,
13 | \fB\&double *\fR
14 | \fI\&X\fR,
15 | \fB\&const int\fR
16 | \fI\&INCX\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_dscal\fR
20 | scales the vector x by alpha.
21 | .SH ARGUMENTS
22 | .TP 8
23 | N       (local input)           const int
24 | On entry, N specifies the length of the vector x. N  must  be
25 | at least zero.
26 | .TP 8
27 | ALPHA   (local input)           const double
28 | On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
29 | supplied as zero, then the entries of the incremented array X
30 | need not be set on input.
31 | .TP 8
32 | X       (local input/output)    double *
33 | On entry,  X  is an incremented array of dimension  at  least
34 | ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
35 | On exit, the entries of the incremented array  X  are  scaled
36 | by the scalar alpha.
37 | .TP 8
38 | INCX    (local input)           const int
39 | On entry, INCX specifies the increment for the elements of X.
40 | INCX must not be zero.
41 | .SH EXAMPLE
42 | \fI\&#include "hpl.h"\fR
43 |  
44 | int main(int argc, char *argv[])
45 | .br
46 | {
47 | .br
48 |    double x[3];
49 | .br
50 |    x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
51 | .br
52 |    HPL_dscal( 3, 2.0, x, 1 );
53 | .br
54 |    printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
55 | .br
56 |    exit(0); return(0);
57 | .br
58 | }
59 | .SH SEE ALSO
60 | .BR HPL_daxpy \ (3),
61 | .BR HPL_dcopy \ (3),
62 | .BR HPL_dswap \ (3).
63 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_dswap.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_dswap 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_dswap \- y <-> x.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_dswap(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&double *\fR
12 | \fI\&X\fR,
13 | \fB\&const int\fR
14 | \fI\&INCX\fR,
15 | \fB\&double *\fR
16 | \fI\&Y\fR,
17 | \fB\&const int\fR
18 | \fI\&INCY\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_dswap\fR
22 | swaps the vectors x and y.
23 | .SH ARGUMENTS
24 | .TP 8
25 | N       (local input)           const int
26 | On entry, N specifies the length of the vectors  x  and  y. N
27 | must be at least zero.
28 | .TP 8
29 | X       (local input/output)    double *
30 | On entry,  X  is an incremented array of dimension  at  least
31 | ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
32 | On exit, the entries of the incremented array  X  are updated
33 | with the entries of the incremented array Y.
34 | .TP 8
35 | INCX    (local input)           const int
36 | On entry, INCX specifies the increment for the elements of X.
37 | INCX must not be zero.
38 | .TP 8
39 | Y       (local input/output)    double *
40 | On entry,  Y  is an incremented array of dimension  at  least
41 | ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
42 | On exit, the entries of the incremented array  Y  are updated
43 | with the entries of the incremented array X.
44 | .TP 8
45 | INCY    (local input)           const int
46 | On entry, INCY specifies the increment for the elements of Y.
47 | INCY must not be zero.
48 | .SH EXAMPLE
49 | \fI\&#include "hpl.h"\fR
50 |  
51 | int main(int argc, char *argv[])
52 | .br
53 | {
54 | .br
55 |    double x[3], y[3];
56 | .br
57 |    x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
58 | .br
59 |    y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
60 | .br
61 |    HPL_dswap( 3, x, 1, y, 1 );
62 | .br
63 |    printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]);
64 | .br
65 |    printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]);
66 | .br
67 |    exit(0); return(0);
68 | .br
69 | }
70 | .SH SEE ALSO
71 | .BR HPL_daxpy \ (3),
72 | .BR HPL_dcopy \ (3),
73 | .BR HPL_dscal \ (3).
74 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_fprintf.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_fprintf 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_fprintf \- fprintf + fflush wrapper.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_fprintf(\fR
 9 | \fB\&FILE *\fR
10 | \fI\&STREAM\fR,
11 | \fB\&const char *\fR
12 | \fI\&FORM\fR,
13 | \fB\&...\fR
14 | \fB\&);\fR
15 | .SH DESCRIPTION
16 | \fB\&HPL_fprintf\fR
17 | is a wrapper around fprintf flushing the output stream.
18 | .SH ARGUMENTS
19 | .TP 8
20 | STREAM  (local input)           FILE *
21 | On entry, STREAM specifies the output stream.
22 | .TP 8
23 | FORM    (local input)           const char *
24 | On entry, FORM specifies the format, i.e., how the subsequent
25 | arguments are converted for output.
26 | .TP 8
27 |         (local input)           ...
28 | On entry,  ...  is the list of arguments to be printed within
29 | the format string.
30 | .SH EXAMPLE
31 | \fI\&#include "hpl.h"\fR
32 |  
33 | int main(int argc, char *argv[])
34 | .br
35 | {
36 | .br
37 |    HPL_fprintf( stdout, "Hello World.\en" );
38 | .br
39 |    exit(0); return(0);
40 | .br
41 | }
42 | .SH SEE ALSO
43 | .BR HPL_abort \ (3),
44 | .BR HPL_warn \ (3).
45 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_grid_exit.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_grid_exit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_grid_exit \- Exit process grid.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_grid_exit(\fR
 9 | \fB\&HPL_T_grid *\fR
10 | \fI\&GRID\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_grid_exit\fR
14 | marks  the process  grid object for  deallocation.  The
15 | returned  error  code  MPI_SUCCESS  indicates  successful completion.
16 | Other error codes are (MPI) implementation dependent.
17 | .SH ARGUMENTS
18 | .TP 8
19 | GRID    (local input/output)    HPL_T_grid *
20 | On entry,  GRID  points  to the data structure containing the
21 | process grid to be released.
22 | .SH SEE ALSO
23 | .BR HPL_pnum \ (3),
24 | .BR HPL_grid_init \ (3),
25 | .BR HPL_grid_info \ (3).
26 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_grid_info.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_grid_info 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_grid_info \- Retrieve grid information.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_grid_info(\fR
 9 | \fB\&const HPL_T_grid *\fR
10 | \fI\&GRID\fR,
11 | \fB\&int *\fR
12 | \fI\&NPROW\fR,
13 | \fB\&int *\fR
14 | \fI\&NPCOL\fR,
15 | \fB\&int *\fR
16 | \fI\&MYROW\fR,
17 | \fB\&int *\fR
18 | \fI\&MYCOL\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_grid_info\fR
22 | returns  the grid shape and the coordinates in the grid
23 | of the calling process.  Successful  completion  is  indicated by the
24 | returned error code  MPI_SUCCESS. Other error codes depend on the MPI
25 | implementation.
26 | .SH ARGUMENTS
27 | .TP 8
28 | GRID    (local input)           const HPL_T_grid *
29 | On entry,  GRID  points  to the data structure containing the
30 | process grid information.
31 | .TP 8
32 | NPROW   (global output)         int *
33 | On exit,   NPROW  specifies the number of process rows in the
34 | grid. NPROW is at least one.
35 | .TP 8
36 | NPCOL   (global output)         int *
37 | On exit,   NPCOL  specifies  the number of process columns in
38 | the grid. NPCOL is at least one.
39 | .TP 8
40 | MYROW   (global output)         int *
41 | On exit,  MYROW  specifies my  row process  coordinate in the
42 | grid. MYROW is greater than or equal  to zero  and  less than
43 | NPROW.
44 | .TP 8
45 | MYCOL   (global output)         int *
46 | On exit,  MYCOL specifies my column process coordinate in the
47 | grid. MYCOL is greater than or equal  to zero  and  less than
48 | NPCOL.
49 | .SH SEE ALSO
50 | .BR HPL_pnum \ (3),
51 | .BR HPL_grid_init \ (3),
52 | .BR HPL_grid_exit \ (3).
53 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_grid_init.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_grid_init 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_grid_init \- Create a process grid.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_grid_init(\fR
 9 | \fB\&MPI_Comm\fR
10 | \fI\&COMM\fR,
11 | \fB\&const HPL_T_ORDER\fR
12 | \fI\&ORDER\fR,
13 | \fB\&const int\fR
14 | \fI\&NPROW\fR,
15 | \fB\&const int\fR
16 | \fI\&NPCOL\fR,
17 | \fB\&HPL_T_grid *\fR
18 | \fI\&GRID\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_grid_init\fR
22 | creates a NPROW x NPCOL  process  grid using column- or
23 | row-major ordering from an initial collection of processes identified
24 | by an  MPI  communicator.  Successful  completion is indicated by the
25 | returned error code MPI_SUCCESS.  Other error codes depend on the MPI
26 | implementation. The coordinates of processes that are not part of the
27 | grid are set to values outside of [0..NPROW) x [0..NPCOL).
28 | .SH ARGUMENTS
29 | .TP 8
30 | COMM    (global/local input)    MPI_Comm
31 | On entry,  COMM  is  the  MPI  communicator  identifying  the
32 | initial  collection  of  processes out of which  the  grid is
33 | formed.
34 | .TP 8
35 | ORDER   (global input)          const HPL_T_ORDER
36 | On entry, ORDER specifies how the processes should be ordered
37 | in the grid as follows:
38 |    ORDER = HPL_ROW_MAJOR    row-major    ordering;
39 |    ORDER = HPL_COLUMN_MAJOR column-major ordering;
40 | .TP 8
41 | NPROW   (global input)          const int
42 | On entry,  NPROW  specifies the number of process rows in the
43 | grid to be created. NPROW must be at least one.
44 | .TP 8
45 | NPCOL   (global input)          const int
46 | On entry,  NPCOL  specifies  the number of process columns in
47 | the grid to be created. NPCOL must be at least one.
48 | .TP 8
49 | GRID    (local input/output)    HPL_T_grid *
50 | On entry,  GRID  points  to the data structure containing the
51 | process grid information to be initialized.
52 | .SH SEE ALSO
53 | .BR HPL_pnum \ (3),
54 | .BR HPL_grid_info \ (3),
55 | .BR HPL_grid_exit \ (3).
56 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_idamax.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_idamax 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|).
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_idamax(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&const double *\fR
12 | \fI\&X\fR,
13 | \fB\&const int\fR
14 | \fI\&INCX\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_idamax\fR
18 | returns  the index in an n-vector  x  of the first element
19 | having maximum absolute value.
20 | .SH ARGUMENTS
21 | .TP 8
22 | N       (local input)           const int
23 | On entry, N specifies the length of the vector x. N  must  be
24 | at least zero.
25 | .TP 8
26 | X       (local input)           const double *
27 | On entry,  X  is an incremented array of dimension  at  least
28 | ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
29 | .TP 8
30 | INCX    (local input)           const int
31 | On entry, INCX specifies the increment for the elements of X.
32 | INCX must not be zero.
33 | .SH EXAMPLE
34 | \fI\&#include "hpl.h"\fR
35 |  
36 | int main(int argc, char *argv[])
37 | .br
38 | {
39 | .br
40 |    double x[3];
41 | .br
42 |    int    imax;
43 | .br
44 |    x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
45 | .br
46 |    imax = HPL_idamax( 3, x, 1 );
47 | .br
48 |    printf("imax=%d\en", imax);
49 | .br
50 |    exit(0);
51 | .br
52 |    return(0);
53 | .br
54 | }
55 | .SH SEE ALSO
56 | .BR HPL_daxpy \ (3),
57 | .BR HPL_dcopy \ (3),
58 | .BR HPL_dscal \ (3),
59 | .BR HPL_dswap \ (3).
60 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_indxg2l.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_indxg2l 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_indxg2l \- Map a global index into a local one.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_indxg2l(\fR
 9 | \fB\&const int\fR
10 | \fI\&IG\fR,
11 | \fB\&const int\fR
12 | \fI\&INB\fR,
13 | \fB\&const int\fR
14 | \fI\&NB\fR,
15 | \fB\&const int\fR
16 | \fI\&SRCPROC\fR,
17 | \fB\&const int\fR
18 | \fI\&NPROCS\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_indxg2l\fR
22 | computes  the local index of a matrix entry pointed to by
23 | the  global index IG.  This  returned local index is the same in all
24 | processes.
25 | .SH ARGUMENTS
26 | .TP 8
27 | IG      (input)                 const int
28 | On entry, IG specifies the global index of the matrix  entry.
29 | IG must be at least zero.
30 | .TP 8
31 | INB     (input)                 const int
32 | On entry,  INB  specifies  the size of the first block of the
33 | global matrix. INB must be at least one.
34 | .TP 8
35 | NB      (input)                 const int
36 | On entry,  NB specifies the blocking factor used to partition
37 | and distribute the matrix. NB must be larger than one.
38 | .TP 8
39 | SRCPROC (input)                 const int
40 | On entry, if SRCPROC = -1, the data  is not  distributed  but
41 | replicated,  in  which  case  this  routine returns IG in all
42 | processes. Otherwise, the value of SRCPROC is ignored.
43 | .TP 8
44 | NPROCS  (input)                 const int
45 | On entry,  NPROCS  specifies the total number of process rows
46 | or columns over which the matrix is distributed.  NPROCS must
47 | be at least one.
48 | .SH SEE ALSO
49 | .BR HPL_indxg2lp \ (3),
50 | .BR HPL_indxg2p \ (3),
51 | .BR HPL_indxl2g \ (3),
52 | .BR HPL_numroc \ (3),
53 | .BR HPL_numrocI \ (3).
54 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_indxg2p.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_indxg2p 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_indxg2p \- Map a global index into a process coordinate.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_indxg2p(\fR
 9 | \fB\&const int\fR
10 | \fI\&IG\fR,
11 | \fB\&const int\fR
12 | \fI\&INB\fR,
13 | \fB\&const int\fR
14 | \fI\&NB\fR,
15 | \fB\&const int\fR
16 | \fI\&SRCPROC\fR,
17 | \fB\&const int\fR
18 | \fI\&NPROCS\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_indxg2p\fR
22 | computes the process coordinate  which possesses the entry
23 | of a matrix specified by a global index IG.
24 | .SH ARGUMENTS
25 | .TP 8
26 | IG      (input)                 const int
27 | On entry, IG specifies the global index of the matrix  entry.
28 | IG must be at least zero.
29 | .TP 8
30 | INB     (input)                 const int
31 | On entry,  INB  specifies  the size of the first block of the
32 | global matrix. INB must be at least one.
33 | .TP 8
34 | NB      (input)                 const int
35 | On entry,  NB specifies the blocking factor used to partition
36 | and distribute the matrix A. NB must be larger than one.
37 | .TP 8
38 | SRCPROC (input)                 const int
39 | On entry,  SRCPROC  specifies  the coordinate of the  process
40 | that possesses the first row or column of the matrix. SRCPROC
41 | must be at least zero and strictly less than NPROCS.
42 | .TP 8
43 | NPROCS  (input)                 const int
44 | On entry,  NPROCS  specifies the total number of process rows
45 | or columns over which the matrix is distributed.  NPROCS must
46 | be at least one.
47 | .SH SEE ALSO
48 | .BR HPL_indxg2l \ (3),
49 | .BR HPL_indxg2lp \ (3),
50 | .BR HPL_indxl2g \ (3),
51 | .BR HPL_numroc \ (3),
52 | .BR HPL_numrocI \ (3).
53 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_indxl2g.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_indxl2g 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_indxl2g \- Map an index-process pair into a global index.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_indxl2g(\fR
 9 | \fB\&const int\fR
10 | \fI\&IL\fR,
11 | \fB\&const int\fR
12 | \fI\&INB\fR,
13 | \fB\&const int\fR
14 | \fI\&NB\fR,
15 | \fB\&const int\fR
16 | \fI\&PROC\fR,
17 | \fB\&const int\fR
18 | \fI\&SRCPROC\fR,
19 | \fB\&const int\fR
20 | \fI\&NPROCS\fR
21 | \fB\&);\fR
22 | .SH DESCRIPTION
23 | \fB\&HPL_indxl2g\fR
24 | computes the global index of a matrix  entry  pointed to
25 | by the local index IL of the process indicated by PROC.
26 | .SH ARGUMENTS
27 | .TP 8
28 | IL      (input)                 const int
29 | On entry, IL specifies the local  index of the matrix  entry.
30 | IL must be at least zero.
31 | .TP 8
32 | INB     (input)                 const int
33 | On entry,  INB  specifies  the size of the first block of the
34 | global matrix. INB must be at least one.
35 | .TP 8
36 | NB      (input)                 const int
37 | On entry,  NB specifies the blocking factor used to partition
38 | and distribute the matrix A. NB must be larger than one.
39 | .TP 8
40 | PROC    (input)                 const int
41 | On entry, PROC  specifies the coordinate of the process whose
42 | local array row or column is to be determined. PROC  must  be
43 | at least zero and strictly less than NPROCS.
44 | .TP 8
45 | SRCPROC (input)                 const int
46 | On entry,  SRCPROC  specifies  the coordinate of the  process
47 | that possesses the first row or column of the matrix. SRCPROC
48 | must be at least zero and strictly less than NPROCS.
49 | .TP 8
50 | NPROCS  (input)                 const int
51 | On entry,  NPROCS  specifies the total number of process rows
52 | or columns over which the matrix is distributed.  NPROCS must
53 | be at least one.
54 | .SH SEE ALSO
55 | .BR HPL_indxg2l \ (3),
56 | .BR HPL_indxg2lp \ (3),
57 | .BR HPL_indxg2p \ (3),
58 | .BR HPL_numroc \ (3),
59 | .BR HPL_numrocI \ (3).
60 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_jumpit.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_jumpit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_jumpit \- jump into the random sequence.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_jumpit(\fR
 9 | \fB\&int *\fR
10 | \fI\&MULT\fR,
11 | \fB\&int *\fR
12 | \fI\&IADD\fR,
13 | \fB\&int *\fR
14 | \fI\&IRANN\fR,
15 | \fB\&int *\fR
16 | \fI\&IRANM\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_jumpit\fR
20 | jumps in the random sequence from the number  X(n) encoded
21 | in IRANN to the number  X(m)  encoded in  IRANM using the constants A
22 | and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
23 | and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
24 | order to initialize them.
25 | .SH ARGUMENTS
26 | .TP 8
27 | MULT    (local input)           int *
28 | On entry, MULT is an array of dimension 2, that contains the
29 | 16-lower and 15-higher bits of the constant A.
30 | .TP 8
31 | IADD    (local input)           int *
32 | On entry, IADD is an array of dimension 2, that contains the
33 | 16-lower and 15-higher bits of the constant C.
34 | .TP 8
35 | IRANN   (local input)           int *
36 | On entry,  IRANN  is an array of dimension 2,  that contains 
37 | the 16-lower and 15-higher bits of the encoding of X(n).
38 | .TP 8
39 | IRANM   (local output)          int *
40 | On entry,  IRANM  is an array of dimension 2.  On exit, this
41 | array contains respectively the 16-lower and  15-higher bits
42 | of the encoding of X(m).
43 | .SH SEE ALSO
44 | .BR HPL_ladd \ (3),
45 | .BR HPL_lmul \ (3),
46 | .BR HPL_setran \ (3),
47 | .BR HPL_xjumpm \ (3),
48 | .BR HPL_rand \ (3).
49 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_ladd.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_ladd 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_ladd \- Adds two long positive integers.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_ladd(\fR
 9 | \fB\&int *\fR
10 | \fI\&J\fR,
11 | \fB\&int *\fR
12 | \fI\&K\fR,
13 | \fB\&int *\fR
14 | \fI\&I\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_ladd\fR
18 | adds  without carry two long positive integers  K and J  and
19 | puts the result into I. The long integers  I, J, K are encoded on 64
20 | bits using an array of 2 integers.  The 32-lower bits  are stored in
21 | the  first  entry  of each array,  the 32-higher bits  in the second
22 | entry.
23 | .SH ARGUMENTS
24 | .TP 8
25 | J       (local input)           int *
26 | On entry, J is an integer array of dimension 2 containing the
27 | encoded long integer J.
28 | .TP 8
29 | K       (local input)           int *
30 | On entry, K is an integer array of dimension 2 containing the
31 | encoded long integer K.
32 | .TP 8
33 | I       (local output)          int *
34 | On entry, I is an integer array of dimension 2. On exit, this
35 | array contains the encoded long integer result.
36 | .SH SEE ALSO
37 | .BR HPL_lmul \ (3),
38 | .BR HPL_setran \ (3),
39 | .BR HPL_xjumpm \ (3),
40 | .BR HPL_jumpit \ (3),
41 | .BR HPL_rand \ (3).
42 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_lmul.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_lmul 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_lmul \- multiplies 2 long positive integers.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_lmul(\fR
 9 | \fB\&int *\fR
10 | \fI\&K\fR,
11 | \fB\&int *\fR
12 | \fI\&J\fR,
13 | \fB\&int *\fR
14 | \fI\&I\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_lmul\fR
18 | multiplies  without carry two long positive integers K and J
19 | and puts the result into I. The long integers  I, J, K are encoded on
20 | 64 bits using an array of 2 integers. The 32-lower bits are stored in
21 | the first entry of each array, the 32-higher bits in the second entry
22 | of each array. For efficiency purposes, the intrinsic modulo function
23 | is inlined.
24 | .SH ARGUMENTS
25 | .TP 8
26 | K       (local input)           int *
27 | On entry, K is an integer array of dimension 2 containing the
28 | encoded long integer K.
29 | .TP 8
30 | J       (local input)           int *
31 | On entry, J is an integer array of dimension 2 containing the
32 | encoded long integer J.
33 | .TP 8
34 | I       (local output)          int *
35 | On entry, I is an integer array of dimension 2. On exit, this
36 | array contains the encoded long integer result.
37 | .SH SEE ALSO
38 | .BR HPL_ladd \ (3),
39 | .BR HPL_setran \ (3),
40 | .BR HPL_xjumpm \ (3),
41 | .BR HPL_jumpit \ (3),
42 | .BR HPL_rand \ (3).
43 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_max.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_max 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_max \- Combine (max) two buffers.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_max(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&const void *\fR
12 | \fI\&IN\fR,
13 | \fB\&void *\fR
14 | \fI\&INOUT\fR,
15 | \fB\&const HPL_T_TYPE\fR
16 | \fI\&DTYPE\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_max\fR
20 | combines (max) two buffers.
21 | .SH ARGUMENTS
22 | .TP 8
23 | N       (input)                 const int
24 | On entry, N  specifies  the  length  of  the  buffers  to  be
25 | combined. N must be at least zero.
26 | .TP 8
27 | IN      (input)                 const void *
28 | On entry, IN points to the input-only buffer to be combined.
29 | .TP 8
30 | INOUT   (input/output)          void *
31 | On entry, INOUT  points  to  the  input-output  buffer  to be
32 | combined.  On exit,  the  entries of this array  contain  the
33 | combined results.
34 | .TP 8
35 | DTYPE   (input)                 const HPL_T_TYPE
36 | On entry,  DTYPE  specifies the type of the buffers operands.
37 | .SH SEE ALSO
38 | .BR HPL_broadcast \ (3),
39 | .BR HPL_reduce \ (3),
40 | .BR HPL_all_reduce \ (3),
41 | .BR HPL_barrier \ (3),
42 | .BR HPL_min \ (3),
43 | .BR HPL_sum \ (3).
44 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_min.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_min 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_min \- Combine (min) two buffers.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_min(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&const void *\fR
12 | \fI\&IN\fR,
13 | \fB\&void *\fR
14 | \fI\&INOUT\fR,
15 | \fB\&const HPL_T_TYPE\fR
16 | \fI\&DTYPE\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_min\fR
20 | combines (min) two buffers.
21 | .SH ARGUMENTS
22 | .TP 8
23 | N       (input)                 const int
24 | On entry, N  specifies  the  length  of  the  buffers  to  be
25 | combined. N must be at least zero.
26 | .TP 8
27 | IN      (input)                 const void *
28 | On entry, IN points to the input-only buffer to be combined.
29 | .TP 8
30 | INOUT   (input/output)          void *
31 | On entry, INOUT  points  to  the  input-output  buffer  to be
32 | combined.  On exit,  the  entries of this array  contain  the
33 | combined results.
34 | .TP 8
35 | DTYPE   (input)                 const HPL_T_TYPE
36 | On entry,  DTYPE  specifies the type of the buffers operands.
37 | .SH SEE ALSO
38 | .BR HPL_broadcast \ (3),
39 | .BR HPL_reduce \ (3),
40 | .BR HPL_all_reduce \ (3),
41 | .BR HPL_barrier \ (3),
42 | .BR HPL_max \ (3),
43 | .BR HPL_sum \ (3).
44 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_numroc.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_numroc 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_numroc \- Compute the local number of rows/columns.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_numroc(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&const int\fR
12 | \fI\&INB\fR,
13 | \fB\&const int\fR
14 | \fI\&NB\fR,
15 | \fB\&const int\fR
16 | \fI\&PROC\fR,
17 | \fB\&const int\fR
18 | \fI\&SRCPROC\fR,
19 | \fB\&const int\fR
20 | \fI\&NPROCS\fR
21 | \fB\&);\fR
22 | .SH DESCRIPTION
23 | \fB\&HPL_numroc\fR
24 | returns  the  local number of matrix rows/columns process
25 | PROC  will  get  if  we give out  N rows/columns starting from global
26 | index 0.
27 | .SH ARGUMENTS
28 | .TP 8
29 | N       (input)                 const int
30 | On entry, N  specifies the number of rows/columns being dealt
31 | out. N must be at least zero.
32 | .TP 8
33 | INB     (input)                 const int
34 | On entry,  INB  specifies  the size of the first block of the
35 | global matrix. INB must be at least one.
36 | .TP 8
37 | NB      (input)                 const int
38 | On entry,  NB specifies the blocking factor used to partition
39 | and distribute the matrix A. NB must be larger than one.
40 | .TP 8
41 | PROC    (input)                 const int
42 | On entry, PROC specifies  the coordinate of the process whose
43 | local portion is determined.  PROC must be at least zero  and
44 | strictly less than NPROCS.
45 | .TP 8
46 | SRCPROC (input)                 const int
47 | On entry,  SRCPROC  specifies  the coordinate of the  process
48 | that possesses the first row or column of the matrix. SRCPROC
49 | must be at least zero and strictly less than NPROCS.
50 | .TP 8
51 | NPROCS  (input)                 const int
52 | On entry,  NPROCS  specifies the total number of process rows
53 | or columns over which the matrix is distributed.  NPROCS must
54 | be at least one.
55 | .SH SEE ALSO
56 | .BR HPL_indxg2l \ (3),
57 | .BR HPL_indxg2lp \ (3),
58 | .BR HPL_indxg2p \ (3),
59 | .BR HPL_indxl2g \ (3),
60 | .BR HPL_numrocI \ (3).
61 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pabort.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pabort 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pabort \- halts execution.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pabort(\fR
 9 | \fB\&int\fR
10 | \fI\&LINE\fR,
11 | \fB\&const char *\fR
12 | \fI\&SRNAME\fR,
13 | \fB\&const char *\fR
14 | \fI\&FORM\fR,
15 | \fB\&...\fR
16 | \fB\&);\fR
17 | .SH DESCRIPTION
18 | \fB\&HPL_pabort\fR
19 | displays an error message on stderr and halts execution.
20 | .SH ARGUMENTS
21 | .TP 8
22 | LINE    (local input)           int
23 | On entry,  LINE  specifies the line  number in the file where
24 | the  error  has occurred.  When  LINE  is not a positive line
25 | number, it is ignored.
26 | .TP 8
27 | SRNAME  (local input)           const char *
28 | On entry, SRNAME  should  be the name of the routine  calling
29 | this error handler.
30 | .TP 8
31 | FORM    (local input)           const char *
32 | On entry, FORM specifies the format, i.e., how the subsequent
33 | arguments are converted for output.
34 | .TP 8
35 |         (local input)           ...
36 | On entry,  ...  is the list of arguments to be printed within
37 | the format string.
38 | .SH SEE ALSO
39 | .BR HPL_fprintf \ (3),
40 | .BR HPL_pwarn \ (3).
41 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_packL.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_packL 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_packL \- Form the MPI structure for the row ring broadcasts.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_packL(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PANEL\fR,
11 | \fB\&const int\fR
12 | \fI\&INDEX\fR,
13 | \fB\&const int\fR
14 | \fI\&LEN\fR,
15 | \fB\&const int\fR
16 | \fI\&IBUF\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_packL\fR
20 | forms  the MPI data type for the panel to be broadcast.
21 | Successful  completion  is  indicated  by  the  returned  error  code
22 | MPI_SUCCESS.
23 | .SH ARGUMENTS
24 | .TP 8
25 | PANEL   (input/output)          HPL_T_panel *
26 | On entry,  PANEL  points to the  current panel data structure
27 | being broadcast.
28 | .TP 8
29 | INDEX   (input)                 const int
30 | On entry,  INDEX  points  to  the  first entry of the  packed
31 | buffer being broadcast.
32 | .TP 8
33 | LEN     (input)                 const int
34 | On entry, LEN is the length of the packed buffer.
35 | .TP 8
36 | IBUF    (input)                 const int
37 | On entry, IBUF  specifies the panel buffer/count/type entries
38 | that should be initialized.
39 | .SH SEE ALSO
40 | .BR HPL_binit \ (3),
41 | .BR HPL_bcast \ (3),
42 | .BR HPL_bwait \ (3).
43 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pddriver.3:
--------------------------------------------------------------------------------
 1 | .TH main 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | main \- HPL main timing program.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&main();\fR
 9 | .SH DESCRIPTION
10 | \fB\&main\fR
11 | is the main driver program for testing the HPL routines.
12 | This  program is  driven  by  a short data file named  "HPL.dat".
13 | .SH SEE ALSO
14 | .BR HPL_pdinfo \ (3),
15 | .BR HPL_pdtest \ (3).
16 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdgesv.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdgesv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdgesv \- Solve A x = b.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdgesv(\fR
 9 | \fB\&HPL_T_grid *\fR
10 | \fI\&GRID\fR,
11 | \fB\&HPL_T_palg *\fR
12 | \fI\&ALGO\fR,
13 | \fB\&HPL_T_pmat *\fR
14 | \fI\&A\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_pdgesv\fR
18 | factors an N-by-N+1 matrix using LU factorization with row
19 | partial pivoting.  The main algorithm  is the "right looking" variant
20 | with  or  without look-ahead.  The  lower  triangular  factor is left
21 | unpivoted and the pivots are not returned. The right hand side is the
22 | N+1 column of the coefficient matrix.
23 | .SH ARGUMENTS
24 | .TP 8
25 | GRID    (local input)           HPL_T_grid *
26 | On entry,  GRID  points  to the data structure containing the
27 | process grid information.
28 | .TP 8
29 | ALGO    (global input)          HPL_T_palg *
30 | On entry,  ALGO  points to  the data structure containing the
31 | algorithmic parameters.
32 | .TP 8
33 | A       (local input/output)    HPL_T_pmat *
34 | On entry, A points to the data structure containing the local
35 | array information.
36 | .SH SEE ALSO
37 | .BR HPL_pdgesv0 \ (3),
38 | .BR HPL_pdgesvK1 \ (3),
39 | .BR HPL_pdgesvK2 \ (3),
40 | .BR HPL_pdtrsv \ (3).
41 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdgesv0.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdgesv0 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdgesv0 \- Factor an N x N+1 matrix.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdgesv0(\fR
 9 | \fB\&HPL_T_grid *\fR
10 | \fI\&GRID\fR,
11 | \fB\&HPL_T_palg *\fR
12 | \fI\&ALGO\fR,
13 | \fB\&HPL_T_pmat *\fR
14 | \fI\&A\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_pdgesv0\fR
18 | factors an N-by-N+1 matrix using LU factorization with row
19 | partial pivoting.  The main algorithm  is the "right looking" variant
20 | without look-ahead. The lower triangular factor is left unpivoted and
21 | the pivots are not returned. The right hand side is the N+1 column of
22 | the coefficient matrix.
23 | .SH ARGUMENTS
24 | .TP 8
25 | GRID    (local input)           HPL_T_grid *
26 | On entry,  GRID  points  to the data structure containing the
27 | process grid information.
28 | .TP 8
29 | ALGO    (global input)          HPL_T_palg *
30 | On entry,  ALGO  points to  the data structure containing the
31 | algorithmic parameters.
32 | .TP 8
33 | A       (local input/output)    HPL_T_pmat *
34 | On entry, A points to the data structure containing the local
35 | array information.
36 | .SH SEE ALSO
37 | .BR HPL_pdgesv \ (3),
38 | .BR HPL_pdgesvK1 \ (3),
39 | .BR HPL_pdgesvK2 \ (3),
40 | .BR HPL_pdfact \ (3),
41 | .BR HPL_binit \ (3),
42 | .BR HPL_bcast \ (3),
43 | .BR HPL_bwait \ (3),
44 | .BR HPL_pdupdateNN \ (3),
45 | .BR HPL_pdupdateNT \ (3),
46 | .BR HPL_pdupdateTN \ (3),
47 | .BR HPL_pdupdateTT \ (3).
48 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdgesvK1.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdgesvK1 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdgesvK1 \- Factor an N x N+1 matrix.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdgesvK1(\fR
 9 | \fB\&HPL_T_grid *\fR
10 | \fI\&GRID\fR,
11 | \fB\&HPL_T_palg *\fR
12 | \fI\&ALGO\fR,
13 | \fB\&HPL_T_pmat *\fR
14 | \fI\&A\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_pdgesvK1\fR
18 | factors an N-by-N+1 matrix using LU factorization with row
19 | partial pivoting.  The main algorithm  is the "right looking" variant
20 | with look-ahead.  The  lower  triangular factor is left unpivoted and
21 | the pivots are not returned. The right hand side is the N+1 column of
22 | the coefficient matrix.
23 | .SH ARGUMENTS
24 | .TP 8
25 | GRID    (local input)           HPL_T_grid *
26 | On entry,  GRID  points  to the data structure containing the
27 | process grid information.
28 | .TP 8
29 | ALGO    (global input)          HPL_T_palg *
30 | On entry,  ALGO  points to  the data structure containing the
31 | algorithmic parameters.
32 | .TP 8
33 | A       (local input/output)    HPL_T_pmat *
34 | On entry, A points to the data structure containing the local
35 | array information.
36 | .SH SEE ALSO
37 | .BR HPL_pdgesv \ (3),
38 | .BR HPL_pdgesvK2 \ (3),
39 | .BR HPL_pdfact \ (3),
40 | .BR HPL_binit \ (3),
41 | .BR HPL_bcast \ (3),
42 | .BR HPL_bwait \ (3),
43 | .BR HPL_pdupdateNN \ (3),
44 | .BR HPL_pdupdateNT \ (3),
45 | .BR HPL_pdupdateTN \ (3),
46 | .BR HPL_pdupdateTT \ (3).
47 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdgesvK2.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdgesvK2 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdgesvK2 \- Factor an N x N+1 matrix.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdgesvK2(\fR
 9 | \fB\&HPL_T_grid *\fR
10 | \fI\&GRID\fR,
11 | \fB\&HPL_T_palg *\fR
12 | \fI\&ALGO\fR,
13 | \fB\&HPL_T_pmat *\fR
14 | \fI\&A\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_pdgesvK2\fR
18 | factors an N-by-N+1 matrix using LU factorization with row
19 | partial pivoting.  The main algorithm  is the "right looking" variant
20 | with look-ahead.  The  lower  triangular factor is left unpivoted and
21 | the pivots are not returned. The right hand side is the N+1 column of
22 | the coefficient matrix.
23 | .SH ARGUMENTS
24 | .TP 8
25 | GRID    (local input)           HPL_T_grid *
26 | On entry,  GRID  points  to the data structure containing the
27 | process grid information.
28 | .TP 8
29 | ALGO    (global input)          HPL_T_palg *
30 | On entry,  ALGO  points to  the data structure containing the
31 | algorithmic parameters.
32 | .TP 8
33 | A       (local input/output)    HPL_T_pmat *
34 | On entry, A points to the data structure containing the local
35 | array information.
36 | .SH SEE ALSO
37 | .BR HPL_pdgesv \ (3),
38 | .BR HPL_pdgesv0 \ (3),
39 | .BR HPL_pdgesvK1 \ (3),
40 | .BR HPL_pdfact \ (3),
41 | .BR HPL_binit \ (3),
42 | .BR HPL_bcast \ (3),
43 | .BR HPL_bwait \ (3),
44 | .BR HPL_pdupdateNN \ (3),
45 | .BR HPL_pdupdateNT \ (3),
46 | .BR HPL_pdupdateTN \ (3),
47 | .BR HPL_pdupdateTT \ (3).
48 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdpanel_disp.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdpanel_disp 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdpanel_disp \- Deallocate a panel data structure.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_pdpanel_disp(\fR
 9 | \fB\&HPL_T_panel * *\fR
10 | \fI\&PANEL\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_pdpanel_disp\fR
14 | deallocates  the  panel  structure  and  resources  and
15 | stores the error code returned by the panel factorization.
16 | .SH ARGUMENTS
17 | .TP 8
18 | PANEL   (local input/output)    HPL_T_panel * *
19 | On entry,  PANEL  points  to  the  address  of the panel data
20 | structure to be deallocated.
21 | .SH SEE ALSO
22 | .BR HPL_pdpanel_new \ (3),
23 | .BR HPL_pdpanel_init \ (3),
24 | .BR HPL_pdpanel_free \ (3).
25 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdpanel_free.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdpanel_free 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdpanel_free \- Deallocate the panel resources.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_pdpanel_free(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PANEL\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_pdpanel_free\fR
14 | deallocates  the panel resources  and  stores the error
15 | code returned by the panel factorization.
16 | .SH ARGUMENTS
17 | .TP 8
18 | PANEL   (local input/output)    HPL_T_panel *
19 | On entry,  PANEL  points  to  the  panel data  structure from
20 | which the resources should be deallocated.
21 | .SH SEE ALSO
22 | .BR HPL_pdpanel_new \ (3),
23 | .BR HPL_pdpanel_init \ (3),
24 | .BR HPL_pdpanel_disp \ (3).
25 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdtrsv.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdtrsv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdtrsv \- Solve triu( A ) x = b.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdtrsv(\fR
 9 | \fB\&HPL_T_grid *\fR
10 | \fI\&GRID\fR,
11 | \fB\&HPL_T_pmat *\fR
12 | \fI\&AMAT\fR
13 | \fB\&);\fR
14 | .SH DESCRIPTION
15 | \fB\&HPL_pdtrsv\fR
16 | solves an upper triangular system of linear equations.
17 |  
18 | The rhs is the last column of the N by N+1 matrix A. The solve starts
19 | in the process  column owning the  Nth  column of A, so the rhs b may
20 | need to be moved one process column to the left at the beginning. The
21 | routine therefore needs  a column  vector in every process column but
22 | the one owning  b. The result is  replicated in all process rows, and
23 | returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes.
24 |  
25 | The algorithm uses decreasing one-ring broadcast in process rows  and
26 | columns  implemented  in terms of  synchronous communication point to
27 | point primitives.  The  lookahead of depth 1 is used to minimize  the
28 | critical path. This entire operation is essentially ``latency'' bound
29 | and an estimate of its running time is given by:
30 |  
31 |    (move rhs) lat + N / ( P bdwth ) +            
32 |    (solve)    ((N / NB)-1) 2 (lat + NB / bdwth) +
33 |               gam2 N^2 / ( P Q ),                
34 |  
35 | where  gam2   is an estimate of the   Level 2 BLAS rate of execution.
36 | There are  N / NB  diagonal blocks. One must exchange  2  messages of
37 | length NB to compute the next  NB  entries of the vector solution, as
38 | well as performing a total of N^2 floating point operations.
39 | .SH ARGUMENTS
40 | .TP 8
41 | GRID    (local input)           HPL_T_grid *
42 | On entry,  GRID  points  to the data structure containing the
43 | process grid information.
44 | .TP 8
45 | AMAT    (local input/output)    HPL_T_pmat *
46 | On entry,  AMAT  points  to the data structure containing the
47 | local array information.
48 | .SH SEE ALSO
49 | .BR HPL_pdgesv \ (3).
50 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdupdateNN.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdupdateNN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdupdateNN(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PBCST\fR,
11 | \fB\&int *\fR
12 | \fI\&IFLAG\fR,
13 | \fB\&HPL_T_panel *\fR
14 | \fI\&PANEL\fR,
15 | \fB\&const int\fR
16 | \fI\&NN\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_pdupdateNN\fR
20 | broadcasts - forwards the panel PBCST and simultaneously
21 | applies the row interchanges and updates part of the trailing  (using
22 | the panel PANEL) submatrix.
23 | .SH ARGUMENTS
24 | .TP 8
25 | PBCST   (local input/output)    HPL_T_panel *
26 | On entry,  PBCST  points to the data structure containing the
27 | panel (to be broadcast) information.
28 | .TP 8
29 | IFLAG   (local output)          int *
30 | On exit,  IFLAG  indicates  whether or not  the broadcast has
31 | been completed when PBCST is not NULL on entry.  When PBCST is
32 | NULL on entry, IFLAG is left unchanged.
33 | .TP 8
34 | PANEL   (local input/output)    HPL_T_panel *
35 | On entry,  PANEL  points to the data structure containing the
36 | panel (to be updated) information.
37 | .TP 8
38 | NN      (local input)           const int
39 | On entry, NN specifies  the  local  number  of columns of the
40 | trailing  submatrix  to be updated  starting  at the  current
41 | position. NN must be at least zero.
42 | .SH SEE ALSO
43 | .BR HPL_pdgesv \ (3),
44 | .BR HPL_pdgesv0 \ (3),
45 | .BR HPL_pdgesvK1 \ (3),
46 | .BR HPL_pdgesvK2 \ (3),
47 | .BR HPL_pdlaswp00N \ (3),
48 | .BR HPL_pdlaswp01N \ (3).
49 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdupdateNT.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdupdateNT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdupdateNT(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PBCST\fR,
11 | \fB\&int *\fR
12 | \fI\&IFLAG\fR,
13 | \fB\&HPL_T_panel *\fR
14 | \fI\&PANEL\fR,
15 | \fB\&const int\fR
16 | \fI\&NN\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_pdupdateNT\fR
20 | broadcasts - forwards the panel PBCST and simultaneously
21 | applies the row interchanges and updates part of the trailing  (using
22 | the panel PANEL) submatrix.
23 | .SH ARGUMENTS
24 | .TP 8
25 | PBCST   (local input/output)    HPL_T_panel *
26 | On entry,  PBCST  points to the data structure containing the
27 | panel (to be broadcast) information.
28 | .TP 8
29 | IFLAG   (local output)          int *
30 | On exit,  IFLAG  indicates  whether or not  the broadcast has
31 | been completed when PBCST is not NULL on entry.  When PBCST is
32 | NULL on entry, IFLAG is left unchanged.
33 | .TP 8
34 | PANEL   (local input/output)    HPL_T_panel *
35 | On entry,  PANEL  points to the data structure containing the
36 | panel (to be updated) information.
37 | .TP 8
38 | NN      (local input)           const int
39 | On entry, NN specifies  the  local  number  of columns of the
40 | trailing  submatrix  to be updated  starting  at the  current
41 | position. NN must be at least zero.
42 | .SH SEE ALSO
43 | .BR HPL_pdgesv \ (3),
44 | .BR HPL_pdgesv0 \ (3),
45 | .BR HPL_pdgesvK1 \ (3),
46 | .BR HPL_pdgesvK2 \ (3),
47 | .BR HPL_pdlaswp00T \ (3),
48 | .BR HPL_pdlaswp01T \ (3).
49 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdupdateTN.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdupdateTN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdupdateTN(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PBCST\fR,
11 | \fB\&int *\fR
12 | \fI\&IFLAG\fR,
13 | \fB\&HPL_T_panel *\fR
14 | \fI\&PANEL\fR,
15 | \fB\&const int\fR
16 | \fI\&NN\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_pdupdateTN\fR
20 | broadcasts - forwards the panel PBCST and simultaneously
21 | applies the row interchanges and updates part of the trailing
22 | submatrix (using the panel PANEL).
23 | .SH ARGUMENTS
24 | .TP 8
25 | PBCST   (local input/output)    HPL_T_panel *
26 | On entry,  PBCST  points to the data structure containing the
27 | panel (to be broadcast) information.
28 | .TP 8
29 | IFLAG   (local output)          int *
30 | On exit,  IFLAG  indicates  whether or not  the broadcast has
31 | been completed when PBCST is not NULL on entry. Otherwise,
32 | IFLAG is left unchanged.
33 | .TP 8
34 | PANEL   (local input/output)    HPL_T_panel *
35 | On entry,  PANEL  points to the data structure containing the
36 | panel (to be updated) information.
37 | .TP 8
38 | NN      (local input)           const int
39 | On entry, NN specifies  the  local  number  of columns of the
40 | trailing  submatrix  to be updated  starting  at the  current
41 | position. NN must be at least zero.
42 | .SH SEE ALSO
43 | .BR HPL_pdgesv \ (3),
44 | .BR HPL_pdgesv0 \ (3),
45 | .BR HPL_pdgesvK1 \ (3),
46 | .BR HPL_pdgesvK2 \ (3),
47 | .BR HPL_pdlaswp00N \ (3),
48 | .BR HPL_pdlaswp01N \ (3).
49 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pdupdateTT.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pdupdateTT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pdupdateTT(\fR
 9 | \fB\&HPL_T_panel *\fR
10 | \fI\&PBCST\fR,
11 | \fB\&int *\fR
12 | \fI\&IFLAG\fR,
13 | \fB\&HPL_T_panel *\fR
14 | \fI\&PANEL\fR,
15 | \fB\&const int\fR
16 | \fI\&NN\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_pdupdateTT\fR
20 | broadcasts - forwards the panel PBCST and simultaneously
21 | applies the row interchanges and updates part of the trailing
22 | submatrix (using the panel PANEL).
23 | .SH ARGUMENTS
24 | .TP 8
25 | PBCST   (local input/output)    HPL_T_panel *
26 | On entry,  PBCST  points to the data structure containing the
27 | panel (to be broadcast) information.
28 | .TP 8
29 | IFLAG   (local output)          int *
30 | On exit,  IFLAG  indicates  whether or not  the broadcast has
31 | been completed when PBCST is not NULL on entry. Otherwise,
32 | IFLAG is left unchanged.
33 | .TP 8
34 | PANEL   (local input/output)    HPL_T_panel *
35 | On entry,  PANEL  points to the data structure containing the
36 | panel (to be updated) information.
37 | .TP 8
38 | NN      (local input)           const int
39 | On entry, NN specifies  the  local  number  of columns of the
40 | trailing  submatrix  to be updated  starting  at the  current
41 | position. NN must be at least zero.
42 | .SH SEE ALSO
43 | .BR HPL_pdgesv \ (3),
44 | .BR HPL_pdgesv0 \ (3),
45 | .BR HPL_pdgesvK1 \ (3),
46 | .BR HPL_pdgesvK2 \ (3),
47 | .BR HPL_pdlaswp00T \ (3),
48 | .BR HPL_pdlaswp01T \ (3).
49 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_perm.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_perm 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_perm \- Combine 2 index arrays - Generate the permutation.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_perm(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&int *\fR
12 | \fI\&LINDXA\fR,
13 | \fB\&int *\fR
14 | \fI\&LINDXAU\fR,
15 | \fB\&int *\fR
16 | \fI\&IWORK\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_perm\fR
20 | combines  two  index  arrays and generates the corresponding
21 | permutation. First, this function computes the inverse of LINDXA, and
22 | then combines it with LINDXAU. Second, in order to be able to perform
23 | the permutation in place,  LINDXAU  is overwritten by the sequence of
24 | permutations producing  the  same result.  What we ultimately want to
25 | achieve is:  U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
26 | call to this function,  this in place permutation can be performed by
27 | for i in [0..N) swap U[i] with U[LINDXAU[i]].
28 | .SH ARGUMENTS
29 | .TP 8
30 | N       (global input)          const int
31 | On entry,  N  specifies the length of the arrays  LINDXA  and
32 | LINDXAU. N should be at least zero.
33 | .TP 8
34 | LINDXA  (global input/output)   int *
35 | On entry,  LINDXA  is an array of dimension N  containing the
36 | source indexes. On exit,  LINDXA  contains the combined index
37 | array.
38 | .TP 8
39 | LINDXAU (global input/output)   int *
40 | On entry,  LINDXAU is an array of dimension N  containing the
41 | target indexes.  On exit,  LINDXAU  contains  the sequence of
42 | permutations  that  should be applied  in increasing order to
43 | permute the underlying array U in place.
44 | .TP 8
45 | IWORK   (workspace)             int *
46 | On entry, IWORK is a workarray of dimension N.
47 | .SH SEE ALSO
48 | .BR HPL_plindx1 \ (3),
49 | .BR HPL_pdlaswp01N \ (3),
50 | .BR HPL_pdlaswp01T \ (3).
51 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pnum.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pnum 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pnum \- Rank determination.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_pnum(\fR
 9 | \fB\&const HPL_T_grid *\fR
10 | \fI\&GRID\fR,
11 | \fB\&const int\fR
12 | \fI\&MYROW\fR,
13 | \fB\&const int\fR
14 | \fI\&MYCOL\fR
15 | \fB\&);\fR
16 | .SH DESCRIPTION
17 | \fB\&HPL_pnum\fR
18 | determines  the  rank  of a  process  as a function  of  its
19 | coordinates in the grid.
20 | .SH ARGUMENTS
21 | .TP 8
22 | GRID    (local input)           const HPL_T_grid *
23 | On entry,  GRID  points  to the data structure containing the
24 | process grid information.
25 | .TP 8
26 | MYROW   (local input)           const int
27 | On entry,  MYROW  specifies the row coordinate of the process
28 | whose rank is to be determined. MYROW must be greater than or
29 | equal to zero and less than NPROW.
30 | .TP 8
31 | MYCOL   (local input)           const int
32 | On entry,  MYCOL  specifies  the  column  coordinate  of  the
33 | process whose rank is to be determined. MYCOL must be greater
34 | than or equal to zero and less than NPCOL.
35 | .SH SEE ALSO
36 | .BR HPL_grid_init \ (3),
37 | .BR HPL_grid_info \ (3),
38 | .BR HPL_grid_exit \ (3).
39 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_ptimer.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_ptimer 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_ptimer \- Timer facility.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_ptimer(\fR
 9 | \fB\&const int\fR
10 | \fI\&I\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_ptimer\fR
14 | provides a  "stopwatch"  functionality  cpu/wall  timer in
15 | seconds.  Up to  64  separate timers can be functioning at once.  The
16 | first call starts the timer,  and the second stops it.  This  routine
17 | can  be  disabled  by calling HPL_ptimer_disable(),  so that calls to
18 | the timer are ignored.  This feature can be used to make sure certain
19 | sections of code do not affect timings,  even  if  they call routines
20 | which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
21 | the  timer  functionality.  One  can retrieve  the current value of a
22 | timer by calling
23 |  
24 | t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
25 |  
26 | where  I  is the timer index in  [0..64).  To  initialize  the  timer
27 | functionality, one must have called HPL_ptimer_boot() prior to any of
28 | the functions mentioned above.
29 | .SH ARGUMENTS
30 | .TP 8
31 | I       (global input)          const int
32 | On entry, I specifies the timer to stop/start.
33 | .SH SEE ALSO
34 | .BR HPL_ptimer_cputime \ (3),
35 | .BR HPL_ptimer_walltime \ (3).
36 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_ptimer_cputime.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_ptimer_cputime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_ptimer_cputime \- Return the CPU time.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&double\fR
 8 | \fB\&HPL_ptimer_cputime();\fR
 9 | .SH DESCRIPTION
10 | \fB\&HPL_ptimer_cputime\fR
11 | returns the cpu time. If HPL_USE_CLOCK is defined,
12 | the  clock() function is used to return an approximation of processor
13 | time used by the program.  The value returned is the CPU time used so
14 | far as a clock_t;  to get the number of seconds used,  the result  is
15 | divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
16 | standard library.  If  HPL_USE_TIMES is defined, the times() function
17 | is used instead.  This  function  returns  the current process times.
18 | times() returns the number of clock ticks that have elapsed since the
19 | system has been up.  Otherwise and by default,  the  standard library
20 | function getrusage() is used.
21 | .SH SEE ALSO
22 | .BR HPL_ptimer_walltime \ (3),
23 | .BR HPL_ptimer \ (3).
24 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_ptimer_walltime.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_ptimer_walltime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_ptimer_walltime \- Return the elapsed (wall-clock) time.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&double\fR
 8 | \fB\&HPL_ptimer_walltime();\fR
 9 | .SH DESCRIPTION
10 | \fB\&HPL_ptimer_walltime\fR
11 | returns the elapsed (wall-clock) time.
12 | .SH SEE ALSO
13 | .BR HPL_ptimer_cputime \ (3),
14 | .BR HPL_ptimer \ (3).
15 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_pwarn.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_pwarn 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_pwarn \- displays an error message.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_pwarn(\fR
 9 | \fB\&FILE *\fR
10 | \fI\&STREAM\fR,
11 | \fB\&int\fR
12 | \fI\&LINE\fR,
13 | \fB\&const char *\fR
14 | \fI\&SRNAME\fR,
15 | \fB\&const char *\fR
16 | \fI\&FORM\fR,
17 | \fB\&...\fR
18 | \fB\&);\fR
19 | .SH DESCRIPTION
20 | \fB\&HPL_pwarn\fR
21 | displays an error message.
22 | .SH ARGUMENTS
23 | .TP 8
24 | STREAM  (local input)           FILE *
25 | On entry, STREAM specifies the output stream.
26 | .TP 8
27 | LINE    (local input)           int
28 | On entry,  LINE  specifies the line  number in the file where
29 | the  error  has occurred.  When  LINE  is not a positive line
30 | number, it is ignored.
31 | .TP 8
32 | SRNAME  (local input)           const char *
33 | On entry, SRNAME  should  be the name of the routine  calling
34 | this error handler.
35 | .TP 8
36 | FORM    (local input)           const char *
37 | On entry, FORM specifies the format, i.e., how the subsequent
38 | arguments are converted for output.
39 | .TP 8
40 |         (local input)           ...
41 | On entry,  ...  is the list of arguments to be printed within
42 | the format string.
43 | .SH SEE ALSO
44 | .BR HPL_pabort \ (3),
45 | .BR HPL_fprintf \ (3).
46 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_rand.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_rand 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_rand \- random number generator.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&double\fR
 8 | \fB\&HPL_rand();\fR
 9 | .SH DESCRIPTION
10 | \fB\&HPL_rand\fR
11 | generates  the next number  in the  random  sequence.  This
12 | function  ensures  that this number lies in the interval (-0.5, 0.5].
13 |  
14 | The static array irand contains the information (2 integers) required
15 | to generate the  next number  in the sequence  X(n).  This  number is
16 | computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
17 | constant d is the largest 64 bit positive integer. The array irand is
18 | then  updated  for the generation of the next number  X(n+1)  in  the
19 | random sequence as follows X(n+1) = a * X(n) + c. The constants a and
20 | c  should have been preliminarily stored in the arrays ias and ics as
21 | 2 pairs of integers.  The initialization of  ias,  ics and  irand  is
22 | performed by the function HPL_setran.
23 | .SH SEE ALSO
24 | .BR HPL_ladd \ (3),
25 | .BR HPL_lmul \ (3),
26 | .BR HPL_setran \ (3),
27 | .BR HPL_xjumpm \ (3),
28 | .BR HPL_jumpit \ (3).
29 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_recv.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_recv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_recv \- Receive a message.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_recv(\fR
 9 | \fB\&double *\fR
10 | \fI\&RBUF\fR,
11 | \fB\&int\fR
12 | \fI\&RCOUNT\fR,
13 | \fB\&int\fR
14 | \fI\&SRC\fR,
15 | \fB\&int\fR
16 | \fI\&RTAG\fR,
17 | \fB\&MPI_Comm\fR
18 | \fI\&COMM\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_recv\fR
22 | is a simple wrapper around  MPI_Recv.  Its  main  purpose is
23 | to  allow for some  experimentation / tuning  of this simple routine.
24 | Successful  completion  is  indicated  by  the  returned  error  code
25 | HPL_SUCCESS.  In the case of messages of length less than or equal to
26 | zero, this function returns immediately.
27 | .SH ARGUMENTS
28 | .TP 8
29 | RBUF    (local output)          double *
30 | On entry, RBUF specifies the starting address of buffer to be
31 | received.
32 | .TP 8
33 | RCOUNT  (local input)           int
34 | On entry,  RCOUNT  specifies  the number  of double precision
35 | entries in RBUF. RCOUNT must be at least zero.
36 | .TP 8
37 | SRC     (local input)           int
38 | On entry, SRC  specifies the rank of the  sending  process in
39 | the communication space defined by COMM.
40 | .TP 8
41 | RTAG    (local input)           int
42 | On entry,  RTAG specifies the message tag to be used for this
43 | communication operation.
44 | .TP 8
45 | COMM    (local input)           MPI_Comm
46 | The MPI communicator identifying the communication space.
47 | .SH SEE ALSO
48 | .BR HPL_send \ (3),
49 | .BR HPL_sdrv \ (3).
50 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_reduce.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_reduce 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_reduce \- Reduce operation.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_reduce(\fR
 9 | \fB\&void *\fR
10 | \fI\&BUFFER\fR,
11 | \fB\&const int\fR
12 | \fI\&COUNT\fR,
13 | \fB\&const HPL_T_TYPE\fR
14 | \fI\&DTYPE\fR,
15 | \fB\&const HPL_T_OP \fR
16 | \fI\&OP\fR,
17 | \fB\&const int\fR
18 | \fI\&ROOT\fR,
19 | \fB\&MPI_Comm\fR
20 | \fI\&COMM\fR
21 | \fB\&);\fR
22 | .SH DESCRIPTION
23 | \fB\&HPL_reduce\fR
24 | performs a global reduce operation across all processes of
25 | a group.  Note that the input buffer is used as a work array, so the
26 | original data is corrupted in all processes but the accumulating one.
27 | .SH ARGUMENTS
28 | .TP 8
29 | BUFFER  (local input/output)    void *
30 | On entry,  BUFFER  points to  the  buffer to be  reduced.  On
31 | exit,  and  in process of rank  ROOT  this array contains the
32 | reduced data.  This  buffer  is also used as workspace during
33 | the operation in the other processes of the group.
34 | .TP 8
35 | COUNT   (global input)          const int
36 | On entry,  COUNT  indicates the number of entries in  BUFFER.
37 | COUNT must be at least zero.
38 | .TP 8
39 | DTYPE   (global input)          const HPL_T_TYPE
40 | On entry,  DTYPE  specifies the type of the buffers operands.
41 | .TP 8
42 | OP      (global input)          const HPL_T_OP 
43 | On entry, OP is a pointer to the local combine function.
44 | .TP 8
45 | ROOT    (global input)          const int
46 | On entry, ROOT is the coordinate of the accumulating process.
47 | .TP 8
48 | COMM    (global/local input)    MPI_Comm
49 | The MPI communicator identifying the process collection.
50 | .SH SEE ALSO
51 | .BR HPL_broadcast \ (3),
52 | .BR HPL_all_reduce \ (3),
53 | .BR HPL_barrier \ (3),
54 | .BR HPL_min \ (3),
55 | .BR HPL_max \ (3),
56 | .BR HPL_sum \ (3).
57 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_sdrv.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_sdrv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_sdrv \- Send and receive a message.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_sdrv(\fR
 9 | \fB\&double *\fR
10 | \fI\&SBUF\fR,
11 | \fB\&int\fR
12 | \fI\&SCOUNT\fR,
13 | \fB\&int\fR
14 | \fI\&STAG\fR,
15 | \fB\&double *\fR
16 | \fI\&RBUF\fR,
17 | \fB\&int\fR
18 | \fI\&RCOUNT\fR,
19 | \fB\&int\fR
20 | \fI\&RTAG\fR,
21 | \fB\&int\fR
22 | \fI\&PARTNER\fR,
23 | \fB\&MPI_Comm\fR
24 | \fI\&COMM\fR
25 | \fB\&);\fR
26 | .SH DESCRIPTION
27 | \fB\&HPL_sdrv\fR
28 | is a simple wrapper around MPI_Sendrecv. Its main purpose is
29 | to allow for some experimentation and tuning of this simple function.
30 | Messages  of  length  less than  or  equal to zero  are not sent  nor
31 | received.  Successful completion  is  indicated by the returned error
32 | code HPL_SUCCESS.
33 | .SH ARGUMENTS
34 | .TP 8
35 | SBUF    (local input)           double *
36 | On entry, SBUF specifies the starting address of buffer to be
37 | sent.
38 | .TP 8
39 | SCOUNT  (local input)           int
40 | On entry,  SCOUNT  specifies  the number  of double precision
41 | entries in SBUF. SCOUNT must be at least zero.
42 | .TP 8
43 | STAG    (local input)           int
44 | On entry,  STAG  specifies the message tag to be used for the
45 | sending communication operation.
46 | .TP 8
47 | RBUF    (local output)          double *
48 | On entry, RBUF specifies the starting address of buffer to be
49 | received.
50 | .TP 8
51 | RCOUNT  (local input)           int
52 | On entry,  RCOUNT  specifies  the number  of double precision
53 | entries in RBUF. RCOUNT must be at least zero.
54 | .TP 8
55 | RTAG    (local input)           int
56 | On entry,  RTAG  specifies the message tag to be used for the
57 | receiving communication operation.
58 | .TP 8
59 | PARTNER (local input)           int
60 | On entry,  PARTNER  specifies  the rank of the  collaborative
61 | process in the communication space defined by COMM.
62 | .TP 8
63 | COMM    (local input)           MPI_Comm
64 | The MPI communicator identifying the communication space.
65 | .SH SEE ALSO
66 | .BR HPL_send \ (3),
67 | .BR HPL_recv \ (3).
68 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_send.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_send 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_send \- Send a message.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&int\fR
 8 | \fB\&HPL_send(\fR
 9 | \fB\&double *\fR
10 | \fI\&SBUF\fR,
11 | \fB\&int\fR
12 | \fI\&SCOUNT\fR,
13 | \fB\&int\fR
14 | \fI\&DEST\fR,
15 | \fB\&int\fR
16 | \fI\&STAG\fR,
17 | \fB\&MPI_Comm\fR
18 | \fI\&COMM\fR
19 | \fB\&);\fR
20 | .SH DESCRIPTION
21 | \fB\&HPL_send\fR
22 | is a simple wrapper around  MPI_Send.  Its  main  purpose is
23 | to  allow for some  experimentation / tuning  of this simple routine.
24 | Successful  completion  is  indicated  by  the  returned  error  code
25 | HPL_SUCCESS.  In the case of messages of length less than or equal to
26 | zero, this function returns immediately.
27 | .SH ARGUMENTS
28 | .TP 8
29 | SBUF    (local input)           double *
30 | On entry, SBUF specifies the starting address of buffer to be
31 | sent.
32 | .TP 8
33 | SCOUNT  (local input)           int
34 | On entry,  SCOUNT  specifies  the number of  double precision
35 | entries in SBUF. SCOUNT must be at least zero.
36 | .TP 8
37 | DEST    (local input)           int
38 | On entry, DEST specifies the rank of the receiving process in
39 | the communication space defined by COMM.
40 | .TP 8
41 | STAG    (local input)           int
42 | On entry,  STAG specifies the message tag to be used for this
43 | communication operation.
44 | .TP 8
45 | COMM    (local input)           MPI_Comm
46 | The MPI communicator identifying the communication space.
47 | .SH SEE ALSO
48 | .BR HPL_recv \ (3),
49 | .BR HPL_sdrv \ (3).
50 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_setran.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_setran 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_setran \- Manage the random number generator.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_setran(\fR
 9 | \fB\&const int\fR
10 | \fI\&OPTION\fR,
11 | \fB\&int *\fR
12 | \fI\&IRAN\fR
13 | \fB\&);\fR
14 | .SH DESCRIPTION
15 | \fB\&HPL_setran\fR
16 | initializes  the random generator with the encoding of the
17 | first number X(0) in the sequence,  and the constants a and c used to
18 | compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
19 | a and c are stored in the static variables  irand, ias and ics.  When
20 | OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the
21 | values of the input array IRAN.  When OPTION is 3, IRAN is set to the
22 | current value of irand, and irand is then incremented.
23 | .SH ARGUMENTS
24 | .TP 8
25 | OPTION  (local input)           const int
26 | On entry, OPTION  is an integer that specifies the operations
27 | to be performed on the random generator as specified above.
28 | .TP 8
29 | IRAN    (local input/output)    int *
30 | On entry,  IRAN is an array of dimension 2, that contains the
31 | 16-lower and 15-higher bits of a random number.
32 | .SH SEE ALSO
33 | .BR HPL_ladd \ (3),
34 | .BR HPL_lmul \ (3),
35 | .BR HPL_xjumpm \ (3),
36 | .BR HPL_jumpit \ (3),
37 | .BR HPL_rand \ (3).
38 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_sum.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_sum 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_sum \- Combine (sum) two buffers.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_sum(\fR
 9 | \fB\&const int\fR
10 | \fI\&N\fR,
11 | \fB\&const void *\fR
12 | \fI\&IN\fR,
13 | \fB\&void *\fR
14 | \fI\&INOUT\fR,
15 | \fB\&const HPL_T_TYPE\fR
16 | \fI\&DTYPE\fR
17 | \fB\&);\fR
18 | .SH DESCRIPTION
19 | \fB\&HPL_sum\fR
20 | combines (sum) two buffers.
21 | .SH ARGUMENTS
22 | .TP 8
23 | N       (input)                 const int
24 | On entry, N  specifies  the  length  of  the  buffers  to  be
25 | combined. N must be at least zero.
26 | .TP 8
27 | IN      (input)                 const void *
28 | On entry, IN points to the input-only buffer to be combined.
29 | .TP 8
30 | INOUT   (input/output)          void *
31 | On entry, INOUT  points  to  the  input-output  buffer  to be
32 | combined.  On exit,  the  entries of this array  contain  the
33 | combined results.
34 | .TP 8
35 | DTYPE   (input)                 const HPL_T_TYPE
36 | On entry,  DTYPE  specifies the type of the buffers operands.
37 | .SH SEE ALSO
38 | .BR HPL_broadcast \ (3),
39 | .BR HPL_reduce \ (3),
40 | .BR HPL_all_reduce \ (3),
41 | .BR HPL_barrier \ (3),
42 | .BR HPL_min \ (3),
43 | .BR HPL_max \ (3),
44 | .BR HPL_sum \ (3).
45 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_timer.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_timer 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_timer \- Timer facility.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_timer(\fR
 9 | \fB\&const int\fR
10 | \fI\&I\fR
11 | \fB\&);\fR
12 | .SH DESCRIPTION
13 | \fB\&HPL_timer\fR
14 | provides a  "stopwatch"  functionality  cpu/wall  timer  in
15 | seconds.  Up to  64  separate timers can be functioning at once.  The
16 | first call starts the timer,  and the second stops it.  This  routine
17 | can  be  disabled  by calling  HPL_timer_disable(),  so that calls to
18 | the timer are ignored.  This feature can be used to make sure certain
19 | sections of code do not affect timings,  even  if  they call routines
20 | which have HPL_timer calls in them. HPL_timer_enable() will re-enable
21 | the  timer  functionality.  One  can retrieve  the current value of a
22 | timer by calling
23 |  
24 | t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
25 |  
26 | where  I  is the timer index in  [0..64).  To  initialize  the  timer
27 | functionality, one must have called HPL_timer_boot()  prior to any of
28 | the functions mentioned above.
29 | .SH ARGUMENTS
30 | .TP 8
31 | I       (global input)          const int
32 | On entry, I specifies the timer to stop/start.
33 | .SH SEE ALSO
34 | .BR HPL_timer_cputime \ (3),
35 | .BR HPL_timer_walltime \ (3).
36 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_timer_cputime.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_timer_cputime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_timer_cputime \- Return the CPU time.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&double\fR
 8 | \fB\&HPL_timer_cputime();\fR
 9 | .SH DESCRIPTION
10 | \fB\&HPL_timer_cputime\fR
11 | returns the cpu time.  If HPL_USE_CLOCK is defined,
12 | the  clock() function is used to return an approximation of processor
13 | time used by the program.  The value returned is the CPU time used so
14 | far as a clock_t;  to get the number of seconds used,  the result  is
15 | divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
16 | standard library.  If  HPL_USE_TIMES is defined, the times() function
17 | is used instead.  This  function  returns  the current process times.
18 | times() returns the number of clock ticks that have elapsed since the
19 | system has been up.  Otherwise and by default,  the  standard library
20 | function getrusage() is used.
21 | .SH SEE ALSO
22 | .BR HPL_timer_walltime \ (3),
23 | .BR HPL_timer \ (3).
24 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_timer_walltime.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_timer_walltime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_timer_walltime \- Return the elapsed (wall-clock) time.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&double\fR
 8 | \fB\&HPL_timer_walltime();\fR
 9 | .SH DESCRIPTION
10 | \fB\&HPL_timer_walltime\fR
11 | returns the elapsed (wall-clock) time.
12 | .SH SEE ALSO
13 | .BR HPL_timer_cputime \ (3),
14 | .BR HPL_timer \ (3).
15 | 


--------------------------------------------------------------------------------
/hpl/man/man3/HPL_warn.3:
--------------------------------------------------------------------------------
 1 | .TH HPL_warn 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions"
 2 | .SH NAME
 3 | HPL_warn \- displays an error message.
 4 | .SH SYNOPSIS
 5 | \fB\&#include "hpl.h"\fR
 6 |  
 7 | \fB\&void\fR
 8 | \fB\&HPL_warn(\fR
 9 | \fB\&FILE *\fR
10 | \fI\&STREAM\fR,
11 | \fB\&int\fR
12 | \fI\&LINE\fR,
13 | \fB\&const char *\fR
14 | \fI\&SRNAME\fR,
15 | \fB\&const char *\fR
16 | \fI\&FORM\fR,
17 | \fB\&...\fR
18 | \fB\&);\fR
19 | .SH DESCRIPTION
20 | \fB\&HPL_warn\fR
21 | displays an error message.
22 | .SH ARGUMENTS
23 | .TP 8
24 | STREAM  (local input)           FILE *
25 | On entry, STREAM specifies the output stream.
26 | .TP 8
27 | LINE    (local input)           int
28 | On entry,  LINE  specifies the line  number in the file where
29 | the  error  has occurred.  When  LINE  is not a positive line
30 | number, it is ignored.
31 | .TP 8
32 | SRNAME  (local input)           const char *
33 | On entry, SRNAME  should  be the name of the routine  calling
34 | this error handler.
35 | .TP 8
36 | FORM    (local input)           const char *
37 | On entry, FORM specifies the format, i.e., how the subsequent
38 | arguments are converted for output.
39 | .TP 8
40 |         (local input)           ...
41 | On entry,  ...  is the list of arguments to be printed within
42 | the format string.
43 | .SH EXAMPLE
44 | \fI\&#include "hpl.h"\fR
45 |  
46 | int main(int argc, char *argv[])
47 | .br
48 | {
49 | .br
50 |    HPL_warn( stderr, __LINE__, __FILE__,
51 | .br
52 |              "Demo.\en" );
53 | .br
54 |    exit(0); return(0);
55 | .br
56 | }
57 | .SH SEE ALSO
58 | .BR HPL_abort \ (3),
59 | .BR HPL_fprintf \ (3).
60 | 


--------------------------------------------------------------------------------
/hpl/testing/ptest/HPL.dat:
--------------------------------------------------------------------------------
 1 | HPLinpack benchmark input file
 2 | Innovative Computing Laboratory, University of Tennessee
 3 | HPL.out      output file name (if any)
 4 | 6            device out (6=stdout,7=stderr,file)
 5 | 4            # of problems sizes (N)
 6 | 29 30 34 35  Ns
 7 | 4            # of NBs
 8 | 1 2 3 4      NBs
 9 | 0            PMAP process mapping (0=Row-,1=Column-major)
10 | 3            # of process grids (P x Q)
11 | 2 1 4        Ps
12 | 2 4 1        Qs
13 | 16.0         threshold
14 | 3            # of panel fact
15 | 0 1 2        PFACTs (0=left, 1=Crout, 2=Right)
16 | 2            # of recursive stopping criterium
17 | 2 4          NBMINs (>= 1)
18 | 1            # of panels in recursion
19 | 2            NDIVs
20 | 3            # of recursive panel fact.
21 | 0 1 2        RFACTs (0=left, 1=Crout, 2=Right)
22 | 1            # of broadcast
23 | 0            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
24 | 1            # of lookahead depth
25 | 0            DEPTHs (>=0)
26 | 2            SWAP (0=bin-exch,1=long,2=mix)
27 | 64           swapping threshold
28 | 0            L1 in (0=transposed,1=no-transposed) form
29 | 0            U  in (0=transposed,1=no-transposed) form
30 | 1            Equilibration (0=no,1=yes)
31 | 8            memory alignment in double (> 0)
32 | 


--------------------------------------------------------------------------------
/hpl/www/1rinM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/1rinM.jpg


--------------------------------------------------------------------------------
/hpl/www/1ring.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/1ring.jpg


--------------------------------------------------------------------------------
/hpl/www/2-273x48.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/2-273x48.jpg


--------------------------------------------------------------------------------
/hpl/www/2rinM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/2rinM.jpg


--------------------------------------------------------------------------------
/hpl/www/2ring.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/2ring.jpg


--------------------------------------------------------------------------------
/hpl/www/HPL_abort.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_abort HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_abort</B> halts execution.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_abort(</CODE>
16 | <CODE>int</CODE>
17 | <CODE>LINE</CODE>,
18 | <CODE>const char *</CODE>
19 | <CODE>SRNAME</CODE>,
20 | <CODE>const char *</CODE>
21 | <CODE>FORM</CODE>,
22 | <CODE>...</CODE>
23 | <CODE>);</CODE>
24 | 
25 | <H1>Description</H1>
26 | <B>HPL_abort</B>
27 | displays an error message on stderr and halts execution.
28 | 
29 | <H1>Arguments</H1>
30 | <PRE>
31 | LINE    (local input)                 int
32 |         On entry,  LINE  specifies the line  number in the file where
33 |         the  error  has occurred.  When  LINE  is not a positive line
34 |         number, it is ignored.
35 | </PRE>
36 | <PRE>
37 | SRNAME  (local input)                 const char *
38 |         On entry, SRNAME  should  be the name of the routine  calling
39 |         this error handler.
40 | </PRE>
41 | <PRE>
42 | FORM    (local input)                 const char *
43 |         On entry, FORM specifies the format, i.e., how the subsequent
44 |         arguments are converted for output.
45 | </PRE>
46 | <PRE>
47 |         (local input)                 ...
48 |         On entry,  ...  is the list of arguments to be printed within
49 |         the format string.
50 | </PRE>
51 | 
52 | <H1>Example</H1>
53 | <CODE>#include "hpl.h"</CODE><BR><BR>
54 | <PRE>
55 | int main(int argc, char *argv[])
56 | {
57 |    HPL_abort( __LINE__, __FILE__, "Halt.\n" );
58 |    exit(0); return(0);
59 | }
60 | </PRE>
61 | 
62 | <H1>See Also</H1>
63 | <A HREF="HPL_fprintf.html">HPL_fprintf</A>,
64 | <A HREF="HPL_warn.html">HPL_warn</A>.
65 | 
66 | </BODY>
67 | </HTML>
68 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_all_reduce.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_all_reduce HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_all_reduce</B> All reduce operation.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_all_reduce(</CODE>
16 | <CODE>void *</CODE>
17 | <CODE>BUFFER</CODE>,
18 | <CODE>const int</CODE>
19 | <CODE>COUNT</CODE>,
20 | <CODE>const HPL_T_TYPE</CODE>
21 | <CODE>DTYPE</CODE>,
22 | <CODE>const HPL_T_OP </CODE>
23 | <CODE>OP</CODE>,
24 | <CODE>MPI_Comm</CODE>
25 | <CODE>COMM</CODE>
26 | <CODE>);</CODE>
27 | 
28 | <H1>Description</H1>
29 | <B>HPL_all_reduce</B>
30 | performs   a   global   reduce  operation  across  all
31 | processes of a group leaving the results on all processes.
32 | 
33 | <H1>Arguments</H1>
34 | <PRE>
35 | BUFFER  (local input/global output)   void *
36 |         On entry,  BUFFER  points to  the  buffer to be combined.  On
37 |         exit, this array contains the combined data and  is identical
38 |         on all processes in the group.
39 | </PRE>
40 | <PRE>
41 | COUNT   (global input)                const int
42 |         On entry,  COUNT  indicates the number of entries in  BUFFER.
43 |         COUNT must be at least zero.
44 | </PRE>
45 | <PRE>
46 | DTYPE   (global input)                const HPL_T_TYPE
47 |         On entry,  DTYPE  specifies the type of the buffers operands.
48 | </PRE>
49 | <PRE>
50 | OP      (global input)                const HPL_T_OP 
51 |         On entry, OP is a pointer to the local combine function.
52 | </PRE>
53 | <PRE>
54 | COMM    (global/local input)          MPI_Comm
55 |         The MPI communicator identifying the process collection.
56 | </PRE>
57 | 
58 | <H1>See Also</H1>
59 | <A HREF="HPL_broadcast.html">HPL_broadcast</A>,
60 | <A HREF="HPL_reduce.html">HPL_reduce</A>,
61 | <A HREF="HPL_barrier.html">HPL_barrier</A>,
62 | <A HREF="HPL_min.html">HPL_min</A>,
63 | <A HREF="HPL_max.html">HPL_max</A>,
64 | <A HREF="HPL_sum.html">HPL_sum</A>.
65 | 
66 | </BODY>
67 | </HTML>
68 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_barrier.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_barrier HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_barrier</B> Barrier operation.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_barrier(</CODE>
16 | <CODE>MPI_Comm</CODE>
17 | <CODE>COMM</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_barrier</B>
22 | blocks the caller until all process members have called it.
23 | The  call  returns  at any process  only after all group members have
24 | entered the call.
25 | 
26 | <H1>Arguments</H1>
27 | <PRE>
28 | COMM    (global/local input)          MPI_Comm
29 |         The MPI communicator identifying the process collection.
30 | </PRE>
31 | 
32 | <H1>See Also</H1>
33 | <A HREF="HPL_broadcast.html">HPL_broadcast</A>,
34 | <A HREF="HPL_reduce.html">HPL_reduce</A>,
35 | <A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
36 | <A HREF="HPL_min.html">HPL_min</A>,
37 | <A HREF="HPL_max.html">HPL_max</A>,
38 | <A HREF="HPL_sum.html">HPL_sum</A>.
39 | 
40 | </BODY>
41 | </HTML>
42 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_bcast.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_bcast HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_bcast</B> Perform the row broadcast.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_bcast(</CODE>
16 | <CODE>HPL_T_panel *</CODE>
17 | <CODE>PANEL</CODE>,
18 | <CODE>int *</CODE>
19 | <CODE>IFLAG</CODE>
20 | <CODE>);</CODE>
21 | 
22 | <H1>Description</H1>
23 | <B>HPL_bcast</B>
24 | broadcasts  the  current  panel.  Successful  completion is
25 | indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to
26 | HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was
27 | not completed, in which case this function should be called again.
28 | 
29 | <H1>Arguments</H1>
30 | <PRE>
31 | PANEL   (input/output)                HPL_T_panel *
32 |         On entry,  PANEL  points to the  current panel data structure
33 |         being broadcast.
34 | </PRE>
35 | <PRE>
36 | IFLAG   (output)                      int *
37 |         On exit,  IFLAG  indicates  whether  or not the broadcast has
38 |         occurred.
39 | </PRE>
40 | 
41 | <H1>See Also</H1>
42 | <A HREF="HPL_binit.html">HPL_binit</A>,
43 | <A HREF="HPL_bwait.html">HPL_bwait</A>.
44 | 
45 | </BODY>
46 | </HTML>
47 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_binit.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_binit HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_binit</B> Initialize the row broadcast.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_binit(</CODE>
16 | <CODE>HPL_T_panel *</CODE>
17 | <CODE>PANEL</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_binit</B>
22 | initializes  a  row  broadcast.  Successful  completion  is
23 | indicated by the returned error code HPL_SUCCESS.
24 | 
25 | <H1>Arguments</H1>
26 | <PRE>
27 | PANEL   (input/output)                HPL_T_panel *
28 |         On entry,  PANEL  points to the  current panel data structure
29 |         being broadcast.
30 | </PRE>
31 | 
32 | <H1>See Also</H1>
33 | <A HREF="HPL_bcast.html">HPL_bcast</A>,
34 | <A HREF="HPL_bwait.html">HPL_bwait</A>.
35 | 
36 | </BODY>
37 | </HTML>
38 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_broadcast.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_broadcast HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_broadcast</B> Broadcast operation.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_broadcast(</CODE>
16 | <CODE>void *</CODE>
17 | <CODE>BUFFER</CODE>,
18 | <CODE>const int</CODE>
19 | <CODE>COUNT</CODE>,
20 | <CODE>const HPL_T_TYPE</CODE>
21 | <CODE>DTYPE</CODE>,
22 | <CODE>const int</CODE>
23 | <CODE>ROOT</CODE>,
24 | <CODE>MPI_Comm</CODE>
25 | <CODE>COMM</CODE>
26 | <CODE>);</CODE>
27 | 
28 | <H1>Description</H1>
29 | <B>HPL_broadcast</B>
30 | broadcasts  a message from the process with rank ROOT to
31 | all processes in the group.
32 | 
33 | <H1>Arguments</H1>
34 | <PRE>
35 | BUFFER  (local input/output)          void *
36 |         On entry,  BUFFER  points to  the  buffer to be broadcast. On
37 |         exit, this array contains the broadcast data and is identical
38 |         on all processes in the group.
39 | </PRE>
40 | <PRE>
41 | COUNT   (global input)                const int
42 |         On entry,  COUNT  indicates the number of entries in  BUFFER.
43 |         COUNT must be at least zero.
44 | </PRE>
45 | <PRE>
46 | DTYPE   (global input)                const HPL_T_TYPE
47 |         On entry,  DTYPE  specifies the type of the buffers operands.
48 | </PRE>
49 | <PRE>
50 | ROOT    (global input)                const int
51 |         On entry, ROOT is the coordinate of the source process.
52 | </PRE>
53 | <PRE>
54 | COMM    (global/local input)          MPI_Comm
55 |         The MPI communicator identifying the process collection.
56 | </PRE>
57 | 
58 | <H1>See Also</H1>
59 | <A HREF="HPL_reduce.html">HPL_reduce</A>,
60 | <A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
61 | <A HREF="HPL_barrier.html">HPL_barrier</A>,
62 | <A HREF="HPL_min.html">HPL_min</A>,
63 | <A HREF="HPL_max.html">HPL_max</A>,
64 | <A HREF="HPL_sum.html">HPL_sum</A>.
65 | 
66 | </BODY>
67 | </HTML>
68 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_bwait.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_bwait HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_bwait</B> Finalize the row broadcast.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_bwait(</CODE>
16 | <CODE>HPL_T_panel *</CODE>
17 | <CODE>PANEL</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_bwait</B>
22 | waits  for  the  row  broadcast  of  the current  panel  to
23 | terminate.  Successful completion is indicated by the returned  error
24 | code HPL_SUCCESS.
25 | 
26 | <H1>Arguments</H1>
27 | <PRE>
28 | PANEL   (input/output)                HPL_T_panel *
29 |         On entry,  PANEL  points to the  current panel data structure
30 |         being broadcast.
31 | </PRE>
32 | 
33 | <H1>See Also</H1>
34 | <A HREF="HPL_binit.html">HPL_binit</A>,
35 | <A HREF="HPL_bcast.html">HPL_bcast</A>.
36 | 
37 | </BODY>
38 | </HTML>
39 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_copyL.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_copyL HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_copyL</B> Copy the current panel into a contiguous workspace.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_copyL(</CODE>
16 | <CODE>HPL_T_panel *</CODE>
17 | <CODE>PANEL</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_copyL</B>
22 | copies  the  panel of columns, the L1 replicated submatrix,
23 | the pivot array  and  the info scalar into a contiguous workspace for
24 | later broadcast.
25 |  
26 | The copy of this panel  into  a contiguous buffer  can be enforced by
27 | specifying -DHPL_COPY_L in the architecture specific Makefile.
28 | 
29 | <H1>Arguments</H1>
30 | <PRE>
31 | PANEL   (input/output)                HPL_T_panel *
32 |         On entry,  PANEL  points to the  current panel data structure
33 |         being broadcast.
34 | </PRE>
35 | 
36 | <H1>See Also</H1>
37 | <A HREF="HPL_binit.html">HPL_binit</A>,
38 | <A HREF="HPL_bcast.html">HPL_bcast</A>,
39 | <A HREF="HPL_bwait.html">HPL_bwait</A>.
40 | 
41 | </BODY>
42 | </HTML>
43 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_fprintf.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_fprintf HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_fprintf</B> fprintf + fflush wrapper.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_fprintf(</CODE>
16 | <CODE>FILE *</CODE>
17 | <CODE>STREAM</CODE>,
18 | <CODE>const char *</CODE>
19 | <CODE>FORM</CODE>,
20 | <CODE>...</CODE>
21 | <CODE>);</CODE>
22 | 
23 | <H1>Description</H1>
24 | <B>HPL_fprintf</B>
25 | is a wrapper around fprintf flushing the output stream.
26 | 
27 | <H1>Arguments</H1>
28 | <PRE>
29 | STREAM  (local input)                 FILE *
30 |         On entry, STREAM specifies the output stream.
31 | </PRE>
32 | <PRE>
33 | FORM    (local input)                 const char *
34 |         On entry, FORM specifies the format, i.e., how the subsequent
35 |         arguments are converted for output.
36 | </PRE>
37 | <PRE>
38 |         (local input)                 ...
39 |         On entry,  ...  is the list of arguments to be printed within
40 |         the format string.
41 | </PRE>
42 | 
43 | <H1>Example</H1>
44 | <CODE>#include "hpl.h"</CODE><BR><BR>
45 | <PRE>
46 | int main(int argc, char *argv[])
47 | {
48 |    HPL_fprintf( stdout, "Hello World.\n" );
49 |    exit(0); return(0);
50 | }
51 | </PRE>
52 | 
53 | <H1>See Also</H1>
54 | <A HREF="HPL_abort.html">HPL_abort</A>,
55 | <A HREF="HPL_warn.html">HPL_warn</A>.
56 | 
57 | </BODY>
58 | </HTML>
59 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_grid_exit.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_grid_exit HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_grid_exit</B> Exit process grid.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_grid_exit(</CODE>
16 | <CODE>HPL_T_grid *</CODE>
17 | <CODE>GRID</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_grid_exit</B>
22 | marks  the process  grid object for  deallocation.  The
23 | returned  error  code  MPI_SUCCESS  indicates  successful completion.
24 | Other error codes are (MPI) implementation dependent.
25 | 
26 | <H1>Arguments</H1>
27 | <PRE>
28 | GRID    (local input/output)          HPL_T_grid *
29 |         On entry,  GRID  points  to the data structure containing the
30 |         process grid to be released.
31 | </PRE>
32 | 
33 | <H1>See Also</H1>
34 | <A HREF="HPL_pnum.html">HPL_pnum</A>,
35 | <A HREF="HPL_grid_init.html">HPL_grid_init</A>,
36 | <A HREF="HPL_grid_info.html">HPL_grid_info</A>.
37 | 
38 | </BODY>
39 | </HTML>
40 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_idamax.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_idamax HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_idamax</B> 1st k s.t. |x_k| = max_i(|x_i|).
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_idamax(</CODE>
16 | <CODE>const int</CODE>
17 | <CODE>N</CODE>,
18 | <CODE>const double *</CODE>
19 | <CODE>X</CODE>,
20 | <CODE>const int</CODE>
21 | <CODE>INCX</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_idamax</B>
26 | returns  the index in an n-vector  x  of the first element
27 | having maximum absolute value.
28 | 
29 | <H1>Arguments</H1>
30 | <PRE>
31 | N       (local input)                 const int
32 |         On entry, N specifies the length of the vector x. N  must  be
33 |         at least zero.
34 | </PRE>
35 | <PRE>
36 | X       (local input)                 const double *
37 |         On entry,  X  is an incremented array of dimension  at  least
38 |         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
39 | </PRE>
40 | <PRE>
41 | INCX    (local input)                 const int
42 |         On entry, INCX specifies the increment for the elements of X.
43 |         INCX must not be zero.
44 | </PRE>
45 | 
46 | <H1>Example</H1>
47 | <CODE>#include "hpl.h"</CODE><BR><BR>
48 | <PRE>
49 | int main(int argc, char *argv[])
50 | {
51 |    double x[3];
52 |    int    imax;
53 |    x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
54 |    imax = HPL_idamax( 3, x, 1 );
55 |    printf("imax=%d\n", imax);
56 |    exit(0);
57 |    return(0);
58 | }
59 | </PRE>
60 | 
61 | <H1>See Also</H1>
62 | <A HREF="HPL_daxpy.html">HPL_daxpy</A>,
63 | <A HREF="HPL_dcopy.html">HPL_dcopy</A>,
64 | <A HREF="HPL_dscal.html">HPL_dscal</A>,
65 | <A HREF="HPL_dswap.html">HPL_dswap</A>.
66 | 
67 | </BODY>
68 | </HTML>
69 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_jumpit.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_jumpit HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_jumpit</B> jump into the random sequence.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_jumpit(</CODE>
16 | <CODE>int *</CODE>
17 | <CODE>MULT</CODE>,
18 | <CODE>int *</CODE>
19 | <CODE>IADD</CODE>,
20 | <CODE>int *</CODE>
21 | <CODE>IRANN</CODE>,
22 | <CODE>int *</CODE>
23 | <CODE>IRANM</CODE>
24 | <CODE>);</CODE>
25 | 
26 | <H1>Description</H1>
27 | <B>HPL_jumpit</B>
28 | jumps in the random sequence from the number  X(n) encoded
29 | in IRANN to the number  X(m)  encoded in  IRANM using the constants A
30 | and C encoded in MULT and IADD: X(m) = A * X(n) + C.  The constants A
31 | and C obviously depend on m and n,  see  the function  HPL_xjumpm  in
32 | order to initialize them.
33 | 
34 | <H1>Arguments</H1>
35 | <PRE>
36 | MULT    (local input)                 int *
37 |         On entry, MULT is an array of dimension 2, that contains the
38 |         16-lower and 15-higher bits of the constant A.
39 | </PRE>
40 | <PRE>
41 | IADD    (local input)                 int *
42 |         On entry, IADD is an array of dimension 2, that contains the
43 |         16-lower and 15-higher bits of the constant C.
44 | </PRE>
45 | <PRE>
46 | IRANN   (local input)                 int *
47 |         On entry,  IRANN  is an array of dimension 2,  that contains 
48 |         the 16-lower and 15-higher bits of the encoding of X(n).
49 | </PRE>
50 | <PRE>
51 | IRANM   (local output)                int *
52 |         On entry,  IRANM  is an array of dimension 2.  On exit, this
53 |         array contains respectively the 16-lower and  15-higher bits
54 |         of the encoding of X(m).
55 | </PRE>
56 | 
57 | <H1>See Also</H1>
58 | <A HREF="HPL_ladd.html">HPL_ladd</A>,
59 | <A HREF="HPL_lmul.html">HPL_lmul</A>,
60 | <A HREF="HPL_setran.html">HPL_setran</A>,
61 | <A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
62 | <A HREF="HPL_rand.html">HPL_rand</A>.
63 | 
64 | </BODY>
65 | </HTML>
66 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_ladd.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_ladd HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_ladd</B> Adds two long positive integers.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_ladd(</CODE>
16 | <CODE>int *</CODE>
17 | <CODE>J</CODE>,
18 | <CODE>int *</CODE>
19 | <CODE>K</CODE>,
20 | <CODE>int *</CODE>
21 | <CODE>I</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_ladd</B>
26 | adds  without carry two long positive integers  K and J  and
27 | puts the result into I. The long integers  I, J, K are encoded on 64
28 | bits using an array of 2 integers.  The 32-lower bits  are stored in
29 | the  first  entry  of each array,  the 32-higher bits  in the second
30 | entry.
31 | 
32 | <H1>Arguments</H1>
33 | <PRE>
34 | J       (local input)                 int *
35 |         On entry, J is an integer array of dimension 2 containing the
36 |         encoded long integer J.
37 | </PRE>
38 | <PRE>
39 | K       (local input)                 int *
40 |         On entry, K is an integer array of dimension 2 containing the
41 |         encoded long integer K.
42 | </PRE>
43 | <PRE>
44 | I       (local output)                int *
45 |         On entry, I is an integer array of dimension 2. On exit, this
46 |         array contains the encoded long integer result.
47 | </PRE>
48 | 
49 | <H1>See Also</H1>
50 | <A HREF="HPL_lmul.html">HPL_lmul</A>,
51 | <A HREF="HPL_setran.html">HPL_setran</A>,
52 | <A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
53 | <A HREF="HPL_jumpit.html">HPL_jumpit</A>,
54 | <A HREF="HPL_rand.html">HPL_rand</A>.
55 | 
56 | </BODY>
57 | </HTML>
58 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_lmul.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_lmul HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_lmul</B> multiplies 2 long positive integers.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_lmul(</CODE>
16 | <CODE>int *</CODE>
17 | <CODE>K</CODE>,
18 | <CODE>int *</CODE>
19 | <CODE>J</CODE>,
20 | <CODE>int *</CODE>
21 | <CODE>I</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_lmul</B>
26 | multiplies  without carry two long positive integers K and J
27 | and puts the result into I. The long integers  I, J, K are encoded on
28 | 64 bits using an array of 2 integers. The 32-lower bits are stored in
29 | the first entry of each array, the 32-higher bits in the second entry
30 | of each array. For efficiency purposes, the intrinsic modulo function
31 | is inlined.
32 | 
33 | <H1>Arguments</H1>
34 | <PRE>
35 | K       (local input)                 int *
36 |         On entry, K is an integer array of dimension 2 containing the
37 |         encoded long integer K.
38 | </PRE>
39 | <PRE>
40 | J       (local input)                 int *
41 |         On entry, J is an integer array of dimension 2 containing the
42 |         encoded long integer J.
43 | </PRE>
44 | <PRE>
45 | I       (local output)                int *
46 |         On entry, I is an integer array of dimension 2. On exit, this
47 |         array contains the encoded long integer result.
48 | </PRE>
49 | 
50 | <H1>See Also</H1>
51 | <A HREF="HPL_ladd.html">HPL_ladd</A>,
52 | <A HREF="HPL_setran.html">HPL_setran</A>,
53 | <A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
54 | <A HREF="HPL_jumpit.html">HPL_jumpit</A>,
55 | <A HREF="HPL_rand.html">HPL_rand</A>.
56 | 
57 | </BODY>
58 | </HTML>
59 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_max.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_max HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_max</B> Combine (max) two buffers.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_max(</CODE>
16 | <CODE>const int</CODE>
17 | <CODE>N</CODE>,
18 | <CODE>const void *</CODE>
19 | <CODE>IN</CODE>,
20 | <CODE>void *</CODE>
21 | <CODE>INOUT</CODE>,
22 | <CODE>const HPL_T_TYPE</CODE>
23 | <CODE>DTYPE</CODE>
24 | <CODE>);</CODE>
25 | 
26 | <H1>Description</H1>
27 | <B>HPL_max</B>
28 | combines (max) two buffers.
29 | 
30 | <H1>Arguments</H1>
31 | <PRE>
32 | N       (input)                       const int
33 |         On entry, N  specifies  the  length  of  the  buffers  to  be
34 |         combined. N must be at least zero.
35 | </PRE>
36 | <PRE>
37 | IN      (input)                       const void *
38 |         On entry, IN points to the input-only buffer to be combined.
39 | </PRE>
40 | <PRE>
41 | INOUT   (input/output)                void *
42 |         On entry, INOUT  points  to  the  input-output  buffer  to be
43 |         combined.  On exit,  the  entries of this array  contain  the
44 |         combined results.
45 | </PRE>
46 | <PRE>
47 | DTYPE   (input)                       const HPL_T_TYPE
48 |         On entry,  DTYPE  specifies the type of the buffers operands.
49 | </PRE>
50 | 
51 | <H1>See Also</H1>
52 | <A HREF="HPL_broadcast.html">HPL_broadcast</A>,
53 | <A HREF="HPL_reduce.html">HPL_reduce</A>,
54 | <A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
55 | <A HREF="HPL_barrier.html">HPL_barrier</A>,
56 | <A HREF="HPL_min.html">HPL_min</A>,
57 | <A HREF="HPL_sum.html">HPL_sum</A>.
58 | 
59 | </BODY>
60 | </HTML>
61 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_min.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_min HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_min</B> Combine (min) two buffers.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_min(</CODE>
16 | <CODE>const int</CODE>
17 | <CODE>N</CODE>,
18 | <CODE>const void *</CODE>
19 | <CODE>IN</CODE>,
20 | <CODE>void *</CODE>
21 | <CODE>INOUT</CODE>,
22 | <CODE>const HPL_T_TYPE</CODE>
23 | <CODE>DTYPE</CODE>
24 | <CODE>);</CODE>
25 | 
26 | <H1>Description</H1>
27 | <B>HPL_min</B>
28 | combines (min) two buffers.
29 | 
30 | <H1>Arguments</H1>
31 | <PRE>
32 | N       (input)                       const int
33 |         On entry, N  specifies  the  length  of  the  buffers  to  be
34 |         combined. N must be at least zero.
35 | </PRE>
36 | <PRE>
37 | IN      (input)                       const void *
38 |         On entry, IN points to the input-only buffer to be combined.
39 | </PRE>
40 | <PRE>
41 | INOUT   (input/output)                void *
42 |         On entry, INOUT  points  to  the  input-output  buffer  to be
43 |         combined.  On exit,  the  entries of this array  contain  the
44 |         combined results.
45 | </PRE>
46 | <PRE>
47 | DTYPE   (input)                       const HPL_T_TYPE
48 |         On entry,  DTYPE  specifies the type of the buffers operands.
49 | </PRE>
50 | 
51 | <H1>See Also</H1>
52 | <A HREF="HPL_broadcast.html">HPL_broadcast</A>,
53 | <A HREF="HPL_reduce.html">HPL_reduce</A>,
54 | <A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
55 | <A HREF="HPL_barrier.html">HPL_barrier</A>,
56 | <A HREF="HPL_max.html">HPL_max</A>,
57 | <A HREF="HPL_sum.html">HPL_sum</A>.
58 | 
59 | </BODY>
60 | </HTML>
61 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pabort.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pabort HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pabort</B> halts execution.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_pabort(</CODE>
16 | <CODE>int</CODE>
17 | <CODE>LINE</CODE>,
18 | <CODE>const char *</CODE>
19 | <CODE>SRNAME</CODE>,
20 | <CODE>const char *</CODE>
21 | <CODE>FORM</CODE>,
22 | <CODE>...</CODE>
23 | <CODE>);</CODE>
24 | 
25 | <H1>Description</H1>
26 | <B>HPL_pabort</B>
27 | displays an error message on stderr and halts execution.
28 | 
29 | <H1>Arguments</H1>
30 | <PRE>
31 | LINE    (local input)                 int
32 |         On entry,  LINE  specifies the line  number in the file where
33 |         the  error  has occurred.  When  LINE  is not a positive line
34 |         number, it is ignored.
35 | </PRE>
36 | <PRE>
37 | SRNAME  (local input)                 const char *
38 |         On entry, SRNAME  should  be the name of the routine  calling
39 |         this error handler.
40 | </PRE>
41 | <PRE>
42 | FORM    (local input)                 const char *
43 |         On entry, FORM specifies the format, i.e., how the subsequent
44 |         arguments are converted for output.
45 | </PRE>
46 | <PRE>
47 |         (local input)                 ...
48 |         On entry,  ...  is the list of arguments to be printed within
49 |         the format string.
50 | </PRE>
51 | 
52 | <H1>See Also</H1>
53 | <A HREF="HPL_fprintf.html">HPL_fprintf</A>,
54 | <A HREF="HPL_pwarn.html">HPL_pwarn</A>.
55 | 
56 | </BODY>
57 | </HTML>
58 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_packL.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_packL HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_packL</B> Form the MPI structure for the row ring broadcasts.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_packL(</CODE>
16 | <CODE>HPL_T_panel *</CODE>
17 | <CODE>PANEL</CODE>,
18 | <CODE>const int</CODE>
19 | <CODE>INDEX</CODE>,
20 | <CODE>const int</CODE>
21 | <CODE>LEN</CODE>,
22 | <CODE>const int</CODE>
23 | <CODE>IBUF</CODE>
24 | <CODE>);</CODE>
25 | 
26 | <H1>Description</H1>
27 | <B>HPL_packL</B>
28 | forms  the MPI data type for the panel to be broadcast.
29 | Successful  completion  is  indicated  by  the  returned  error  code
30 | MPI_SUCCESS.
31 | 
32 | <H1>Arguments</H1>
33 | <PRE>
34 | PANEL   (input/output)                HPL_T_panel *
35 |         On entry,  PANEL  points to the  current panel data structure
36 |         being broadcast.
37 | </PRE>
38 | <PRE>
39 | INDEX   (input)                       const int
40 |         On entry,  INDEX  points  to  the  first entry of the  packed
41 |         buffer being broadcast.
42 | </PRE>
43 | <PRE>
44 | LEN     (input)                       const int
45 |         On entry, LEN is the length of the packed buffer.
46 | </PRE>
47 | <PRE>
48 | IBUF    (input)                       const int
49 |         On entry, IBUF  specifies the panel buffer/count/type entries
50 |         that should be initialized.
51 | </PRE>
52 | 
53 | <H1>See Also</H1>
54 | <A HREF="HPL_binit.html">HPL_binit</A>,
55 | <A HREF="HPL_bcast.html">HPL_bcast</A>,
56 | <A HREF="HPL_bwait.html">HPL_bwait</A>.
57 | 
58 | </BODY>
59 | </HTML>
60 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pddriver.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>main HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>main</B> HPL main timing program.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>main();</CODE>
16 | 
17 | <H1>Description</H1>
18 | <B>main</B>
19 | is the main driver program for testing the HPL routines.
20 | This  program is  driven  by  a short data file named  "HPL.dat".
21 | 
22 | <H1>See Also</H1>
23 | <A HREF="HPL_pdinfo.html">HPL_pdinfo</A>,
24 | <A HREF="HPL_pdtest.html">HPL_pdtest</A>.
25 | 
26 | </BODY>
27 | </HTML>
28 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pdgesv.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pdgesv HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pdgesv</B> Solve A x = b.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_pdgesv(</CODE>
16 | <CODE>HPL_T_grid *</CODE>
17 | <CODE>GRID</CODE>,
18 | <CODE>HPL_T_palg *</CODE>
19 | <CODE>ALGO</CODE>,
20 | <CODE>HPL_T_pmat *</CODE>
21 | <CODE>A</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_pdgesv</B>
26 | factors an N-by-N+1 matrix using LU factorization with row
27 | partial pivoting.  The main algorithm  is the "right looking" variant
28 | with  or  without look-ahead.  The  lower  triangular  factor is left
29 | unpivoted and the pivots are not returned. The right hand side is the
30 | N+1 column of the coefficient matrix.
31 | 
32 | <H1>Arguments</H1>
33 | <PRE>
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | </PRE>
38 | <PRE>
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | </PRE>
43 | <PRE>
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | </PRE>
48 | 
49 | <H1>See Also</H1>
50 | <A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
51 | <A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
52 | <A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
53 | <A HREF="HPL_pdtrsv.html">HPL_pdtrsv</A>.
54 | 
55 | </BODY>
56 | </HTML>
57 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pdgesv0.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pdgesv0 HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pdgesv0</B> Factor an N x N+1 matrix.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_pdgesv0(</CODE>
16 | <CODE>HPL_T_grid *</CODE>
17 | <CODE>GRID</CODE>,
18 | <CODE>HPL_T_palg *</CODE>
19 | <CODE>ALGO</CODE>,
20 | <CODE>HPL_T_pmat *</CODE>
21 | <CODE>A</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_pdgesv0</B>
26 | factors an N-by-N+1 matrix using LU factorization with row
27 | partial pivoting.  The main algorithm  is the "right looking" variant
28 | without look-ahead. The lower triangular factor is left unpivoted and
29 | the pivots are not returned. The right hand side is the N+1 column of
30 | the coefficient matrix.
31 | 
32 | <H1>Arguments</H1>
33 | <PRE>
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | </PRE>
38 | <PRE>
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | </PRE>
43 | <PRE>
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | </PRE>
48 | 
49 | <H1>See Also</H1>
50 | <A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
51 | <A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
52 | <A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
53 | <A HREF="HPL_pdfact.html">HPL_pdfact</A>,
54 | <A HREF="HPL_binit.html">HPL_binit</A>,
55 | <A HREF="HPL_bcast.html">HPL_bcast</A>,
56 | <A HREF="HPL_bwait.html">HPL_bwait</A>,
57 | <A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
58 | <A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
59 | <A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
60 | <A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
61 | 
62 | </BODY>
63 | </HTML>
64 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pdgesvK1.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pdgesvK1 HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pdgesvK1</B> Factor an N x N+1 matrix.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_pdgesvK1(</CODE>
16 | <CODE>HPL_T_grid *</CODE>
17 | <CODE>GRID</CODE>,
18 | <CODE>HPL_T_palg *</CODE>
19 | <CODE>ALGO</CODE>,
20 | <CODE>HPL_T_pmat *</CODE>
21 | <CODE>A</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_pdgesvK1</B>
26 | factors an N-by-N+1 matrix using LU factorization with row
27 | partial pivoting.  The main algorithm  is the "right looking" variant
28 | with look-ahead.  The  lower  triangular factor is left unpivoted and
29 | the pivots are not returned. The right hand side is the N+1 column of
30 | the coefficient matrix.
31 | 
32 | <H1>Arguments</H1>
33 | <PRE>
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | </PRE>
38 | <PRE>
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | </PRE>
43 | <PRE>
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | </PRE>
48 | 
49 | <H1>See Also</H1>
50 | <A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
51 | <A HREF="HPL_pdgesvK2.html">HPL_pdgesvK2</A>,
52 | <A HREF="HPL_pdfact.html">HPL_pdfact</A>,
53 | <A HREF="HPL_binit.html">HPL_binit</A>,
54 | <A HREF="HPL_bcast.html">HPL_bcast</A>,
55 | <A HREF="HPL_bwait.html">HPL_bwait</A>,
56 | <A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
57 | <A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
58 | <A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
59 | <A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
60 | 
61 | </BODY>
62 | </HTML>
63 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pdgesvK2.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pdgesvK2 HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pdgesvK2</B> Factor an N x N+1 matrix.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_pdgesvK2(</CODE>
16 | <CODE>HPL_T_grid *</CODE>
17 | <CODE>GRID</CODE>,
18 | <CODE>HPL_T_palg *</CODE>
19 | <CODE>ALGO</CODE>,
20 | <CODE>HPL_T_pmat *</CODE>
21 | <CODE>A</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_pdgesvK2</B>
26 | factors an N-by-N+1 matrix using LU factorization with row
27 | partial pivoting.  The main algorithm  is the "right looking" variant
28 | with look-ahead.  The  lower  triangular factor is left unpivoted and
29 | the pivots are not returned. The right hand side is the N+1 column of
30 | the coefficient matrix.
31 | 
32 | <H1>Arguments</H1>
33 | <PRE>
34 | GRID    (local input)                 HPL_T_grid *
35 |         On entry,  GRID  points  to the data structure containing the
36 |         process grid information.
37 | </PRE>
38 | <PRE>
39 | ALGO    (global input)                HPL_T_palg *
40 |         On entry,  ALGO  points to  the data structure containing the
41 |         algorithmic parameters.
42 | </PRE>
43 | <PRE>
44 | A       (local input/output)          HPL_T_pmat *
45 |         On entry, A points to the data structure containing the local
46 |         array information.
47 | </PRE>
48 | 
49 | <H1>See Also</H1>
50 | <A HREF="HPL_pdgesv.html">HPL_pdgesv</A>,
51 | <A HREF="HPL_pdgesv0.html">HPL_pdgesv0</A>,
52 | <A HREF="HPL_pdgesvK1.html">HPL_pdgesvK1</A>,
53 | <A HREF="HPL_pdfact.html">HPL_pdfact</A>,
54 | <A HREF="HPL_binit.html">HPL_binit</A>,
55 | <A HREF="HPL_bcast.html">HPL_bcast</A>,
56 | <A HREF="HPL_bwait.html">HPL_bwait</A>,
57 | <A HREF="HPL_pdupdateNN.html">HPL_pdupdateNN</A>,
58 | <A HREF="HPL_pdupdateNT.html">HPL_pdupdateNT</A>,
59 | <A HREF="HPL_pdupdateTN.html">HPL_pdupdateTN</A>,
60 | <A HREF="HPL_pdupdateTT.html">HPL_pdupdateTT</A>.
61 | 
62 | </BODY>
63 | </HTML>
64 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pdpanel_disp.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pdpanel_disp HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pdpanel_disp</B> Deallocate a panel data structure.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_pdpanel_disp(</CODE>
16 | <CODE>HPL_T_panel * *</CODE>
17 | <CODE>PANEL</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_pdpanel_disp</B>
22 | deallocates  the  panel  structure  and  resources  and
23 | stores the error code returned by the panel factorization.
24 | 
25 | <H1>Arguments</H1>
26 | <PRE>
27 | PANEL   (local input/output)          HPL_T_panel * *
28 |         On entry,  PANEL  points  to  the  address  of the panel data
29 |         structure to be deallocated.
30 | </PRE>
31 | 
32 | <H1>See Also</H1>
33 | <A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
34 | <A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
35 | <A HREF="HPL_pdpanel_free.html">HPL_pdpanel_free</A>.
36 | 
37 | </BODY>
38 | </HTML>
39 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pdpanel_free.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pdpanel_free HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pdpanel_free</B> Deallocate the panel resources.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_pdpanel_free(</CODE>
16 | <CODE>HPL_T_panel *</CODE>
17 | <CODE>PANEL</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_pdpanel_free</B>
22 | deallocates  the panel resources  and  stores the error
23 | code returned by the panel factorization.
24 | 
25 | <H1>Arguments</H1>
26 | <PRE>
27 | PANEL   (local input/output)          HPL_T_panel *
28 |         On entry,  PANEL  points  to  the  panel data  structure from
29 |         which the resources should be deallocated.
30 | </PRE>
31 | 
32 | <H1>See Also</H1>
33 | <A HREF="HPL_pdpanel_new.html">HPL_pdpanel_new</A>,
34 | <A HREF="HPL_pdpanel_init.html">HPL_pdpanel_init</A>,
35 | <A HREF="HPL_pdpanel_disp.html">HPL_pdpanel_disp</A>.
36 | 
37 | </BODY>
38 | </HTML>
39 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pnum.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pnum HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pnum</B> Rank determination.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_pnum(</CODE>
16 | <CODE>const HPL_T_grid *</CODE>
17 | <CODE>GRID</CODE>,
18 | <CODE>const int</CODE>
19 | <CODE>MYROW</CODE>,
20 | <CODE>const int</CODE>
21 | <CODE>MYCOL</CODE>
22 | <CODE>);</CODE>
23 | 
24 | <H1>Description</H1>
25 | <B>HPL_pnum</B>
26 | determines  the  rank  of a  process  as a function  of  its
27 | coordinates in the grid.
28 | 
29 | <H1>Arguments</H1>
30 | <PRE>
31 | GRID    (local input)                 const HPL_T_grid *
32 |         On entry,  GRID  points  to the data structure containing the
33 |         process grid information.
34 | </PRE>
35 | <PRE>
36 | MYROW   (local input)                 const int
37 |         On entry,  MYROW  specifies the row coordinate of the process
38 |         whose rank is to be determined. MYROW must be greater than or
39 |         equal to zero and less than NPROW.
40 | </PRE>
41 | <PRE>
42 | MYCOL   (local input)                 const int
43 |         On entry,  MYCOL  specifies  the  column  coordinate  of  the
44 |         process whose rank is to be determined. MYCOL must be greater
45 |         than or equal to zero and less than NPCOL.
46 | </PRE>
47 | 
48 | <H1>See Also</H1>
49 | <A HREF="HPL_grid_init.html">HPL_grid_init</A>,
50 | <A HREF="HPL_grid_info.html">HPL_grid_info</A>,
51 | <A HREF="HPL_grid_exit.html">HPL_grid_exit</A>.
52 | 
53 | </BODY>
54 | </HTML>
55 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_ptimer.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_ptimer HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_ptimer</B> Timer facility.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_ptimer(</CODE>
16 | <CODE>const int</CODE>
17 | <CODE>I</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_ptimer</B>
22 | provides a  "stopwatch"  functionality  cpu/wall  timer in
23 | seconds.  Up to  64  separate timers can be functioning at once.  The
24 | first call starts the timer,  and the second stops it.  This  routine
25 | can be disabled by calling  HPL_ptimer_disable(),  so that calls to
26 | the timer are ignored.  This feature can be used to make sure certain
27 | sections of code do not affect timings,  even  if  they call routines
28 | which have HPL_ptimer calls in them. HPL_ptimer_enable()  will enable
29 | the  timer  functionality.  One  can retrieve  the current value of a
30 | timer by calling
31 |  
32 | t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
33 |  
34 | where  I  is the timer index in  [0..64).  To  initialize  the timer
35 | functionality, one must have called HPL_ptimer_boot() prior to any of
36 | the functions mentioned above.
37 | 
38 | <H1>Arguments</H1>
39 | <PRE>
40 | I       (global input)                const int
41 |         On entry, I specifies the timer to stop/start.
42 | </PRE>
43 | 
44 | <H1>See Also</H1>
45 | <A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
46 | <A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>.
47 | 
48 | </BODY>
49 | </HTML>
50 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_ptimer_cputime.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_ptimer_cputime HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_ptimer_cputime</B> Return the CPU time.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>double</CODE>
15 | <CODE>HPL_ptimer_cputime();</CODE>
16 | 
17 | <H1>Description</H1>
18 | <B>HPL_ptimer_cputime</B>
19 | returns the cpu time. If HPL_USE_CLOCK is defined,
20 | the  clock() function is used to return an approximation of processor
21 | time used by the program.  The value returned is the CPU time used so
22 | far as a clock_t;  to get the number of seconds used,  the result  is
23 | divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
24 | standard library.  If  HPL_USE_TIMES is defined, the times() function
25 | is used instead.  This  function  returns  the current process times.
26 | times() returns the number of clock ticks that have elapsed since the
27 | system has been up.  Otherwise and by default,  the  standard library
28 | function getrusage() is used.
29 | 
30 | <H1>See Also</H1>
31 | <A HREF="HPL_ptimer_walltime.html">HPL_ptimer_walltime</A>,
32 | <A HREF="HPL_ptimer.html">HPL_ptimer</A>.
33 | 
34 | </BODY>
35 | </HTML>
36 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_ptimer_walltime.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_ptimer_walltime HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_ptimer_walltime</B> Return the elapsed (wall-clock) time.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>double</CODE>
15 | <CODE>HPL_ptimer_walltime();</CODE>
16 | 
17 | <H1>Description</H1>
18 | <B>HPL_ptimer_walltime</B>
19 | returns the elapsed (wall-clock) time.
20 | 
21 | <H1>See Also</H1>
22 | <A HREF="HPL_ptimer_cputime.html">HPL_ptimer_cputime</A>,
23 | <A HREF="HPL_ptimer.html">HPL_ptimer</A>.
24 | 
25 | </BODY>
26 | </HTML>
27 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_pwarn.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_pwarn HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_pwarn</B> displays an error message.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_pwarn(</CODE>
16 | <CODE>FILE *</CODE>
17 | <CODE>STREAM</CODE>,
18 | <CODE>int</CODE>
19 | <CODE>LINE</CODE>,
20 | <CODE>const char *</CODE>
21 | <CODE>SRNAME</CODE>,
22 | <CODE>const char *</CODE>
23 | <CODE>FORM</CODE>,
24 | <CODE>...</CODE>
25 | <CODE>);</CODE>
26 | 
27 | <H1>Description</H1>
28 | <B>HPL_pwarn</B>
29 | displays an error message.
30 | 
31 | <H1>Arguments</H1>
32 | <PRE>
33 | STREAM  (local input)                 FILE *
34 |         On entry, STREAM specifies the output stream.
35 | </PRE>
36 | <PRE>
37 | LINE    (local input)                 int
38 |         On entry,  LINE  specifies the line  number in the file where
39 |         the  error  has occurred.  When  LINE  is not a positive line
40 |         number, it is ignored.
41 | </PRE>
42 | <PRE>
43 | SRNAME  (local input)                 const char *
44 |         On entry, SRNAME  should  be the name of the routine  calling
45 |         this error handler.
46 | </PRE>
47 | <PRE>
48 | FORM    (local input)                 const char *
49 |         On entry, FORM specifies the format, i.e., how the subsequent
50 |         arguments are converted for output.
51 | </PRE>
52 | <PRE>
53 |         (local input)                 ...
54 |         On entry,  ...  is the list of arguments to be printed within
55 |         the format string.
56 | </PRE>
57 | 
58 | <H1>See Also</H1>
59 | <A HREF="HPL_pabort.html">HPL_pabort</A>,
60 | <A HREF="HPL_fprintf.html">HPL_fprintf</A>.
61 | 
62 | </BODY>
63 | </HTML>
64 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_rand.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_rand HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_rand</B> random number generator.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>double</CODE>
15 | <CODE>HPL_rand();</CODE>
16 | 
17 | <H1>Description</H1>
18 | <B>HPL_rand</B>
19 | generates  the next number  in the  random  sequence.  This
20 | function  ensures  that this number lies in the interval (-0.5, 0.5].
21 |  
22 | The static array irand contains the information (2 integers) required
23 | to generate the  next number  in the sequence  X(n).  This  number is
24 | computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5,  where the
25 | constant d is the largest 64 bit positive integer. The array irand is
26 | then  updated  for the generation of the next number  X(n+1)  in  the
27 | random sequence as follows X(n+1) = a * X(n) + c. The constants a and
28 | c  should have been preliminarily stored in the arrays ias and ics as
29 | 2 pairs of integers.  The initialization of  ias,  ics and  irand  is
30 | performed by the function HPL_setran.
31 | 
32 | <H1>See Also</H1>
33 | <A HREF="HPL_ladd.html">HPL_ladd</A>,
34 | <A HREF="HPL_lmul.html">HPL_lmul</A>,
35 | <A HREF="HPL_setran.html">HPL_setran</A>,
36 | <A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
37 | <A HREF="HPL_jumpit.html">HPL_jumpit</A>.
38 | 
39 | </BODY>
40 | </HTML>
41 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_recv.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_recv HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_recv</B> Receive a message.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_recv(</CODE>
16 | <CODE>double *</CODE>
17 | <CODE>RBUF</CODE>,
18 | <CODE>int</CODE>
19 | <CODE>RCOUNT</CODE>,
20 | <CODE>int</CODE>
21 | <CODE>SRC</CODE>,
22 | <CODE>int</CODE>
23 | <CODE>RTAG</CODE>,
24 | <CODE>MPI_Comm</CODE>
25 | <CODE>COMM</CODE>
26 | <CODE>);</CODE>
27 | 
28 | <H1>Description</H1>
29 | <B>HPL_recv</B>
30 | is a simple wrapper around  MPI_Recv.  Its  main  purpose is
31 | to  allow for some  experimentation / tuning  of this simple routine.
32 | Successful  completion  is  indicated  by  the  returned  error  code
33 | HPL_SUCCESS.  In the case of messages of length less than or equal to
34 | zero, this function returns immediately.
35 | 
36 | <H1>Arguments</H1>
37 | <PRE>
38 | RBUF    (local output)                double *
39 |         On entry, RBUF specifies the starting address of buffer to be
40 |         received.
41 | </PRE>
42 | <PRE>
43 | RCOUNT  (local input)                 int
44 |         On entry,  RCOUNT  specifies  the number  of double precision
45 |         entries in RBUF. RCOUNT must be at least zero.
46 | </PRE>
47 | <PRE>
48 | SRC     (local input)                 int
49 |         On entry, SRC  specifies the rank of the  sending  process in
50 |         the communication space defined by COMM.
51 | </PRE>
52 | <PRE>
53 | RTAG    (local input)                 int
54 |         On entry,  RTAG specifies the message tag to be used for this
55 |         communication operation.
56 | </PRE>
57 | <PRE>
58 | COMM    (local input)                 MPI_Comm
59 |         The MPI communicator identifying the communication space.
60 | </PRE>
61 | 
62 | <H1>See Also</H1>
63 | <A HREF="HPL_send.html">HPL_send</A>,
64 | <A HREF="HPL_sendrecv.html">HPL_sendrecv</A>.
65 | 
66 | </BODY>
67 | </HTML>
68 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_send.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_send HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_send</B> Send a message.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>int</CODE>
15 | <CODE>HPL_send(</CODE>
16 | <CODE>double *</CODE>
17 | <CODE>SBUF</CODE>,
18 | <CODE>int</CODE>
19 | <CODE>SCOUNT</CODE>,
20 | <CODE>int</CODE>
21 | <CODE>DEST</CODE>,
22 | <CODE>int</CODE>
23 | <CODE>STAG</CODE>,
24 | <CODE>MPI_Comm</CODE>
25 | <CODE>COMM</CODE>
26 | <CODE>);</CODE>
27 | 
28 | <H1>Description</H1>
29 | <B>HPL_send</B>
30 | is a simple wrapper around  MPI_Send.  Its  main  purpose is
31 | to  allow for some  experimentation / tuning  of this simple routine.
32 | Successful  completion  is  indicated  by  the  returned  error  code
33 | MPI_SUCCESS.  In the case of messages of length less than or equal to
34 | zero, this function returns immediately.
35 | 
36 | <H1>Arguments</H1>
37 | <PRE>
38 | SBUF    (local input)                 double *
39 |         On entry, SBUF specifies the starting address of buffer to be
40 |         sent.
41 | </PRE>
42 | <PRE>
43 | SCOUNT  (local input)                 int
44 |         On entry,  SCOUNT  specifies  the number of  double precision
45 |         entries in SBUF. SCOUNT must be at least zero.
46 | </PRE>
47 | <PRE>
48 | DEST    (local input)                 int
49 |         On entry, DEST specifies the rank of the receiving process in
50 |         the communication space defined by COMM.
51 | </PRE>
52 | <PRE>
53 | STAG    (local input)                 int
54 |         On entry,  STAG specifies the message tag to be used for this
55 |         communication operation.
56 | </PRE>
57 | <PRE>
58 | COMM    (local input)                 MPI_Comm
59 |         The MPI communicator identifying the communication space.
60 | </PRE>
61 | 
62 | <H1>See Also</H1>
63 | <A HREF="HPL_recv.html">HPL_recv</A>,
64 | <A HREF="HPL_sendrecv.html">HPL_sendrecv</A>.
65 | 
66 | </BODY>
67 | </HTML>
68 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_setran.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_setran HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_setran</B> Manage the random number generator.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_setran(</CODE>
16 | <CODE>const int</CODE>
17 | <CODE>OPTION</CODE>,
18 | <CODE>int *</CODE>
19 | <CODE>IRAN</CODE>
20 | <CODE>);</CODE>
21 | 
22 | <H1>Description</H1>
23 | <B>HPL_setran</B>
24 | initializes  the random generator with the encoding of the
25 | first number X(0) in the sequence,  and the constants a and c used to
26 | compute the next element in the sequence: X(n+1) = a*X(n) + c.  X(0),
27 | a and c are stored in the static variables  irand, ias and ics.  When
28 | OPTION is 0 (resp. 1 and 2),  irand (resp. ias and ics) is set to the
29 | values of the input array IRAN.  When OPTION is 3, IRAN is set to the
30 | current value of irand, and irand is then incremented.
31 | 
32 | <H1>Arguments</H1>
33 | <PRE>
34 | OPTION  (local input)                 const int
35 |         On entry, OPTION  is an integer that specifies the operations
36 |         to be performed on the random generator as specified above.
37 | </PRE>
38 | <PRE>
39 | IRAN    (local input/output)          int *
40 |         On entry,  IRAN is an array of dimension 2, that contains the
41 |         16-lower and 15-higher bits of a random number.
42 | </PRE>
43 | 
44 | <H1>See Also</H1>
45 | <A HREF="HPL_ladd.html">HPL_ladd</A>,
46 | <A HREF="HPL_lmul.html">HPL_lmul</A>,
47 | <A HREF="HPL_xjumpm.html">HPL_xjumpm</A>,
48 | <A HREF="HPL_jumpit.html">HPL_jumpit</A>,
49 | <A HREF="HPL_rand.html">HPL_rand</A>.
50 | 
51 | </BODY>
52 | </HTML>
53 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_sum.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_sum HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_sum</B> Combine (sum) two buffers.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_sum(</CODE>
16 | <CODE>const int</CODE>
17 | <CODE>N</CODE>,
18 | <CODE>const void *</CODE>
19 | <CODE>IN</CODE>,
20 | <CODE>void *</CODE>
21 | <CODE>INOUT</CODE>,
22 | <CODE>const HPL_T_TYPE</CODE>
23 | <CODE>DTYPE</CODE>
24 | <CODE>);</CODE>
25 | 
26 | <H1>Description</H1>
27 | <B>HPL_sum</B>
28 | combines (sum) two buffers.
29 | 
30 | <H1>Arguments</H1>
31 | <PRE>
32 | N       (input)                       const int
33 |         On entry, N  specifies  the  length  of  the  buffers  to  be
34 |         combined. N must be at least zero.
35 | </PRE>
36 | <PRE>
37 | IN      (input)                       const void *
38 |         On entry, IN points to the input-only buffer to be combined.
39 | </PRE>
40 | <PRE>
41 | INOUT   (input/output)                void *
42 |         On entry, INOUT  points  to  the  input-output  buffer  to be
43 |         combined.  On exit,  the  entries of this array  contain  the
44 |         combined results.
45 | </PRE>
46 | <PRE>
47 | DTYPE   (input)                       const HPL_T_TYPE
48 |         On entry,  DTYPE  specifies the type of the buffers operands.
49 | </PRE>
50 | 
51 | <H1>See Also</H1>
52 | <A HREF="HPL_broadcast.html">HPL_broadcast</A>,
53 | <A HREF="HPL_reduce.html">HPL_reduce</A>,
54 | <A HREF="HPL_all_reduce.html">HPL_all_reduce</A>,
55 | <A HREF="HPL_barrier.html">HPL_barrier</A>,
56 | <A HREF="HPL_min.html">HPL_min</A>,
57 | <A HREF="HPL_max.html">HPL_max</A>,
58 | <A HREF="HPL_sum.html">HPL_sum</A>.
59 | 
60 | </BODY>
61 | </HTML>
62 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_timer.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_timer HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_timer</B> Timer facility.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_timer(</CODE>
16 | <CODE>const int</CODE>
17 | <CODE>I</CODE>
18 | <CODE>);</CODE>
19 | 
20 | <H1>Description</H1>
21 | <B>HPL_timer</B>
22 | provides a  "stopwatch"  functionality  cpu/wall  timer  in
23 | seconds.  Up to  64  separate timers can be functioning at once.  The
24 | first call starts the timer,  and the second stops it.  This  routine
25 | can be  disabled  by  calling  HPL_timer_disable(),  so that calls to
26 | the timer are ignored.  This feature can be used to make sure certain
27 | sections of code do not affect timings,  even  if  they call routines
28 | which have HPL_timer calls in them. HPL_timer_enable() will re-enable
29 | the  timer  functionality.  One  can retrieve  the current value of a
30 | timer by calling
31 |  
32 | t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
33 |  
34 | where  I  is the timer index in  [0..64).  To  initialize  the  timer
35 | functionality, one must have called HPL_timer_boot()  prior to any of
36 | the functions mentioned above.
37 | 
38 | <H1>Arguments</H1>
39 | <PRE>
40 | I       (global input)                const int
41 |         On entry, I specifies the timer to stop/start.
42 | </PRE>
43 | 
44 | <H1>See Also</H1>
45 | <A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
46 | <A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>.
47 | 
48 | </BODY>
49 | </HTML>
50 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_timer_cputime.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_timer_cputime HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_timer_cputime</B> Return the CPU time.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>double</CODE>
15 | <CODE>HPL_timer_cputime();</CODE>
16 | 
17 | <H1>Description</H1>
18 | <B>HPL_timer_cputime</B>
19 | returns the cpu time.  If HPL_USE_CLOCK is defined,
20 | the  clock() function is used to return an approximation of processor
21 | time used by the program.  The value returned is the CPU time used so
22 | far as a clock_t;  to get the number of seconds used,  the result  is
23 | divided by  CLOCKS_PER_SEC.  This function is part of the  ANSI/ISO C
24 | standard library.  If  HPL_USE_TIMES is defined, the times() function
25 | is used instead.  This  function  returns  the current process times.
26 | times() returns the number of clock ticks that have elapsed since the
27 | system has been up.  Otherwise and by default,  the  standard library
28 | function getrusage() is used.
29 | 
30 | <H1>See Also</H1>
31 | <A HREF="HPL_timer_walltime.html">HPL_timer_walltime</A>,
32 | <A HREF="HPL_timer.html">HPL_timer</A>.
33 | 
34 | </BODY>
35 | </HTML>
36 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_timer_walltime.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_timer_walltime HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_timer_walltime</B> Return the elapsed (wall-clock) time.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>double</CODE>
15 | <CODE>HPL_timer_walltime();</CODE>
16 | 
17 | <H1>Description</H1>
18 | <B>HPL_timer_walltime</B>
19 | returns the elapsed (wall-clock) time.
20 | 
21 | <H1>See Also</H1>
22 | <A HREF="HPL_timer_cputime.html">HPL_timer_cputime</A>,
23 | <A HREF="HPL_timer.html">HPL_timer</A>.
24 | 
25 | </BODY>
26 | </HTML>
27 | 


--------------------------------------------------------------------------------
/hpl/www/HPL_warn.html:
--------------------------------------------------------------------------------
 1 | <HTML>
 2 | <HEAD>
 3 | <TITLE>HPL_warn HPL 2.0 Library Functions September 10, 2008</TITLE> 
 4 | </HEAD>
 5 | 
 6 | <BODY BGCOLOR="WHITE" TEXT = "#000000" LINK = "#0000ff" VLINK = "#000099"
 7 |       ALINK = "#ffff00">
 8 | 
 9 | <H1>Name</H1>
10 | <B>HPL_warn</B> displays an error message.
11 | 
12 | <H1>Synopsis</H1>
13 | <CODE>#include "hpl.h"</CODE><BR><BR>
14 | <CODE>void</CODE>
15 | <CODE>HPL_warn(</CODE>
16 | <CODE>FILE *</CODE>
17 | <CODE>STREAM</CODE>,
18 | <CODE>int</CODE>
19 | <CODE>LINE</CODE>,
20 | <CODE>const char *</CODE>
21 | <CODE>SRNAME</CODE>,
22 | <CODE>const char *</CODE>
23 | <CODE>FORM</CODE>,
24 | <CODE>...</CODE>
25 | <CODE>);</CODE>
26 | 
27 | <H1>Description</H1>
28 | <B>HPL_warn</B>
29 | displays an error message.
30 | 
31 | <H1>Arguments</H1>
32 | <PRE>
33 | STREAM  (local input)                 FILE *
34 |         On entry, STREAM specifies the output stream.
35 | </PRE>
36 | <PRE>
37 | LINE    (local input)                 int
38 |         On entry,  LINE  specifies the line  number in the file where
39 |         the  error  has occurred.  When  LINE  is not a positive line
40 |         number, it is ignored.
41 | </PRE>
42 | <PRE>
43 | SRNAME  (local input)                 const char *
44 |         On entry, SRNAME  should  be the name of the routine  calling
45 |         this error handler.
46 | </PRE>
47 | <PRE>
48 | FORM    (local input)                 const char *
49 |         On entry, FORM specifies the format, i.e., how the subsequent
50 |         arguments are converted for output.
51 | </PRE>
52 | <PRE>
53 |         (local input)                 ...
54 |         On entry,  ...  is the list of arguments to be printed within
55 |         the format string.
56 | </PRE>
57 | 
58 | <H1>Example</H1>
59 | <CODE>#include "hpl.h"</CODE><BR><BR>
60 | <PRE>
61 | int main(int argc, char *argv[])
62 | {
63 |    HPL_warn( stderr, __LINE__, __FILE__,
64 |              "Demo.\n" );
65 |    exit(0); return(0);
66 | }
67 | </PRE>
68 | 
69 | <H1>See Also</H1>
70 | <A HREF="HPL_abort.html">HPL_abort</A>,
71 | <A HREF="HPL_fprintf.html">HPL_fprintf</A>.
72 | 
73 | </BODY>
74 | </HTML>
75 | 


--------------------------------------------------------------------------------
/hpl/www/aprunner.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/aprunner.gif


--------------------------------------------------------------------------------
/hpl/www/main.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/main.jpg


--------------------------------------------------------------------------------
/hpl/www/mat2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/mat2.jpg


--------------------------------------------------------------------------------
/hpl/www/pfact.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/pfact.jpg


--------------------------------------------------------------------------------
/hpl/www/roll.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/roll.jpg


--------------------------------------------------------------------------------
/hpl/www/rollM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/rollM.jpg


--------------------------------------------------------------------------------
/hpl/www/spread.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/spread.jpg


--------------------------------------------------------------------------------
/hpl/www/spreadM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icl-utk-edu/hpcc/d2b9a19b4498fdced2860f3394c03f27714b6160/hpl/www/spreadM.jpg


--------------------------------------------------------------------------------
/include/hpccver.h:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */
 2 | 
 3 | /*
 4 | Version has four components: MAJOR, MINOR, MICRO and RELEASE.
 5 | RELEASE is a, b, c, f (alpha, beta, candidate, and final).
 6 | */
 7 | #define HPCC_VERSION_MAJOR   1
 8 | #define HPCC_VERSION_MINOR   5
 9 | #define HPCC_VERSION_MICRO   0
10 | #define HPCC_VERSION_RELEASE 'f'
11 | 


--------------------------------------------------------------------------------
/python/Makefile:
--------------------------------------------------------------------------------
 1 | # -*- Makefile -*-
 2 | #
 3 | 
 4 | PYTHON = python
 5 | RM = rm -rf
 6 | PYREX = $(HOME)/build/Pyrex-0.9.3/pyrexc.py 
 7 | 
 8 | all: mpi.so
 9 | 
10 | mpi.c: mpi.pyx
11 | 	$(PYTHON) $(PYREX) mpi.pyx
12 | 
13 | mpi.so: mpi.c
14 | 	$(PYTHON) setup.py build_ext --inplace
15 | 
16 | .PHONY: clean dist dist-clean
17 | 
18 | clean:
19 | 	$(RM) mpi.c mpi.so *.o *.a *.pxi
20 | 
# Package mpi.pyx into hpcc.tar.gz in the directory two levels above this one.
# Every recipe line runs in its own shell, so a `cd` on one line does not
# carry over to the next; the whole sequence is therefore chained on a single
# line.  `&&` also stops tar/gzip from running in the wrong directory if the
# `cd` fails.
dist:
	cd ../.. && tar cvf hpcc.tar hpcc/python/mpi.pyx && gzip --best hpcc.tar
24 | 
25 | dist-clean: clean
26 | 	$(RM) build *.pyc
27 | 


--------------------------------------------------------------------------------
/python/Makefile-grig:
--------------------------------------------------------------------------------
 1 | # -*- Makefile -*-
 2 | 
 3 | PYTHON_PREFIX=/home/luszczek/install/Python-2.4.1
 4 | 
 5 | CC = /opt/mpich-gm/bin/mpicc
 6 | CC = mpicc
 7 | CFLAGS = -I$(PYTHON_PREFIX)/include/python2.4
 8 | BLACS = /opt/lib/blacsCinit_MPI-LINUX-0.a /opt/lib/blacsF77init_MPI-LINUX-0.a /opt/lib/blacs_MPI-LINUX-0.a
 9 | LDFLAGS = -L$(PYTHON_PREFIX)/lib/python2.4/config -L/opt/lib -Xlinker -export-dynamic
10 | LOADLIBES = -lpython2.4 -lpthread -ldl -lutil -lscalapack /opt/lib/lapack_LINUX.a -lf77blas -latlas $(BLACS) -lg2c -lm
11 | LOADLIBES = -lpython2.4 -lpthread -ldl -lutil -lg2c -lm
12 | 
13 | hpcc: hpcc.o mpi.o
14 | 


--------------------------------------------------------------------------------
/python/grig-data/README:
--------------------------------------------------------------------------------
1 | Tests were performed on two dual Intel Xeon EM64T 3.2 GHz computers with 2 GiBytes per processor.
2 | The interconnect was Myrinet 2000, and LAM 7.1.1 over TCP/IP was used as the messaging layer.
3 | 


--------------------------------------------------------------------------------
/python/grig-data/hpccoutf.txt-1procs:
--------------------------------------------------------------------------------
 1 | ep_stream_add=2.444186
 2 | ep_stream_copy=2.114080
 3 | ep_stream_scale=2.091390
 4 | ep_stream_triad=0.876273
 5 | ep_stream_vector_size=5592405.000000
 6 | mpira_errors=0.000000
 7 | mpira_gups=0.000010
 8 | mpira_time=6770.738119
 9 | mpira_updates=67108864.000000
10 | mpira_vtime=44837.195384
11 | 


--------------------------------------------------------------------------------
/python/grig-data/hpccoutf.txt-2procs:
--------------------------------------------------------------------------------
 1 | ep_stream_add=1.161543
 2 | ep_stream_copy=1.099663
 3 | ep_stream_scale=1.066691
 4 | ep_stream_triad=0.487169
 5 | ep_stream_vector_size=5592405.000000
 6 | mpira_errors=0.000000
 7 | mpira_gups=0.000002
 8 | mpira_time=38408.924820
 9 | mpira_updates=67108864.000000
10 | mpira_vtime=45160.052983
11 | 


--------------------------------------------------------------------------------
/python/grig-data/hpccoutf.txt-3procs:
--------------------------------------------------------------------------------
 1 | ep_stream_add=1.512155
 2 | ep_stream_copy=1.310381
 3 | ep_stream_scale=1.300129
 4 | ep_stream_triad=0.604198
 5 | ep_stream_vector_size=5592405.000000
 6 | mpira_errors=0.000000
 7 | mpira_gups=0.000002
 8 | mpira_time=26591.705770
 9 | mpira_updates=44739244.000000
10 | mpira_vtime=31486.854541
11 | 


--------------------------------------------------------------------------------
/python/grig-data/hpccoutf.txt-4procs:
--------------------------------------------------------------------------------
 1 | ep_stream_add=1.079364
 2 | ep_stream_copy=1.041416
 3 | ep_stream_scale=1.013868
 4 | ep_stream_triad=0.458820
 5 | ep_stream_vector_size=5592405.000000
 6 | mpira_errors=0.000000
 7 | mpira_gups=0.000002
 8 | mpira_time=44599.139596
 9 | mpira_updates=67108864.000000
10 | mpira_vtime=48049.464034
11 | 


--------------------------------------------------------------------------------
/python/hpcc.c:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */
 2 | 
 3 | #include <Python.h>
 4 | 
 5 | DL_EXPORT(void) init_netlib(void); /*proto*/
 6 | 
 7 | int
 8 | main(int argc, char *argv[]) {
 9 |   PyObject *pName, *pModule, *pDict, *pFunc;
10 | 
11 |   Py_Initialize();
12 | 
13 |   if (argc < 2) {
14 |     fprintf( stderr, "%s file.py\n", argv[0] );
15 |     return 0;
16 |   }
17 | 
18 |   PyRun_SimpleString( "import sys; sys.argv = ['hpcc.py']" );
19 |   initmpi();
20 | 
21 |   /*
22 |   pName = PyString_FromString("mpi");
23 |   pModule = PyImport_Import(pName);
24 |   if (!pModule) {
25 |     PyErr_Print();
26 |     return 0;
27 |   }
28 |   */
29 | 
30 |   PyRun_SimpleString( "import sys; sys.path.append('.')" );
31 |   PyRun_SimpleString( "import hpcc; hpcc.main('hpcc.py')" );
32 |   /*
33 |   PyRun_SimpleString( "import sys; print sys.path" );
34 |   PyRun_SimpleString( "execfile(\"server.py\")" );
35 |   PyRun_SimpleFile( stderr, argv[1] );
36 |   */
37 | 
38 |   Py_Finalize();
39 |   return 0;
40 | }
41 | 


--------------------------------------------------------------------------------
/python/index.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | <title>Sample HPCC Awards Class 2 Submission</title>
 4 | </head>
 5 | <body>
 6 | <h1>Sample HPCC Awards Class 2 Submission</h1>
 7 | <h2>Overview</h2>
 8 | <p>
 9 | The language for implementation is Python. The messaging used is
10 | MPI. The implemented tests are <b>EP-STREAM</b> and <b>RandomAccess</b>.
11 | </p>
12 | <h2>Files</h2>
13 | <p>
14 | The complete implementation of the benchmark tests is in file
15 | <a href="hpcc.py.html"><code>hpcc.py</code></a> (raw source code
16 | is <a href="hpcc.py">available as well</a>).
17 | </p>
18 | <p>
19 | There is no default (out-of-the-box) interface to MPI in Python
20 | so the submission includes one written in
21 | <a href="http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/">Pyrex</a>.
22 | It's in the file <a href="mpi.pyx"><code>mpi.pyx</code></a>
23 | </p>
24 | <p>
25 | Installation procedure should be self-explanatory after reading
26 | a <a href="Makefile"><code>make(1)</code> file</a>.
27 | </p>
28 | <h2>Results</h2>
29 | The tests were performed on two dual Intel Xeon EM64T 3.2 GHz computers with
30 | 2 GiBytes per processor. The interconnect was Myrinet 2000, and LAM 7.1.1 over
31 | TCP/IP was used as the messaging layer. The output files are:
32 | <ol>
33 | <li><a href="grig-data/hpccoutf.txt-1procs">One MPI process</a></li>
34 | <li><a href="grig-data/hpccoutf.txt-2procs">Two MPI processes</a></li>
35 | <li><a href="grig-data/hpccoutf.txt-3procs">Three MPI processes</a></li>
36 | <li><a href="grig-data/hpccoutf.txt-4procs">Four MPI processes</a></li>
37 | </ol>
38 | </body>
39 | </html>


--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | # -*- mode: Python; tab-width: 4; indent-tabs-mode: nil; fill-column: 79;  coding: iso-latin-1-unix -*-
 3 | #
 4 | 
 5 | import os, sys
 6 | 
 7 | from distutils.core import setup, Extension
 8 | 
 9 | module = Extension("mpi",
10 |                    libraries = ["mpi", "lam"],
11 |                    sources = ["mpi.c"])
12 | setup (name = "mpi",
13 |        version = "0.1",
14 |        description = "MPI binding",
15 |        author = "Piotr Luszczek",
16 |        author_email = "luszczek __at__ cs __dot__ utk __dot__ edu",
17 |        url = "http://icl.cs.utk.edu/hpcc/",
18 |        long_description = """MPI Python binding using numarray.""",
19 |        ext_modules = [module])
20 | 


--------------------------------------------------------------------------------
/src/extfinalize.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  This routine is called right before MPI_Finalize() and allows finalization
 3 |  of external software and hardware components. It can be replaced
 4 |  at the time of installation. A sample implemenation may finialize
 5 |  proprietary computational and communication libraries.
 6 |  The parameter "extdata" points to an object of size of a pointer.
 7 |  "extdata" comes from HPCC_external_init().
 8 |  Upon success, the function should return 0.
 9 |  */
10 | int
11 | HPCC_external_finalize(int argc, char *argv[], void *extdata) {
12 |   return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/src/extinit.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  This routine is called right after MPI_Init() and allows initialization
 3 |  of external software and hardware components. It can be replaced
 4 |  at the time of installation. A sample implemenation may initialize
 5 |  proprietary computational and communication libraries.
 6 |  The parameter "extdata" points to an object of size of a pointer.
 7 |  The function may choose to store a pointer to its internal data
 8 |  and it will be passed to the finalization routine HPCC_external_finalize().
 9 |  Upon success, the function should return 0.
10 |  */
11 | int
12 | HPCC_external_init(int argc, char *argv[], void *extdata) {
13 |   return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/src/noopt.c:
--------------------------------------------------------------------------------
 1 | /* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */
 2 | 
 3 | double
 4 | HPCC_dweps() {
 5 |   double eps, one, half;
 6 | 
 7 |   one = 1.0;
 8 |   half = one / 2.0;
 9 |   eps = one;
10 | 
11 |   while (one + eps != one)
12 |     eps *= half;
13 | 
14 |   return eps;
15 | }
16 | 
/*
 * Single precision counterpart of HPCC_dweps(): halve a candidate, starting
 * at 1.0f, until adding it to 1.0f no longer changes the sum, and return
 * that candidate.
 */
float
HPCC_sweps() {
  float unity, scale, candidate;

  unity = 1.0f;
  scale = unity / 2.0f;

  /* All the work happens in the loop header; the body is empty on purpose. */
  for (candidate = unity; unity + candidate != unity; candidate *= scale)
    ;

  return candidate;
}
30 | 
31 | 


--------------------------------------------------------------------------------
/tools/hpccoutf.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import re
 4 | import sys
 5 | 
 6 | HPCC_out = dict(re.findall(r"^(\w+)=(\d.*)$", sys.stdin.read(), re.MULTILINE))
 7 | 
 8 | Walk_Order = (
 9 |   "HPL_Tflops",                      "PTRANS_GBs",
10 |   "MPIRandomAccess_GUPs",            "MPIFFT_Gflops",
11 |   "StarSTREAM_Triad*CommWorldProcs", "StarSTREAM_Triad",
12 |   "StarDGEMM_Gflops",                "RandomlyOrderedRingBandwidth_GBytes",
13 |   "RandomlyOrderedRingLatency_usec" )
14 | 
15 | Walk_Units = (
16 |   "Tera Flops per Second",   "Tera Bytes per Second",
17 |   "Giga Updates per Second", "Tera Flops per Second",
18 |   "Tera Bytes per Second",   "Giga Bytes per Second",
19 |   "Giga Flops per Second",   "Giga Bytes per second",
20 |   "micro-seconds");
21 | 
22 | Cross_Walk = {
23 |     "HPL_Tflops"           : "G-HPL",
24 |     "PTRANS_GBs"           : "G-PTRANS",
25 |     "MPIRandomAccess_GUPs" :  "G-RandomAccess",
26 |     "MPIFFT_Gflops"        :  "G-FFT",
27 |     "StarSTREAM_Triad*CommWorldProcs"     :  "EP-STREAM Sys",
28 |     "CommWorldProcs"                      :  "MPI Processes",
29 |     # StarSTREAM_Triad * CommWorldProcs   :   EP-STREAM Sys
30 |     "StarSTREAM_Triad"                    :  "EP-STREAM Triad",
31 |     "StarDGEMM_Gflops"                    :  "EP-DGEMM",
32 |     "RandomlyOrderedRingBandwidth_GBytes" :  "RandomRing Bandwidth",
33 |     "RandomlyOrderedRingLatency_usec"     :  "RandomRing Latency",
34 | }
35 | 
def show_all():
  """Print every `name=value` pair parsed from stdin as `name: value`.

  Pairs are taken from the module-level HPCC_out dictionary and printed in
  sorted key order, one per line.
  """
  for key in sorted(HPCC_out.keys()):
    # A single pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("%s: %s" % (key, HPCC_out[key]))
39 | 
def show_web():
  """Print one line per entry of Walk_Order: key, display name, value, unit.

  The display name comes from Cross_Walk and the unit from the entry of
  Walk_Units at the same position.  The synthetic key
  "StarSTREAM_Triad*CommWorldProcs" is not read from HPCC_out; its value is
  computed as StarSTREAM_Triad multiplied by CommWorldProcs.

  Raises KeyError if a required key is missing from HPCC_out.
  """
  # Walk_Order and Walk_Units are parallel tuples, so pair them directly
  # instead of maintaining a separate index counter.
  for key, unit in zip(Walk_Order, Walk_Units):
    if key == "StarSTREAM_Triad*CommWorldProcs":
      value = float(HPCC_out["StarSTREAM_Triad"]) * int(HPCC_out["CommWorldProcs"])
    else:
      value = HPCC_out[key]
    # A single pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("%s %s %s %s" % (key, Cross_Walk[key], value, unit))
48 | 
49 | Show_all = 1
50 | Show_web = 0
51 | 
52 | if Show_all:
53 |     show_all()
54 | 
55 | if Show_web:
56 |     show_web()
57 | 


--------------------------------------------------------------------------------
/tools/hpccxml.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import sys
 3 | 
 4 | from xml.dom.minidom import parse
 5 | 
 6 | class XMLNODE:
 7 |   prefix_el = "HPCC:"
 8 | 
 9 |   def __init__(self, node):
10 |     self.node = node
11 | 
12 |   def __getattr__(self, name):
13 |     prfx = self.prefix_el
14 |     if not name.startswith(prfx):
15 |       name = prfx + name
16 | 
17 |     name = name.replace("_", "-")
18 | 
19 |     for n in self.node.childNodes:
20 |       #print "N", name, n.nodeName, n.attributes, n.nodeValue, len(n.childNodes)
21 |       if len(n.childNodes) > 0 and n.nodeName == name:
22 |         return n.childNodes[0].nodeValue
23 | 
class XML:
  """Lookup of site records in an HPCC results document.

  Site records are the "HPCC:Site" children of the document's root element.
  Each is identified by the text of its "HPCC:ID" child element.
  """
  site_el = "HPCC:Site"
  id_el = "HPCC:ID"

  def __init__(self, filename_or_file):
    """Parse `filename_or_file` (a path or an open file) with minidom."""
    self.dom = parse(filename_or_file)

  def _site_ids(self):
    """Yield (site_node, id_text) for every ID element of every site."""
    # documentElement is the root element even when comments or processing
    # instructions precede it, unlike childNodes[0].
    for site in self.dom.documentElement.childNodes:
      if site.nodeType != site.ELEMENT_NODE or site.nodeName != self.site_el:
        continue
      for child in site.childNodes:
        # An empty ID element has no text node; skip it rather than fail
        # with an IndexError.
        if child.nodeName == self.id_el and child.childNodes:
          yield site, child.childNodes[0].nodeValue

  def __getitem__(self, idx):
    """Return an XMLNODE for the site whose ID text equals `idx`, else None.

    Raises ValueError (carrying the decimal text of `idx`) when `idx` is
    outside 1..500.
    """
    sidx = "%d" % idx
    if idx < 1 or idx > 500:
      # Call form works on both Python 2 and Python 3.
      raise ValueError(sidx)
    for site, id_text in self._site_ids():
      if id_text == sidx:
        return XMLNODE(site)
    return None

  def min_id(self): return self.minmax_id()[0]
  def max_id(self): return self.minmax_id()[1]
  def minmax_id(self):
    """Return (smallest, largest) site ID found in the document.

    When the document has no site IDs the historical pair (391, 1) is
    returned, so that range(min, max + 1) stays empty for existing callers.
    """
    ids = [int(id_text) for _, id_text in self._site_ids()]
    if not ids:
      return 391, 1
    return min(ids), max(ids)
59 | 
def main(argv):
  """Print ID, HPL, SingleMPIProcessDGEMM and HPLNodes for every site.

  argv[1] names the XML file to read.  IDs between the smallest and largest
  one found are visited in order; IDs with no site record are skipped.

  Returns 1 after printing a usage line when the file argument is missing,
  otherwise None.
  """
  if len(argv) < 2:
    prog = argv[0] if argv else "hpccxml.py"
    sys.stderr.write("usage: %s results.xml\n" % prog)
    return 1

  d = XML(argv[1])
  for idx in range(d.min_id(), d.max_id()+1):
    nde = d[idx]
    if nde is None: continue
    # A single pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("%s %s %s %s" % (idx, nde.HPL, nde.SingleMPIProcessDGEMM, nde.HPLNodes))
67 | 
68 | if "__main__" == __name__:
69 |   sys.exit(main(sys.argv))
70 | 


--------------------------------------------------------------------------------
/tools/readme.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import sys
 3 | 
 4 | if len(sys.argv) > 1:
 5 |   filename = sys.argv[1]
 6 | else:
 7 |   filename = "README.html"
 8 | 
 9 | d=open(filename).read().replace("</style>","tt {color:navy}\nh2,h3,h4 {color:#527bbd;}\nh2 {border-bottom: 2px solid silver;}\n</style>")
10 | open(filename,"w").write(re.sub("^[.]c000.*",".c000{font-family:monospace;color:navy;}", d, flags=re.MULTILINE))
11 | 


--------------------------------------------------------------------------------
/work/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | CC = g++
 3 | CFLAGS = -pipe -g -W -Wall
 4 | CXXFLAGS = -pipe -g -W -Wall
 5 | LDFLAGS = -g
 6 | 
 7 | mem4fft: mem4fft.o fft235.o zfft1d.o
 8 | 
 9 | clean:
10 | 	rm -f *.o mem4fft
11 | 


--------------------------------------------------------------------------------
/work/conly/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | CFLAGS = -g -pipe -W -Wall
3 | LDFLAGS = -g
4 | 
5 | mem4fft: fft235.o zfft1d.o
6 | 
7 | clean:
8 | 	rm -f *.o mem4fft
9 | 


--------------------------------------------------------------------------------
/work/conly/c.gpt:
--------------------------------------------------------------------------------
1 | 
2 | plot \
3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^c'|sort -n -k 2" using 2:3 title "c", \
4 |  sqrt(x)*16
5 | 


--------------------------------------------------------------------------------
/work/conly/d.gpt:
--------------------------------------------------------------------------------
1 | 
2 | plot \
3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^d'|sort -n -k 2" using 2:3 title "d", \
4 |  sqrt(x)
5 | 


--------------------------------------------------------------------------------
/work/conly/results.txt:
--------------------------------------------------------------------------------
 1 | INPUT[1048576] 1048575
 2 | OUTPUT[1048576] 1048575
 3 | w1[1048576] 127
 4 | w2[1048576] 127
 5 | ww[1048576] 1023
 6 | ww2[1048576] 1023
 7 | ww3[1048576] 1023
 8 | ww4[1048576] 1023
 9 | c[1048576] 16503
10 | d[1048576] 1023
11 | INPUT[33554432] 33554431
12 | OUTPUT[33554432] 33554431
13 | w1[33554432] 1023
14 | w2[33554432] 511
15 | ww[33554432] 4095
16 | ww2[33554432] 4095
17 | ww3[33554432] 8191
18 | ww4[33554432] 8191
19 | c[33554432] 65655
20 | d[33554432] 4095
21 | INPUT[67108864] 67108863
22 | OUTPUT[67108864] 67108863
23 | w1[67108864] 1023
24 | w2[67108864] 1023
25 | ww[67108864] 4095
26 | ww2[67108864] 8191
27 | ww3[67108864] 8191
28 | ww4[67108864] 16383
29 | c[67108864] 131191
30 | d[67108864] 8191
31 | INPUT[134217728] 134217727
32 | OUTPUT[134217728] 134217727
33 | w1[134217728] 2047
34 | w2[134217728] 1023
35 | ww[134217728] 8191
36 | ww2[134217728] 16383
37 | ww3[134217728] 8191
38 | ww4[134217728] 16383
39 | c[134217728] 131191
40 | d[134217728] 8191
41 | INPUT[268435456] 268435455
42 | OUTPUT[268435456] 268435455
43 | w1[268435456] 2047
44 | w2[268435456] 2047
45 | ww[268435456] 16383
46 | ww2[268435456] 16383
47 | ww3[268435456] 16383
48 | ww4[268435456] 16383
49 | c[268435456] 262263
50 | d[268435456] 16383
51 | INPUT[536870912] 536870911
52 | OUTPUT[536870912] 536870911
53 | w1[536870912] 4095
54 | w2[536870912] 2047
55 | ww[536870912] 16383
56 | ww2[536870912] 16383
57 | ww3[536870912] 32767
58 | ww4[536870912] 32767
59 | c[536870912] 262263
60 | d[536870912] 16383
61 | INPUT[1073741824] 1073741823
62 | OUTPUT[1073741824] 1073741823
63 | w1[1073741824] 4095
64 | w2[1073741824] 4095
65 | ww[1073741824] 16383
66 | ww2[1073741824] 32767
67 | ww3[1073741824] 32767
68 | ww4[1073741824] 65535
69 | c[1073741824] 524407
70 | d[1073741824] 32767
71 | 


--------------------------------------------------------------------------------
/work/conly/w1.gpt:
--------------------------------------------------------------------------------
1 | 
2 | plot \
3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w1|sort -n -k 2" using 2:3 title "w1", \
4 |  1.1*sqrt(x)
5 | 


--------------------------------------------------------------------------------
/work/conly/w2.gpt:
--------------------------------------------------------------------------------
1 | 
2 | plot \
3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:3 title "w2", \
4 |  sqrt(x)*0.375
5 | 
6 | # exp(log(x)*0.50)/sqrt(2*3*5), \
7 | # exp(log(x)*0.33)
8 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:($3/sqrt($2)/0.375) title "w2"#, \
9 | 


--------------------------------------------------------------------------------
/work/conly/ww.gpt:
--------------------------------------------------------------------------------
1 | 
2 | plot \
3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww[^0-9]'|sort -n -k 2" using 2:3 title "ww", \
4 |  sqrt(x)
5 | 
6 | # exp(log(x)*0.50)/sqrt(2*3*5), \
7 | # exp(log(x)*0.33)
8 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:($3/sqrt($2)/0.375) title "w2"#, \
9 | 


--------------------------------------------------------------------------------
/work/conly/ww2.gpt:
--------------------------------------------------------------------------------
 1 | 
 2 | plot \
 3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww2'|sort -n -k 2" using 2:3 title "ww2", \
 4 |  sqrt(x)*3.9
 5 | 
 6 | # exp(log(x)*0.50)/sqrt(2*3*5), \
 7 | # exp(log(x)*0.33)
 8 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep ^w2|sort -n -k 2" using 2:($3/sqrt($2)/0.375) title "w2"#, \
 9 | # "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww2'|sort -n -k 2" using 2:(sqrt($2)/$3/6) title "ww2"#, \
10 | 


--------------------------------------------------------------------------------
/work/conly/ww3.gpt:
--------------------------------------------------------------------------------
1 | 
2 | plot \
3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww3'|sort -n -k 2" using 2:3 title "ww3", \
4 |  sqrt(x)*5.4772
5 | 


--------------------------------------------------------------------------------
/work/conly/ww4.gpt:
--------------------------------------------------------------------------------
1 | 
2 | plot \
3 |  "< sed -e 's/[][]/ /g' enumerate_all.txt| grep '^ww4'|sort -n -k 2" using 2:3 title "ww4", \
4 |  x/256
5 | 


--------------------------------------------------------------------------------
/work/cpp.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | Preprocess C files.
 4 | 
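Rewrite an array subscript that appears directly inside a c_re()/c_im()
argument into a ->sqbracket() call, e.g. c_re(x[i]) becomes
c_re(x->sqbracket(i)).  Every other line is passed through unchanged.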
 6 | """
 7 | 
 8 | import sys
 9 | 
def errlog(msg):
  """Write str(msg) followed by a newline to standard error."""
  sys.stderr.write(str(msg) + "\n")
13 | 
def _matching_sqbracket(text, opening):
  """Return the index of the ']' that closes the '[' at `opening`, or -1."""
  depth = 0
  for pos in range(opening, len(text)):
    if text[pos] == "[":
      depth += 1
    elif text[pos] == "]":
      depth -= 1
      if depth == 0:
        return pos
  return -1


def proc_c_xx(oldline):
  """Rewrite `name[index]` into `name->sqbracket(index)` after "c_" markers.

  The line is scanned for every occurrence of "c_".  For "c_re"/"c_im" the
  scan position is moved just past the following "(" (and any whitespace
  after it); for any other "c_" occurrence the position of the "c_" itself
  is used.  From each such position the next "[" is rewritten, together
  with its matching "]", provided that

    * a "[" exists and lies before the first ")" after the position, and
    * no "(" lies between the position and that "[".

  Parameters:
    oldline -- one line of source text (str).

  Returns:
    The rewritten line; `oldline` itself when nothing qualifies.

  All positions are computed on `oldline` and applied in a single pass, so
  several rewrites on one line do not shift each other's indices.
  """
  starts = list()
  idx = -1
  while 1:
    idx = oldline.find("c_", idx+1)
    if idx < 0:
      break

    if oldline[idx+2:].startswith("re") or oldline[idx+2:].startswith("im"):
      oparen = oldline.find("(", idx)
      if oparen < 0:
        # No argument list on this line.  Skipping keeps the scan moving
        # forward; resetting idx to 0 here would find the same "c_" forever.
        continue
      idx = oparen + 1
      # Bounded so that a line ending in "(" plus whitespace cannot run
      # past the end of the string.
      while idx < len(oldline) and oldline[idx].isspace():
        idx += 1

    starts.append(idx)

  # Maps a character position in oldline to the text that replaces it.
  replacements = dict()

  for idx in starts:
    oparen = oldline.find("(", idx)
    cparen = oldline.find(")", idx)
    sqobrkt = oldline.find("[", idx)

    if sqobrkt < 0 or sqobrkt > cparen: # '[' is not there or is beyond ')'
      continue

    if oparen >= 0 and oparen < sqobrkt: # if '(' is there and it's before '['
      continue

    sqcbrkt = _matching_sqbracket(oldline, sqobrkt)
    if sqcbrkt < 0: # unbalanced subscript: leave the text alone
      continue

    replacements[sqobrkt] = "->sqbracket("
    replacements[sqcbrkt] = ")"

  if not replacements:
    return oldline

  return "".join([replacements.get(pos, ch) for pos, ch in enumerate(oldline)])
46 | 
47 | 
def cpp(fname):
  """Copy the file `fname` to standard output, preprocessing selected lines.

  Lines that contain "c_re" or "c_im" are passed through proc_c_xx();
  every other line is written unchanged.  Each line is written as read,
  with nothing added or removed.

  Parameters:
    fname -- path of the file to read.
  """
  with open(fname) as infile: # closed even if processing raises
    for fline in infile:
      # str.find() returns -1 when the text is absent, so both results must
      # be compared with 0; a bare find() result is true for "not found".
      if fline.find("c_re") >= 0 or fline.find("c_im") >= 0:
        newline = proc_c_xx(fline)
      else:
        newline = fline
      sys.stdout.write(newline)
55 | 
56 | def main(argv):
57 |   for a in argv[1:]:
58 |     print "/****", a, "****/"
59 |     cpp(a)
60 | 
61 | if "__main__" == __name__:
62 |   sys.exit(main(sys.argv))
63 | 


--------------------------------------------------------------------------------
/work/fft.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | settbls(w1, w2, w3, w4, n1, n2, m1, m2) {
 3 | w1[m1, m2]
 4 | w3[m2, n1/m1]
 5 | w2[m1,n2/m2]
 6 | w4[n1/m1,n2/m2]
 7 | n1*n2=N
 8 | n2*2*3*5 >= n1 >=n2
 9 | }
10 | 


--------------------------------------------------------------------------------
/work/fftbug.txt:
--------------------------------------------------------------------------------
 1 | On 7/2/2011 11:05 AM, Piotr Luszczek wrote:
 2 | John,
 3 | 
 4 | thank you for the report. I would definitely like to see
 5 | more details on this. My access to large machines
 6 | is limited to few architectures. Maybe that's why
 7 | I'm not seeing this on my side.
 8 | 
 9 | Well, this one was very strange.  I've been running HPCC 1.4.1 with FFTE for a while on a wide number of systems without seeing problems.  But recently on SGI UV systems I started seeing glibc heap corruption errors in StarFFT.  The same binary, MPI library, and input libraries that ran successfully on one of our clusters gave the heap errors on UV.  I can reproduce the problem with only a single MPI rank.  And using Totalview, the memory checker found the array bounds overrun.
10 | 
11 | Hopefully you'll be able to reproduce what I'm seeing under the following conditions:
12 | 
13 | src/io.c modified to run only StarFFT
14 | P = Q = 1
15 | N = 20120
16 | 
17 | Compile with -DHPCC_FFT_235 (I also use -DHPCC_MEMALLCTR -DRA_SANDIA_OPT2, not sure if the former plays a role).
18 | I use -g optimization so I don't think it's a compiler issue.
19 | 
20 | For this case when HPCC_fftw_create_plan() is called, p->ww has 262184 elements.  This routine calls HPCC_zfft1d(), from which settbls() is called with 4th argument w4 = ww+nw4, where nw4 is 41752.
21 | 
22 | In settbls() w4 is initialized in a double loop with loop variable 'is' ranging up to n2/m2-1 = 161 and 'ir' ranging up to n1/m1-1 = 2429, with ldw4 = 2430.  With ARR2D(w4,ir,is,ldw4) expanding to w4[ir+is*ldw4], the maximum array element is 2429+161*2430 = 393659, which is well past the allocated end index 262184-41752 = 220432.
23 | 
24 | Totalview dies when the index hits 220432.  When I run outside of Totalview (on UV), the job doesn't die until HPCC_fftw_destroy_plan() tries to free p->ww.  And as I said on some systems the job doesn't die at all and the results produced are correct.
25 | 
26 | thanks,
27 | John
28 | 


--------------------------------------------------------------------------------
/www/hpcchallenge.org/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | all: index.html
3 | 
4 | index.html: pre_all.html Home.html poste_all.html
5 | 	cat pre_all.html Home.html poste_all.html > index.html
6 | 
7 | .PHONY: all
8 | 


--------------------------------------------------------------------------------
/www/hpcchallenge.org/poste_all.html:
--------------------------------------------------------------------------------
1 | </td>
2 | </tr>
3 | </table>
4 | </body>
5 | </html>
6 | 


--------------------------------------------------------------------------------
/www/hpcchallenge.org/pre_all.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | <style>
 4 | BODY {
 5 |   font-family: Helvetica, Arial,"Lucida Grande", Verdana, Geneva, Lucida, sans-serif;
 6 |   font-size:11px;
 7 |   line-height:21px; 
 8 |   background-color:#FFFFFF;
 9 | }
10 | </style>
11 | </head>
12 | <body>
13 | <table border="0" width="755" class="Style" cellpadding="0" cellspacing="0">
14 | <tr>
15 | <td>
16 | 


--------------------------------------------------------------------------------