├── NEWS ├── examples ├── corcol │ ├── util.h │ ├── .gitignore │ └── Makefile ├── covcol │ ├── util.h │ ├── .gitignore │ └── Makefile ├── dsyrk │ ├── util.h │ ├── tile.sizes │ ├── .gitignore │ └── Makefile ├── fdtd-2d │ ├── test.c │ └── .gitignore ├── pca │ ├── data.in │ ├── decls.h │ ├── util.h │ ├── .pca.c.swp │ ├── .gitignore │ └── Makefile ├── ssymm │ ├── util.h │ ├── .gitignore │ └── Makefile ├── tmm │ ├── util.h │ ├── .gitignore │ ├── decls.h │ └── Makefile ├── apop │ ├── tile.sizes │ └── Makefile ├── fdtd-1d │ ├── tile.sizes │ ├── Makefile │ ├── data.decls │ └── .gitignore ├── heat-1d │ ├── tile.sizes │ └── Makefile ├── heat-2d │ ├── tile.sizes │ └── Makefile ├── seidel │ ├── tile.sizes │ ├── .gitignore │ ├── Makefile │ └── util.h ├── game-of-life │ ├── tile.sizes │ └── Makefile ├── jacobi-1d-imper │ ├── tile.sizes │ ├── .gitignore │ ├── util.h │ └── Makefile ├── .gitignore ├── doitgen │ ├── tile.sizes │ ├── fst │ │ ├── transformation_0.fst │ │ ├── transformation_1.fst │ │ ├── transformation_2.fst │ │ └── transformation_3.fst │ ├── decls.h │ ├── .gitignore │ └── template.c ├── gemver │ ├── tile.sizes │ ├── fst │ │ ├── transformation_0.fst │ │ ├── transformation_1.fst │ │ ├── transformation_2.fst │ │ ├── transformation_4.fst │ │ ├── transformation_3.fst │ │ ├── transformation_5.fst │ │ ├── transformation_6.fst │ │ └── transformation_7.fst │ └── .gitignore ├── heat-3d │ ├── tile.sizes │ └── Makefile ├── dsyr2k │ ├── tile.sizes │ ├── .gitignore │ ├── Makefile │ └── util.h ├── lu │ ├── tile.sizes │ └── .gitignore ├── matmul │ ├── tile.sizes │ ├── .gitignore │ └── dgemm.f ├── matmul-init │ ├── tile.sizes │ ├── decls.h │ ├── .gitignore │ ├── Makefile │ ├── util.h │ └── matmul-init.c ├── trisolv │ ├── decls.h │ ├── .gitignore │ ├── util.h │ └── Makefile ├── mvt │ ├── decls.h │ ├── Makefile │ └── .gitignore ├── 3d7pt │ ├── filename.sh │ └── filename1.sh ├── jacobi-2d-imper │ ├── Makefile │ └── .gitignore ├── lbm │ ├── fpc │ │ └── d2q9 │ │ │ └── Makefile │ ├── ldc │ 
│ ├── d2q9 │ │ │ └── Makefile │ │ ├── d3q19 │ │ │ └── Makefile │ │ └── d3q27 │ │ │ └── Makefile │ ├── mrt │ │ └── d2q9 │ │ │ └── Makefile │ └── poiseuille │ │ └── d2q9 │ │ └── Makefile ├── adi │ ├── .gitignore │ ├── decls.h │ ├── Makefile │ └── util.h ├── dct │ ├── .gitignore │ └── Makefile ├── strmm │ ├── .gitignore │ ├── Makefile │ └── util.h ├── strsm │ ├── .gitignore │ └── Makefile ├── tce │ ├── .gitignore │ ├── data.decls │ └── Makefile ├── advect3d │ ├── .gitignore │ ├── advect3d2.f │ ├── Makefile │ ├── test.f │ ├── advect3d1.f │ └── adi.f ├── template │ ├── .gitignore │ ├── Makefile │ ├── template.c │ └── util.h ├── .upload └── floyd │ └── Makefile ├── orio-0.1.0 ├── src │ ├── __init__.py │ ├── main │ │ ├── __init__.py │ │ ├── tspec │ │ │ └── __init__.py │ │ └── tuner │ │ │ ├── __init__.py │ │ │ └── search │ │ │ ├── __init__.py │ │ │ ├── random │ │ │ └── __init__.py │ │ │ ├── simplex │ │ │ └── __init__.py │ │ │ ├── annealing │ │ │ └── __init__.py │ │ │ └── exhaustive │ │ │ └── __init__.py │ ├── module │ │ ├── __init__.py │ │ ├── loop │ │ │ ├── __init__.py │ │ │ ├── ast_lib │ │ │ │ └── __init__.py │ │ │ └── submodule │ │ │ │ ├── __init__.py │ │ │ │ ├── tile │ │ │ │ └── __init__.py │ │ │ │ ├── arrcopy │ │ │ │ └── __init__.py │ │ │ │ ├── composite │ │ │ │ └── __init__.py │ │ │ │ ├── permut │ │ │ │ └── __init__.py │ │ │ │ ├── pragma │ │ │ │ └── __init__.py │ │ │ │ ├── regtile │ │ │ │ └── __init__.py │ │ │ │ ├── unroll │ │ │ │ └── __init__.py │ │ │ │ ├── unrolljam │ │ │ │ └── __init__.py │ │ │ │ ├── boundreplace │ │ │ │ └── __init__.py │ │ │ │ └── scalarreplace │ │ │ │ └── __init__.py │ │ ├── align │ │ │ └── __init__.py │ │ ├── polysyn │ │ │ └── __init__.py │ │ └── simplyrewrite │ │ │ └── __init__.py │ └── tool │ │ ├── __init__.py │ │ ├── ply │ │ └── __init__.py │ │ └── README ├── testsuite │ ├── axpy │ │ ├── results │ │ │ └── bgl │ │ │ │ ├── 150608.output │ │ │ │ ├── _axpy4_50000.c │ │ │ │ ├── _axpy4_5000000.c │ │ │ │ ├── second_set.tgz │ │ │ │ ├── axpy4.c │ │ │ 
│ ├── axpy4_10.c │ │ │ │ ├── axpy4_100.c │ │ │ │ ├── axpy4_1000.c │ │ │ │ ├── axpy4_10000.c │ │ │ │ ├── axpy4_100000.c │ │ │ │ ├── axpy4_1000000.c │ │ │ │ ├── axpy4_2000000.c │ │ │ │ ├── axpy4_50000.c │ │ │ │ ├── axpy4_500000.c │ │ │ │ ├── axpy4_5000000.c │ │ │ │ ├── 150600.output │ │ │ │ ├── 150601.output │ │ │ │ ├── 150606.output │ │ │ │ ├── 150602.output │ │ │ │ ├── 150603.output │ │ │ │ ├── 150604.output │ │ │ │ ├── 150605.output │ │ │ │ ├── 150607.output │ │ │ │ └── axpy10.bg.spec │ │ ├── axpy4.c │ │ ├── runjobs.py │ │ └── axpy.bg.spec │ ├── lu │ │ ├── pluto │ │ │ ├── clean │ │ │ ├── lu.tar.gz │ │ │ └── graphs │ │ │ │ ├── lu.png │ │ │ │ ├── lu-par.png │ │ │ │ ├── lu-par.dat │ │ │ │ ├── lu.dat │ │ │ │ ├── lu-par.plot │ │ │ │ └── lu.plot │ │ └── lu.c │ ├── jacobi │ │ ├── pluto │ │ │ ├── jacobi-1d_decl_code.h │ │ │ ├── clean │ │ │ ├── jacobi-1d_init_code.c │ │ │ ├── jacobi-1d.c │ │ │ ├── jacobi-1d_profiling.c │ │ │ ├── cmds │ │ │ └── jacobi-1d_skeleton_code.c │ │ └── jacobi-1d.c │ ├── ptune2_decl_code.h │ ├── spr │ │ ├── pluto │ │ │ ├── clean │ │ │ └── spr.c │ │ └── spr.c │ ├── symv │ │ ├── pluto │ │ │ ├── clean │ │ │ └── symv.c │ │ └── symv.c │ ├── trmm │ │ ├── pluto │ │ │ ├── clean │ │ │ ├── graphs │ │ │ │ ├── trmm.png │ │ │ │ ├── trmm-par.png │ │ │ │ ├── trmm-par.dat │ │ │ │ └── trmm.dat │ │ │ ├── trmm.c │ │ │ ├── trmm_profiling.c │ │ │ └── cmds │ │ └── trmm.c │ ├── adi │ │ ├── pluto │ │ │ ├── clean │ │ │ ├── graphs │ │ │ │ ├── adi.png │ │ │ │ ├── adi-par.png │ │ │ │ ├── adi-par.dat │ │ │ │ ├── adi.dat │ │ │ │ ├── adi-par.plot │ │ │ │ └── adi.plot │ │ │ ├── adi.c │ │ │ ├── adi_profiling.c │ │ │ └── cmds │ │ └── adi.c │ ├── fdtd │ │ ├── pluto2 │ │ │ ├── clean │ │ │ ├── a.out │ │ │ ├── graphs │ │ │ │ ├── fdtd-2d.png │ │ │ │ ├── fdtd-2d-par.png │ │ │ │ ├── fdtd-2d-par.dat │ │ │ │ ├── fdtd-2d.dat │ │ │ │ ├── fdtd-2d-par.plot │ │ │ │ └── fdtd-2d.plot │ │ │ ├── cmds │ │ │ ├── fdtd-2d.c │ │ │ └── fdtd-2d_profiling.c │ │ ├── pluto │ │ │ ├── run.1d.base │ │ │ ├── 
run.2d.base │ │ │ ├── run.1d.par │ │ │ ├── run.1d.seq │ │ │ ├── run.2d.par │ │ │ └── run.2d.seq │ │ ├── fdtd-1d.c │ │ └── fdtd-2d.c │ ├── gemver │ │ ├── pluto │ │ │ ├── clean │ │ │ ├── a.out │ │ │ ├── graphs │ │ │ │ ├── gemver.png │ │ │ │ ├── gemver-par.png │ │ │ │ ├── gemver-par.dat │ │ │ │ ├── gemver.dat │ │ │ │ ├── gemver-par.plot │ │ │ │ └── gemver.plot │ │ │ ├── cmds │ │ │ ├── gemver.c │ │ │ └── gemver_profiling.c │ │ └── gemver.c │ ├── seidel │ │ ├── pluto │ │ │ ├── clean │ │ │ ├── seidel.tar.gz │ │ │ ├── graphs │ │ │ │ ├── seidel.png │ │ │ │ ├── seidel-par.png │ │ │ │ ├── seidel-par.dat │ │ │ │ ├── seidel.dat │ │ │ │ ├── seidel-par.plot │ │ │ │ └── seidel.plot │ │ │ ├── seidel_profiling.c │ │ │ └── cmds │ │ └── seidel.c │ ├── simplyrewrite.py │ ├── mm │ │ ├── mm.c │ │ └── pluto │ │ │ ├── run.base │ │ │ ├── run.best │ │ │ ├── graphs │ │ │ ├── matmul-par.dat │ │ │ └── matmul.dat │ │ │ └── run.mkl │ ├── mvt │ │ ├── mvt.fused.c │ │ ├── pluto │ │ │ └── result.txt │ │ └── mvt.c │ ├── ptune2_init_code.c │ ├── hessian │ │ ├── pluto │ │ │ └── run.base │ │ └── haxpy3.c │ ├── pragma.c │ ├── petsc │ │ ├── Notes.txt │ │ ├── bratu.c │ │ ├── bratu.bg.spec │ │ ├── bratu.regtiling.bg.spec │ │ ├── results │ │ │ └── bratu.regtiling.bg.spec │ │ └── bratu.spec │ ├── trmv │ │ ├── pluto │ │ │ └── trmv.c │ │ └── trmv.c │ ├── tensor-contraction │ │ ├── pluto │ │ │ └── run.base │ │ └── 4d2d.c │ ├── README │ ├── unroll.c │ ├── regtile.c │ ├── tile.c │ ├── align.c │ ├── permut1.c │ ├── unrolljam.c │ ├── polysyn.c │ ├── polysyn_profiling.c │ ├── ptune2_skeleton_code.c │ ├── arrcopy.c │ ├── ptune.c │ ├── axpy4.c │ └── composite.c ├── doc │ └── orio_logo.jpg ├── orf ├── PKG-INFO └── orcc ├── test ├── .gitignore ├── unit_tests.in ├── Makefile ├── noloop.c ├── fm1.in ├── fm2.in ├── ind.c ├── test4.c ├── polyprod-imper.c ├── polyprod-func.c ├── tricky4.c ├── 1dloop-invar.c ├── griebl.c ├── nonconst.c ├── param.c ├── tile.c ├── triangular.c ├── multidim.c ├── test6.c ├── read.c ├── unroll.c 
├── fmri.c ├── spatial.c ├── tricky1.c ├── banerjee.c ├── fw.c ├── polyprod-systol.c ├── fusion7.c ├── tricky2.c ├── constdep.c ├── seq.c ├── nodep.c ├── lu-nopivot.c ├── luq.c ├── mxv.c ├── test5.c ├── matmul-init.c ├── pascal.c ├── fusion6.c ├── fusion8.c ├── trisolv.c ├── wavefront.c ├── qr.inner.c ├── reverse-iss.c ├── example.c ├── haar1.c ├── farkas.in ├── farkas2.in ├── test8.c ├── func.c ├── fusion1.c ├── gemver1.c ├── fusion9.c ├── testclos.c ├── memmin.c ├── fusion.c ├── test1.c ├── tricky3.c ├── polynomial.c ├── lu.c ├── test2.c ├── 2d-bidirec.c ├── fusion5.c ├── mxv-seq.c ├── negparam.c ├── fusion2.c ├── multidim-seq.c ├── fusion3.c ├── fusion4.c ├── mpeg4-strip-down.c ├── costfunc.c ├── limlam.c ├── haar.c ├── matmult-imper.c ├── multi-stmt-lazy-lin-ind.c ├── jacobi-1d-mod.c ├── dep-1,1.c ├── clauss.c ├── haar.cpp ├── matmul-seq2.c ├── dfp │ ├── typed-fuse-1.c │ ├── typed-fuse-2.c │ └── scalar-distribute.c ├── mxv-seq3.c ├── jacobi-1d-periodic.c ├── fusion11.c ├── tce-2index-transform.c ├── mpeg4.c ├── shift.c ├── jacobi-2d.c ├── diamond-tile-example.c ├── jacobi-1d-periodic-even.c ├── nas.c ├── matmul.c ├── cholesky.c ├── matmul-seq.c ├── multi-loop-param.c ├── matmul-seq3.c ├── test7.c ├── advect3d1.f ├── fusion10.c ├── doitgen.c ├── tss1.c ├── intratileopt2.c ├── deep.c ├── intratileopt3.c ├── priv2.c ├── tce-3index-transform.c ├── largebounds.c ├── poisson.c ├── hard.f ├── intratileopt4.c ├── multi-stmt-periodic.c ├── dynprog_param.c ├── intratileopt1.c ├── multi-stmt-stencil-seq.c ├── heat-2d.c ├── mvt.c ├── durbin.c ├── adi.f ├── durbin_param.c ├── nonconst.in ├── mpeg4.bak.c ├── jacobi-2d-periodic.c ├── kmeans.c └── test-per-cc-obj.c ├── poly_hyperplane.png ├── m4 └── .gitignore ├── lib ├── .gitignore ├── tile_size_selection_model.h └── pet_to_pluto.h ├── tool ├── .gitignore ├── pluto_codegen_if.h ├── osl_pluto.h └── ast_transform.h ├── AUTHORS ├── getversion.sh.in ├── doc └── SubmittingPatches ├── .gitmodules ├── CONTRIBUTING.md ├── recloog └── 
include └── pluto └── matrix.h /NEWS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/corcol/util.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/covcol/util.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/dsyrk/util.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/fdtd-2d/test.c: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pca/data.in: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pca/decls.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pca/util.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/ssymm/util.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/tmm/util.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/tool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/tspec/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/tuner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/tool/ply/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/apop/tile.sizes: -------------------------------------------------------------------------------- 1 | 1000 2 | 1000 3 | -------------------------------------------------------------------------------- /examples/fdtd-1d/tile.sizes: -------------------------------------------------------------------------------- 1 | 256 2 | 256 3 | 
-------------------------------------------------------------------------------- /orio-0.1.0/src/module/align/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/polysyn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/dsyrk/tile.sizes: -------------------------------------------------------------------------------- 1 | 16 2 | 32 3 | 32 4 | -------------------------------------------------------------------------------- /examples/heat-1d/tile.sizes: -------------------------------------------------------------------------------- 1 | 1024 2 | 1024 3 | -------------------------------------------------------------------------------- /examples/heat-2d/tile.sizes: -------------------------------------------------------------------------------- 1 | 64 2 | 64 3 | 64 4 | -------------------------------------------------------------------------------- /examples/seidel/tile.sizes: -------------------------------------------------------------------------------- 1 | 10 2 | 15 3 | 15 4 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/tuner/search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/ast_lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /examples/game-of-life/tile.sizes: -------------------------------------------------------------------------------- 1 | 64 2 | 64 3 | 64 4 | -------------------------------------------------------------------------------- /examples/jacobi-1d-imper/tile.sizes: -------------------------------------------------------------------------------- 1 | 250 2 | 1024 3 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/tuner/search/random/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/tuner/search/simplex/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/tile/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/simplyrewrite/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150608.output: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/_axpy4_50000.c: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | swim 2 | *.pipepar.c 3 | pipepar 4 | 
-------------------------------------------------------------------------------- /examples/doitgen/tile.sizes: -------------------------------------------------------------------------------- 1 | 4 2 | 8 3 | 8 4 | 31 5 | 8 6 | -------------------------------------------------------------------------------- /examples/gemver/tile.sizes: -------------------------------------------------------------------------------- 1 | 400 2 | 16 3 | 20 4 | 16 5 | -------------------------------------------------------------------------------- /examples/heat-3d/tile.sizes: -------------------------------------------------------------------------------- 1 | 16 2 | 16 3 | 16 4 | 1000 5 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/tuner/search/annealing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/main/tuner/search/exhaustive/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/arrcopy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/composite/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/permut/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/pragma/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/regtile/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/unroll/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/unrolljam/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/_axpy4_5000000.c: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/boundreplace/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /orio-0.1.0/src/module/loop/submodule/scalarreplace/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/dsyr2k/tile.sizes: -------------------------------------------------------------------------------- 1 | 64 2 | 800 3 | 8 4 | 4 5 | 4 6 | 4 7 | -------------------------------------------------------------------------------- /examples/lu/tile.sizes: -------------------------------------------------------------------------------- 1 | 16 2 | 100 3 | 16 4 | 16 5 | 2 6 | 16 7 | -------------------------------------------------------------------------------- 
/examples/matmul/tile.sizes: -------------------------------------------------------------------------------- 1 | 8 2 | 128 3 | 8 4 | 16 5 | 2 6 | 16 7 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/lu/pluto/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* -------------------------------------------------------------------------------- /examples/doitgen/fst/transformation_0.fst: -------------------------------------------------------------------------------- 1 | 1 2 | 3 3 | 0 1 2 4 | 256 5 | -------------------------------------------------------------------------------- /examples/gemver/fst/transformation_0.fst: -------------------------------------------------------------------------------- 1 | 1 2 | 4 3 | 0 1 2 3 4 | 256 5 | -------------------------------------------------------------------------------- /examples/matmul-init/tile.sizes: -------------------------------------------------------------------------------- 1 | 8 2 | 127 3 | 8 4 | 8 5 | 2 6 | 8 7 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/jacobi/pluto/jacobi-1d_decl_code.h: -------------------------------------------------------------------------------- 1 | double a[T][N]; 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/ptune2_decl_code.h: -------------------------------------------------------------------------------- 1 | 2 | double x[N]; 3 | double y[N]; 4 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/jacobi/pluto/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _* parsetab.* a.out 3 | 4 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/spr/pluto/clean: 
-------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* *.kernel.* gmon* -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/symv/pluto/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* *.kernel.* gmon* -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/trmm/pluto/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* a.out 3 | 4 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | *.cloog 2 | *.opt.c 3 | *.par.c 4 | *.tiled.c 5 | *.pluto.c 6 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/adi/pluto/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* a.out _* 3 | 4 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/fdtd/pluto2/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* a.out _* 3 | 4 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/gemver/pluto/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* a.out _* 3 | 4 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/seidel/pluto/clean: -------------------------------------------------------------------------------- 1 | 2 | rm *~ _annot* parsetab.* a.out 3 | 4 | -------------------------------------------------------------------------------- /test/unit_tests.in: 
-------------------------------------------------------------------------------- 1 | 2 2 2 | 1 1 3 | 2 2 4 | // CHECK: The rank of this matrix is 1 5 | -------------------------------------------------------------------------------- /examples/doitgen/fst/transformation_1.fst: -------------------------------------------------------------------------------- 1 | 2 2 | 2 3 | 0 1 4 | 256 5 | 1 6 | 2 7 | 256 8 | -------------------------------------------------------------------------------- /examples/doitgen/fst/transformation_2.fst: -------------------------------------------------------------------------------- 1 | 2 2 | 1 3 | 0 4 | 256 5 | 2 6 | 1 2 7 | 256 8 | -------------------------------------------------------------------------------- /examples/gemver/fst/transformation_1.fst: -------------------------------------------------------------------------------- 1 | 2 2 | 3 3 | 0 1 2 4 | 256 5 | 1 6 | 3 7 | 256 8 | -------------------------------------------------------------------------------- /examples/gemver/fst/transformation_2.fst: -------------------------------------------------------------------------------- 1 | 2 2 | 2 3 | 0 1 4 | 256 5 | 2 6 | 2 3 7 | 256 8 | -------------------------------------------------------------------------------- /examples/gemver/fst/transformation_4.fst: -------------------------------------------------------------------------------- 1 | 2 2 | 1 3 | 0 4 | 256 5 | 3 6 | 1 2 3 7 | 256 8 | -------------------------------------------------------------------------------- /poly_hyperplane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bondhugula/pluto/HEAD/poly_hyperplane.png -------------------------------------------------------------------------------- /examples/pca/.pca.c.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bondhugula/pluto/HEAD/examples/pca/.pca.c.swp 
-------------------------------------------------------------------------------- /m4/.gitignore: -------------------------------------------------------------------------------- 1 | libtool.m4 2 | ltoptions.m4 3 | ltsugar.m4 4 | ltversion.m4 5 | lt~obsolete.m4 6 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | rm -rf *.out* *.cloog *.opt.c *.par.c *.tiled.c *.kernel.* *.pluto.c 3 | -------------------------------------------------------------------------------- /test/noloop.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | 3 | #pragma scop 4 | s = 0; 5 | #pragma endscop 6 | -------------------------------------------------------------------------------- /examples/trisolv/decls.h: -------------------------------------------------------------------------------- 1 | #define NMAX 1000 2 | 3 | static double B[NMAX][NMAX], L[NMAX][NMAX]; 4 | 5 | -------------------------------------------------------------------------------- /lib/.gitignore: -------------------------------------------------------------------------------- 1 | # just for developer's convenience 2 | CVS 3 | .cvsignore 4 | version.h 5 | libpluto.la 6 | -------------------------------------------------------------------------------- /orio-0.1.0/doc/orio_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bondhugula/pluto/HEAD/orio-0.1.0/doc/orio_logo.jpg -------------------------------------------------------------------------------- /tool/.gitignore: -------------------------------------------------------------------------------- 1 | # just for developer's convenience 2 | CVS 3 | .cvsignore 4 | version.h 5 | libpluto.la 6 | -------------------------------------------------------------------------------- /examples/mvt/decls.h: 
-------------------------------------------------------------------------------- 1 | #define N 8000 2 | double a[N][N], y_1[N+17], y_2[N+19], x1[N+23], z0[N+29], x2[N+31]; 3 | -------------------------------------------------------------------------------- /test/fm1.in: -------------------------------------------------------------------------------- 1 | 6 5 0 2 | 1 0 0 0 -1 3 | -1 0 0 1 0 4 | -1 1 0 0 0 5 | 0 -1 0 1 0 6 | 1 0 -1 0 0 7 | -1 0 1 0 0 8 | -------------------------------------------------------------------------------- /test/fm2.in: -------------------------------------------------------------------------------- 1 | 6 5 1 2 | 1 0 0 0 -1 3 | -1 0 0 1 0 4 | -1 1 0 0 0 5 | 0 -1 0 1 0 6 | 0 0 -1 0 0 7 | 0 0 1 0 0 8 | -------------------------------------------------------------------------------- /test/ind.c: -------------------------------------------------------------------------------- 1 | constant N; 2 | 3 | i1 = 4; 4 | 5 | for (i = 0; i < N; i++) { 6 | a[i1] = a[i - 1] + 1; 7 | } 8 | -------------------------------------------------------------------------------- /test/test4.c: -------------------------------------------------------------------------------- 1 | constant n, m; 2 | 3 | a[10] = 5; 4 | 5 | do 6 | i = 1, n a[i] = a[i + n + 1]; 7 | end do 8 | -------------------------------------------------------------------------------- /examples/doitgen/decls.h: -------------------------------------------------------------------------------- 1 | #define N 25 2 | 3 | double A[N][N][N]; 4 | double sum[N][N][N]; 5 | double C4[N][N]; 6 | -------------------------------------------------------------------------------- /examples/matmul-init/decls.h: -------------------------------------------------------------------------------- 1 | #define N 1024 2 | double A[N][N+13]; 3 | double B[N][N+23]; 4 | double C[N][N+43]; 5 | -------------------------------------------------------------------------------- /test/polyprod-imper.c: 
-------------------------------------------------------------------------------- 1 | CONSTANT n; 2 | 3 | DO i = 0, n DO j = 0, n C[i + j] = C[i + j] + A[i] * B[j] END DO END DO 4 | -------------------------------------------------------------------------------- /examples/doitgen/fst/transformation_3.fst: -------------------------------------------------------------------------------- 1 | 3 2 | 1 3 | 0 4 | 256 5 | 1 6 | 1 7 | 256 8 | 1 9 | 2 10 | 256 11 | -------------------------------------------------------------------------------- /examples/fdtd-1d/Makefile: -------------------------------------------------------------------------------- 1 | 2 | SRC=fdtd-1d 3 | 4 | PLCFLAGS= 5 | TILEFLAGS = #--unroll 6 | 7 | include ../common.mk 8 | -------------------------------------------------------------------------------- /examples/gemver/fst/transformation_3.fst: -------------------------------------------------------------------------------- 1 | 3 2 | 2 3 | 0 1 4 | 256 5 | 1 6 | 2 7 | 0 8 | 1 9 | 3 10 | 256 11 | -------------------------------------------------------------------------------- /examples/gemver/fst/transformation_5.fst: -------------------------------------------------------------------------------- 1 | 3 2 | 1 3 | 0 4 | 256 5 | 2 6 | 1 2 7 | 256 8 | 1 9 | 3 10 | 256 11 | -------------------------------------------------------------------------------- /examples/gemver/fst/transformation_6.fst: -------------------------------------------------------------------------------- 1 | 3 2 | 1 3 | 0 4 | 256 5 | 1 6 | 1 7 | 256 8 | 2 9 | 2 3 10 | 256 11 | -------------------------------------------------------------------------------- /test/polyprod-func.c: -------------------------------------------------------------------------------- 1 | CONSTANT n; 2 | 3 | DO i = 0, n DO j = 0, n C[i, j] = C[i - 1, j + 1] + A[i] * B[j] END DO END DO 4 | -------------------------------------------------------------------------------- /examples/3d7pt/filename.sh: 
-------------------------------------------------------------------------------- 1 | VAR=`tr '\n' '-' 3 | 4 | Contributions (sorted by first name) by: 5 | 6 | Anoop JS 7 | Aravind Acharya 8 | Arvind M 9 | Chandan G 10 | Roshan Dathathri 11 | Sven Verdoolaege 12 | Taj Khan 13 | Uday Bondhugula 14 | Vinayaka Bandishti 15 | -------------------------------------------------------------------------------- /test/memmin.c: -------------------------------------------------------------------------------- 1 | constant N; 2 | 3 | DO i = 1, N a[i] = c[i]; 4 | e[i] = d[i]; 5 | end do 6 | 7 | DO i = 1, 8 | N - 2 a[i] = 0.33 * (c[i - 1] + c[i] + c[i + 1]); 9 | END DO 10 | 11 | DO i = 2, 12 | N - 3 d[i] = 0.33 * (a[i - 1] + a[i] + a[i + 1]); 13 | END DO 14 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/unroll.c: -------------------------------------------------------------------------------- 1 | 2 | /*@ begin Loop ( 3 | for (i = 0; i <= M-1; i++) 4 | transform Unroll(ufactor=2) 5 | for (j = 0; j <= N-1; j++) 6 | S(i,j,k); 7 | ) @*/ 8 | 9 | for (i = 0; i <= M-1; i++) 10 | for (j = 0; j <= N-1; j++) 11 | S(i,j,k); 12 | 13 | /*@ end @*/ 14 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/tensor-contraction/4d2d.c: -------------------------------------------------------------------------------- 1 | for (v1=0; v1<=V-1; v1=v1+1) 2 | for (v2=0; v2<=V-1; v2=v2+1) 3 | for (o1=0; o1<=O-1; o1=o1+1) 4 | for (o2=0; o2<=O-1; o2=o2+1) 5 | for (ox=0; ox<=O-1; ox=ox+1) 6 | R[v1][v2][o1][o2]=R[v1][v2][o1][o2]+T[v1][ox][o1][o2]*A2[v2][ox]; 7 | -------------------------------------------------------------------------------- /test/fusion.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | #pragma scop 3 | for (i = 1; i < N - 2; i++) { 4 | A[i] = 0.33 * (In[i - 1] + In[i] + In[i + 1]); 5 | } 6 | 7 | for (i = 2; i < N - 3; i++) 
{ 8 | Out[i] = 0.33 * (A[i - 1] + A[i] + A[i + 1]); 9 | // CHECK: Output written 10 | } 11 | #pragma endscop 12 | -------------------------------------------------------------------------------- /test/test1.c: -------------------------------------------------------------------------------- 1 | #pragma scop 2 | 3 | for (x = 0; x < 100; x++) { 4 | for (z = 0; z < 4; z++) { 5 | A[4 * x + z] = 1; 6 | } 7 | } 8 | 9 | for (x = 0; x < 100; x++) { 10 | for (z = 0; z < 4; z++) { 11 | B[4 * x + z] = A[4 * x + z]; 12 | } 13 | } 14 | #pragma endscop 15 | 16 | write(); 17 | -------------------------------------------------------------------------------- /test/tricky3.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | 3 | #pragma scop 4 | for (p = 0; p < pointc; ++p) { 5 | dist_min = 0; 6 | for (k = 0; k < clusterc; ++k) { 7 | dist = 0; 8 | kmin = 0; 9 | } 10 | for (d = 0; d < dims; ++d) { 11 | clusterv = 0; 12 | } 13 | } 14 | #pragma endscop 15 | -------------------------------------------------------------------------------- /orio-0.1.0/orf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # orf - Compile shell for Orio (Fortran source code) 4 | # 5 | 6 | #import src.main.main, sys 7 | #src.main.main.start(sys.argv, src.main.main.FORTRAN) 8 | 9 | import orio.main.main, sys 10 | orio.main.main.start(sys.argv, orio.main.main.FORTRAN) 11 | 12 | -------------------------------------------------------------------------------- /test/polynomial.c: -------------------------------------------------------------------------------- 1 | // Hyperplane i+j is parallel. 
2 | // CHECK: T(S2): (1, i+j, i) 3 | #pragma scop 4 | for (i = 0; i < 2 * n; i++) { 5 | c[i] = 0; 6 | } 7 | 8 | for (i = 0; i < n; i++) { 9 | for (j = 0; j < n; j++) { 10 | c[i + j] = c[i + j] + a[i] * b[j]; 11 | } 12 | } 13 | #pragma endscop 14 | -------------------------------------------------------------------------------- /test/lu.c: -------------------------------------------------------------------------------- 1 | 2 | #pragma scop 3 | for (k = 0; k < N; k++) { 4 | for (j = k + 1; j < N; j++) { 5 | a[k][j] = a[k][j] / a[k][k]; 6 | } 7 | for (i = k + 1; i < N; i++) { 8 | for (j = k + 1; j < N; j++) { 9 | a[i][j] = a[i][j] - a[i][k] * a[k][j]; 10 | } 11 | } 12 | } 13 | #pragma endscop 14 | -------------------------------------------------------------------------------- /test/test2.c: -------------------------------------------------------------------------------- 1 | #pragma scop 2 | 3 | for (i = 0; i < n; i++) 4 | A[i] = ina[i]; 5 | 6 | for (i = 0; i < n; i++) 7 | B[i] = inb[i]; 8 | 9 | for (i = 0; i < n; i++) 10 | for (j = 0; j < n; j++) 11 | B[i] = B[i] + A[j]; 12 | 13 | for (i = 0; i < n; i++) 14 | outb[i] = B[i]; 15 | #pragma endscop 16 | -------------------------------------------------------------------------------- /test/2d-bidirec.c: -------------------------------------------------------------------------------- 1 | 2 | int main() { 3 | int i, j; 4 | long N = 1000; 5 | 6 | int a[1000][1000]; 7 | #pragma scop 8 | for (i = 1; i < N; i++) { 9 | for (j = 0; j < N; j++) { 10 | a[i][j] += a[i - 1][j] + a[i - 1][N - 1 - j]; 11 | } 12 | } 13 | #pragma endscop 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /test/fusion5.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | 3 | #pragma scop 4 | for (i = 0; i < N; i++) { 5 | for (j = 0; j < N; j++) { 6 | B[i][j] = 2 * A[i][j]; 7 | } 8 | } 9 | 10 | for (i = 0; i < N; i++) { 11 | for (j 
= 0; j < N; j++) { 12 | C[i][j] = 3 * B[i - 1][N - j]; 13 | } 14 | } 15 | #pragma endscop 16 | -------------------------------------------------------------------------------- /test/mxv-seq.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | #pragma scop 3 | for (i = 0; i < N; i++) { 4 | for (j = 0; j < N; j++) { 5 | y[i] = y[i] + a[i][j] * x[j]; 6 | } 7 | } 8 | 9 | for (i = 0; i < N; i++) { 10 | for (j = 0; j < N; j++) { 11 | z[i] = z[i] + b[i][j] * y[j]; 12 | } 13 | } 14 | #pragma endscop 15 | -------------------------------------------------------------------------------- /test/negparam.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | 3 | #pragma scop 4 | for (i = 4 - n; i <= n + 2; i++) 5 | for (j = 4 - n; j <= n + 2; j++) 6 | a[i][j] = a[i - 1][j] + 2; 7 | 8 | for (i = 4 - n; i <= n + 2; i++) 9 | for (j = 4 - n; j <= n + 2; j++) 10 | a[i][j] = a[i][j] + 1; 11 | 12 | #pragma endscop 13 | -------------------------------------------------------------------------------- /test/fusion2.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | 3 | #pragma scop 4 | for (x = 0; x < 100; x++) { 5 | for (y = 0; y < 100; y++) { 6 | A[x][y] = 1; 7 | } 8 | } 9 | 10 | for (x = 0; x < 100; x++) { 11 | for (y = 0; y < 100; y++) { 12 | B[x][y] = A[x + 1][y] + A[x][y + 1]; 13 | } 14 | } 15 | #pragma endscop 16 | -------------------------------------------------------------------------------- /test/multidim-seq.c: -------------------------------------------------------------------------------- 1 | /* pluto start (N) */ 2 | 3 | DO i = 1, 4 | N s = s + a[i] END DO 5 | 6 | DO i = 1, 7 | N s = s + a[i] END DO 8 | 9 | DO i = 1, 10 | N s = s + a[i] END DO 11 | 12 | DO i = 1, 13 | N s = s + a[i] END DO 14 | /* pluto end */ 15 | 
-------------------------------------------------------------------------------- /test/fusion3.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | #pragma scop 3 | for (y = 0; y < 10000; y++) { 4 | for (x = 0; x < 10000; x++) { 5 | B[y][x] = x + y; 6 | } 7 | } 8 | 9 | for (x = 0; x < 10000; x++) { 10 | for (y = 0; y < 10000; y++) { 11 | C[y][x] = B[y - 1][x - 1] + B[y][x]; 12 | } 13 | } 14 | #pragma endscop 15 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/symv/pluto/symv.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | /* pluto start (N,alpha,beta) */ 4 | 5 | for (j=0; j<=N-1; j++) 6 | for (i=0; i<=j-1; i++) 7 | y[i] = beta*y[i] + alpha*A[i][j]*x[j]; 8 | 9 | for (i=0; i<=N-1; i++) 10 | for (j=i; j<=N-1; j++) 11 | y[i] = beta*y[i] + alpha*A[i][j]*x[j]; 12 | 13 | /* pluto end */ 14 | -------------------------------------------------------------------------------- /getversion.sh.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # getversion.h will be automatically generated from this 3 | # Output of getversion.h goes into src/version.h 4 | 5 | githead=@abs_top_srcdir@/.git/HEAD 6 | version=@PACKAGE_VERSION@ 7 | 8 | if [ -f $githead ]; then 9 | echo `git describe --tags --always` 10 | else 11 | echo $version 12 | fi 13 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/regtile.c: -------------------------------------------------------------------------------- 1 | 2 | /*@ begin Loop( 3 | transform RegTile(loops=['i','j','k'], ufactors=[2,2,2]) 4 | for (i=0; i<=M-1; i++) 5 | { 6 | for (j=0; j<=N-1; j++) 7 | { 8 | for (k=i; k<=O-1; k++) 9 | { 10 | S(i,j,k); 11 | } 12 | } 13 | } 14 | ) @*/ 15 | 16 | /*@ end @*/ 17 | 18 | 19 | 
-------------------------------------------------------------------------------- /test/fusion4.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | 3 | #pragma scop 4 | for (x = 0; x < 10000; x++) { 5 | for (y = 0; y < 10000; y++) { 6 | B[x][y] = x + y; 7 | } 8 | } 9 | 10 | for (x = 0; x < 10000; x++) { 11 | for (y = 0; y < 10000; y++) { 12 | C[x][y] = B[x - 1][y - 1] + B[x][y - 1]; 13 | } 14 | } 15 | #pragma endscop 16 | -------------------------------------------------------------------------------- /test/mpeg4-strip-down.c: -------------------------------------------------------------------------------- 1 | constant n, m; 2 | 3 | for (y_p = 1; y_p <= m - 1; y_p++) { 4 | sad[y_p][0] = sad[y_p - 1][m - 1] + curr[y_p][0] + prev[y_p][0]; 5 | 6 | for (x_p = 1; x_p <= m - 1; x_p++) { 7 | sad[y_p][x_p] = sad[y_p][x_p - 1] + curr[y_p][x_p] + prev[y_p][x_p]; 8 | } 9 | } 10 | 11 | result = sad[m - 1][m - 1]; 12 | -------------------------------------------------------------------------------- /doc/SubmittingPatches: -------------------------------------------------------------------------------- 1 | 2 | - Please follow the code style guidelines in doc/CodingStyle 3 | 4 | - Please sign your commits and generate patch against the git version, 5 | i.e., use 6 | 7 | $ git format-patch -M origin 8 | 9 | - Send patches to pluto-development@googlegroups.com or raise a pull request on Pluto's GitHub. 10 | 11 | -------------------------------------------------------------------------------- /test/costfunc.c: -------------------------------------------------------------------------------- 1 | // This validates Pluto's cost function. (i+j) leads to a constant amount of 2 | // communication/boundary misses, i.e., u = 0. 
3 | // CHECK: T(S1): (i+j, i) 4 | #pragma scop 5 | 6 | for (i = 0; i < N; i++) { 7 | for (j = 1; j < N; j++) { 8 | a[i][j] = a[j][i] + a[i][j - 1]; 9 | } 10 | } 11 | 12 | #pragma endscop 13 | -------------------------------------------------------------------------------- /test/limlam.c: -------------------------------------------------------------------------------- 1 | /* Pluto doesn't support negative number in transformation matrices - so can't 2 | * do anything on this; however, Pluto+ can. */ 3 | #pragma scop 4 | 5 | for (i = 0; i < N; i++) { 6 | for (j = 0; j < N; j++) { 7 | x[i][j] = x[i][j] + y[i - 1][j]; 8 | y[i][j] = x[i][j - 1] * y[i][j]; 9 | } 10 | } 11 | #pragma endscop 12 | -------------------------------------------------------------------------------- /orio-0.1.0/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: orio 3 | Version: 0.0.1 4 | Summary: ORIO -- An Annotation-Based Performance Tuning Tool 5 | Home-page: https://trac.mcs.anl.gov/projects/performance/wiki/Orio 6 | Author: Albert Hartono 7 | Author-email: hartonoa@cse.ohio-state.edu 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/tile.c: -------------------------------------------------------------------------------- 1 | 2 | /*@ begin Loop( 3 | transform Tile(tsize=32, tindex='ii') 4 | for (i = 0; i <= m-1; i++) 5 | transform Tile(tsize=32, tindex='jj') 6 | for (j = 0; j <= n-1; j++) 7 | S(i,j); 8 | ) @*/ 9 | 10 | for (i = 0; i <= m-1; i++) 11 | for (j = 0; j <= n-1; j++) 12 | S(i,j); 13 | 14 | /*@ end @*/ 15 | 16 | 17 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/align.c: -------------------------------------------------------------------------------- 1 | 2 | void axpy_4(int n, double *y, double a1, double *x1, double a2, double 
*x2, 3 | double a3, double *x3, double a4, double *x4) 4 | { 5 | int i; 6 | 7 | /*@ begin Align(x1[],x2[],x3[],x4[],y[]) @*/ 8 | for (i=0; i < n; i++) 9 | y[i] = y[i] + a1*x1[i] + a2*x2[i] + a3*x3[i] + a4*x4[i]; 10 | /*@ end @*/ 11 | 12 | } 13 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/mm/pluto/run.base: -------------------------------------------------------------------------------- 1 | 2 | export OMP_NUM_THREADS=1 3 | icc -fast -I/usr/local/icc/include mm.base.c -DREPS=4 -DNCONT=10000 -DCONT=1 4 | ./a.out 5 | ./a.out 6 | rm a.out 7 | 8 | 9 | export OMP_NUM_THREADS=2 10 | icc -fast -parallel -I/usr/local/icc/include mm.base.c -DREPS=4 -DNCONT=10000 -DCONT=1 11 | ./a.out 12 | ./a.out 13 | rm a.out 14 | 15 | -------------------------------------------------------------------------------- /examples/tce/Makefile: -------------------------------------------------------------------------------- 1 | SRC=tce-4index-transform 2 | 3 | PLCFLAGS=--codegen-context=32 4 | TILEFLAGS = 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) --output $(SRC).kernel.par2d.c 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/hessian/haxpy3.c: -------------------------------------------------------------------------------- 1 | for (i=0; i<=N-1; i=i+1) 2 | for (j=0; j<=N-1; j=j+1) 3 | Y[i][j]= 4 | a0*X0[i][j]+a1*X1[i][j]+a2*X2[i][j] 5 | +2.0*b00*u0[i]*u0[j] 6 | +2.0*b11*u1[i]*u1[j] 7 | +2.0*b22*u2[i]*u2[j] 8 | +b01*(u0[i]*u1[j]+u1[i]*u0[j]) 9 | +b02*(u0[i]*u2[j]+u2[i]*u0[j]) 10 | +b12*(u1[i]*u2[j]+u2[i]*u1[j]); 11 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/mm/pluto/run.best: 
-------------------------------------------------------------------------------- 1 | 2 | export OMP_NUM_THREADS=1 3 | icc -fast -openmp -I/usr/local/icc/include mm.best-seq.c -DREPS=2 -DNCONT=100 -DCONT=100000 4 | ./a.out 5 | ./a.out 6 | rm a.out 7 | 8 | export OMP_NUM_THREADS=4 9 | icc -fast -openmp -I/usr/local/icc/include mm.best-par.c -DREPS=2 -DNCONT=100 -DCONT=100000 10 | ./a.out 11 | ./a.out 12 | rm a.out 13 | 14 | -------------------------------------------------------------------------------- /test/haar.c: -------------------------------------------------------------------------------- 1 | /* pluto start (n,m) */ 2 | 3 | for (i = 0; i < n; i++) { 4 | vec[0][i] = 0; 5 | } 6 | 7 | for (i = 1; i < m; i++) { 8 | for (j = 0; j < n; j++) { 9 | vec[i, j] = (vec[i - 1, 2 * j] + vec[i - 1, 2 * j + 1]); 10 | } 11 | for (j = 0; j < n; j++) { 12 | vec[i, j + n] = (vec[i - 1, 2 * j] - vec[i - 1, 2 * j + 1]); 13 | } 14 | } 15 | /* pluto end */ 16 | -------------------------------------------------------------------------------- /test/matmult-imper.c: -------------------------------------------------------------------------------- 1 | CONSTANT n; 2 | 3 | for (i = 0; i < n; i++) 4 | for (k = 0; k < n; k++) 5 | for (j = 0; j < n; j++) 6 | C[i, j] = C[i, j] + 7 | A[i, k] * 8 | B[k, j] for (i = 0; i < n; i++) for (k = 0; k < n; 9 | k++) for (j = 0; j < n; j++) 10 | D[i, j] = D[i, j] + E[i, k] * C[k, j] 11 | -------------------------------------------------------------------------------- /test/multi-stmt-lazy-lin-ind.c: -------------------------------------------------------------------------------- 1 | /* A stripped-down version of a kernel obtained from Tomofomo Yuki */ 2 | void kernel(int M, int N, int D[M][N]) { 3 | int i, j; 4 | int Dp[N][M]; 5 | 6 | #pragma scop 7 | for (i = 1; i < M; i++) { 8 | for (j = 1; j < N; j++) { 9 | D[i][j] = D[i - 1][j - 1]; 10 | Dp[i][j] = D[j][i]; 11 | } 12 | } 13 | #pragma endscop 14 | } 15 | 
-------------------------------------------------------------------------------- /tool/pluto_codegen_if.h: -------------------------------------------------------------------------------- 1 | #ifndef _PLUTO_CODEGEN_IF_H_ 2 | #define _PLUTO_CODEGEN_IF_H_ 3 | 4 | #include "osl/extensions/loop.h" 5 | 6 | typedef struct plutoProg PlutoProg; 7 | 8 | osl_loop_p pluto_get_vector_loop_list(const PlutoProg *prog); 9 | osl_loop_p pluto_get_parallel_loop_list(const PlutoProg *prog, 10 | int vloopsfound); 11 | #endif 12 | -------------------------------------------------------------------------------- /orio-0.1.0/orcc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # orcc - Compile shell for Orio (C/C++ source code) 4 | # 5 | 6 | import src.main.main, sys 7 | import warnings 8 | 9 | warnings.simplefilter("ignore", DeprecationWarning) 10 | 11 | 12 | src.main.main.start(sys.argv, src.main.main.C_CPP) 13 | 14 | #import orio.main.main, sys 15 | #orio.main.main.start(sys.argv, orio.main.main.C_CPP) 16 | 17 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/permut1.c: -------------------------------------------------------------------------------- 1 | 2 | /*@ begin Loop( 3 | transform Permut(seq=('k',['x'],'i','j')) 4 | for (i = 1; i <= p; i += 1) 5 | for (j = 1; j <= q; j += 1) 6 | for (k = 1; k <= r; k += 1) 7 | S(i,j,k); 8 | ) @*/ 9 | for (i = 1; i <= p; i += 1) 10 | for (j = 1; j <= q; j += 1) 11 | for (k = 1; k <= r; k += 1) 12 | S(i,j,k); 13 | /*@ end @*/ 14 | 15 | -------------------------------------------------------------------------------- /test/jacobi-1d-mod.c: -------------------------------------------------------------------------------- 1 | 2 | int u[2][1000]; 3 | 4 | int main() { 5 | int N = 1000, T = 1000; 6 | int i, t; 7 | 8 | #pragma scop 9 | for (t = 1; t <= T - 1; t++) { 10 | for (i = 1; i <= N - 2; i++) { 11 | u[t % 2][i] = 12 | u[(t - 1) 
% 2][i - 1] + u[(t - 1) % 2][i] + u[(t - 1) % 2][i + 1]; 13 | } 14 | } 15 | #pragma endscop 16 | 17 | return (int)u[T - 1][1]; 18 | } 19 | -------------------------------------------------------------------------------- /test/dep-1,1.c: -------------------------------------------------------------------------------- 1 | // TILE-PARALLEL: T(S1): (i, j) 2 | // TILE-PARALLEL: [Pluto] After tiling: 3 | // TILE-PARALLEL: T(S1): (i/32, j/32, i, j) 4 | // TILE-PARALLEL: [Pluto] After tile scheduling: 5 | // TILE-PARALLEL: T(S1): (i/32+j/32, j/32, i, j) 6 | #pragma scop 7 | 8 | for (i = 1; i < N; i++) { 9 | for (j = 1; j < N; j++) { 10 | a[i][j] = a[i - 1][j] + a[i][j - 1]; 11 | } 12 | } 13 | #pragma endscop 14 | -------------------------------------------------------------------------------- /orio-0.1.0/src/tool/README: -------------------------------------------------------------------------------- 1 | This directory contains programs used as tools by the annotation 2 | software. 3 | 4 | The following is a list of available tools. 5 | PLY - An implementation of lex and yacc parsing tools for Python. 
6 | URL: http://www.dabeaz.com/ply 7 | ZestyParser - a Python package for writing data parsers 8 | URL: http://zestyparser.adamatlas.org/ 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /test/clauss.c: -------------------------------------------------------------------------------- 1 | #define N 10 2 | 3 | int main(void) { 4 | int i, j; 5 | int **a, **b; 6 | 7 | #pragma scop 8 | for (i = 0; i < 3 * N - 1; i++) 9 | for (j = 0; j < N; j++) { 10 | if ((i + j >= N - 1) && (i + j <= 3 * N - 2)) 11 | a[i][j] = 0; 12 | if ((i + j >= 2 * N - 1) && (i + j <= 4 * N - 2)) 13 | b[i][j] = a[i - N][j]; 14 | } 15 | #pragma endscop 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/haar.cpp: -------------------------------------------------------------------------------- 1 | constant n, sqrt2, w; 2 | 3 | for(i=0;i1) 7 | { 8 | /* w = w / 2; */ 9 | for(i=0;i 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | 11 | double rtclock() 12 | { 13 | struct timezone Tzp; 14 | struct timeval Tp; 15 | int stat; 16 | stat = gettimeofday (&Tp, &Tzp); 17 | if (stat != 0) printf("Error return from gettimeofday: %d",stat); 18 | return(Tp.tv_sec + Tp.tv_usec*1.0e-6); 19 | } 20 | -------------------------------------------------------------------------------- /test/dfp/typed-fuse-2.c: -------------------------------------------------------------------------------- 1 | /* With typed fusion, these loop nests have to be distributed. The test case 2 | * looks for loop distribution when the two sccs are connected. 
*/ 3 | 4 | // TYPED-FUSE-CHECK: T(S1): (0, i) 5 | // TYPED-FUSE-CHECK: T(S2): (1, i) 6 | // CHECK: Output written 7 | 8 | #pragma scop 9 | for (i = 2; i < N; i++) { 10 | A[i] = A[i - 1] + A[i - 2]; 11 | } 12 | 13 | for (i = 0; i < N; i++) { 14 | B[i] = A[i]; 15 | } 16 | #pragma endscop 17 | -------------------------------------------------------------------------------- /examples/advect3d/advect3d2.f: -------------------------------------------------------------------------------- 1 | constant nx, ny, nz, nbdy; 2 | 3 | do j = -2,ny+nbdy 4 | do i = -2,nx+nbdy 5 | do k = -2,nz+nbdy 6 | af(k,i,j) = 1 7 | end do 8 | end do 9 | end do 10 | 11 | 12 | do j = -2,ny+nbdy 13 | do i = -2,nx+nbdy 14 | do k = -2,nz+nbdy 15 | athird(k,i,j) = af(k+1,i,j) 16 | end do 17 | end do 18 | end do 19 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/mm/pluto/run.mkl: -------------------------------------------------------------------------------- 1 | 2 | export OMP_NUM_THREADS=1 3 | icc -fast -I/usr/local/mkl/include mm.mkl.c -DREPS=1 -DNCONT=10000 -DCONT=1 -L /usr/local/mkl/lib/em64t -lmkl_em64t -lguide -lpthread 4 | ./a.out 5 | ./a.out 6 | rm a.out 7 | 8 | export OMP_NUM_THREADS=4 9 | icc -fast -parallel -I/usr/local/mkl/include mm.mkl.c -DREPS=1 -DNCONT=10000 -DCONT=1 -L /usr/local/mkl/lib/em64t -lmkl_em64t -lguide -lpthread 10 | ./a.out 11 | ./a.out 12 | rm a.out 13 | 14 | -------------------------------------------------------------------------------- /test/fusion11.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | #pragma scop 3 | for (int _i0 = 0; (_i0 < (N - 7371)); _i0++) { 4 | for (int _i1 = 0; (_i1 <= 7238); _i1++) { 5 | yf1[_i0] = (yf1[_i0] + (yds[((7238 + _i0) - _i1)] * taps1[_i1])); 6 | } 7 | } 8 | for (int _i0 = 7238; (_i0 < (N - 14609 + 7238)); _i0++) { 9 | for (int _i1 = 0; (_i1 <= 7238); _i1++) { 10 | yf2[_i0 - 7238] = (yf2[_i0 - 7238] + 
(yf1[((_i0)-_i1)] * taps2[_i1])); 11 | } 12 | } 13 | #pragma endscop 14 | -------------------------------------------------------------------------------- /examples/.upload: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ssh stdsun "rm -rf ~/WWW/pluto/examples; mkdir ~/WWW/pluto/examples" 4 | 5 | scp -r ../pluto-examples/* stdsun:WWW/pluto/examples/ 6 | ssh stdsun "rm -rf ~/WWW/pluto/examples/CVS" 7 | ssh stdsun "cd ~/WWW/pluto/; tar cvf - examples | gzip > pluto-examples-0.0.1.tgz" 8 | 9 | ssh stdsun "chmod 755 ~/WWW/pluto/examples" 10 | ssh stdsun "chmod 755 ~/WWW/pluto/examples/*" 11 | ssh stdsun "find ~/WWW/pluto/examples -type f -exec chmod -R 644 {} \;" 12 | -------------------------------------------------------------------------------- /test/tce-2index-transform.c: -------------------------------------------------------------------------------- 1 | constant N; 2 | 3 | DO a = 1, N DO q = 1, N DO r = 1, N DO s = 1, N DO p = 1, 4 | N T1[a, q, r, s] = 5 | T1[a, q, r, s] + A[p, q, r, s] * 6 | C4[p, a] END DO END DO END DO END DO END DO 7 | 8 | DO a = 1, 9 | N DO b = 1, N DO r = 1, N DO s = 1, N DO q = 1, 10 | N T2[a, b, r, s] = 11 | T2[a, b, r, s] + 12 | T1[a, q, r, s] * C3[q, b] END DO END DO END DO END DO END DO 13 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/gemver/pluto/cmds: -------------------------------------------------------------------------------- 1 | 2 | 3 | *** icc *** 4 | seq: 5 | export OMP_NUM_THREADS=1; icc -fast gemver.base.c -DREPS=4 -DN=10000 6 | par: 7 | export OMP_NUM_THREADS=4; icc -fast -parallel gemver.base.c -DREPS=4 -DN=10000 8 | 9 | *** pluto+ancc *** 10 | seq: 11 | export OMP_NUM_THREADS=1; icc -fast -openmp gemver.pluto_ancc.seq.c -DREPS=4 -DN=10000 12 | par: 13 | export OMP_NUM_THREADS=4; icc -fast -openmp gemver.pluto_ancc.par.c -DREPS=4 -DN=10000 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /orio-0.1.0/testsuite/gemver/pluto/gemver.c: -------------------------------------------------------------------------------- 1 | 2 | /* pluto start (N,alpha,beta) */ 3 | for (i=0; i<=N-1; i++) 4 | for (j=0; j<=N-1; j++) 5 | B[i][j] = A[i][j] + u1[i]*v1[j] + u2[i]*v2[j]; 6 | 7 | for (i=0; i<=N-1; i++) 8 | for (j=0; j<=N-1; j++) 9 | x[i] = x[i] + beta* B[j][i]*y[j]; 10 | 11 | 12 | for (i=0; i<=N-1; i++) 13 | x[i] = x[i] + z[i]; 14 | 15 | for (i=0; i<=N-1; i++) 16 | for (j=0; j<=N-1; j++) 17 | w[i] = w[i] + alpha* B[i][j]*x[j]; 18 | /* pluto end */ 19 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/jacobi/pluto/jacobi-1d_profiling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define T 100 5 | #define N 100000 6 | double a[T][N]; 7 | 8 | void init_array() 9 | { 10 | int i, j; 11 | for (i=0; i 2 | #include 3 | 4 | #define N 600 5 | #define T 300 6 | double A[N][N]; 7 | 8 | void init_array() 9 | { 10 | int i, j; 11 | 12 | for (i=0; i 2 | #include 3 | 4 | #define T 400 5 | #define N 400 6 | double X[N][N+20]; 7 | double A[N][N+20]; 8 | double B[N][N+20]; 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | for (i=0; i 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | 14 | for (i = 0; i < NMAX; i++) { 15 | for (j = 0; j < NMAX; j++) { 16 | B[i][j] = i+j; 17 | L[i][j] = (i+j+3.45) *i*j*0.5; 18 | } 19 | } 20 | 21 | 22 | } 23 | 24 | 25 | double rtclock() 26 | { 27 | struct timezone Tzp; 28 | struct timeval Tp; 29 | int stat; 30 | stat = gettimeofday (&Tp, &Tzp); 31 | if (stat != 0) printf("Error return from gettimeofday: %d",stat); 32 | return(Tp.tv_sec + Tp.tv_usec*1.0e-6); 33 | } 34 | 
-------------------------------------------------------------------------------- /test/intratileopt3.c: -------------------------------------------------------------------------------- 1 | // 2 | // Per Pluto intra-tile loop order cost function, accesses with temporal reuse 3 | // are more important than spatial reuse. 4 | // 5 | // CHECK: T(S1): (j, i) 6 | // 7 | // For tiling and parallelization as well as for intra-tile locality, it's 8 | // profitable to have the j loop outside. 9 | // TILE-PARALLEL: T(S1): (j, i) 10 | // TILE-PARALLEL: [Pluto] After tiling: 11 | // TILE-PARALLEL: T(S1): (j/32, i/32, j, i) 12 | // TILE-PARALLEL: [pluto] After intra-tile optimize 13 | // TILE-PARALLEL: T(S1): (j/32, i/32, j, i) 14 | // 15 | #pragma scop 16 | for (i = 0; i < N; i++) { 17 | for (j = 0; j < M; j++) { 18 | d[j] += a[i][j]; 19 | } 20 | } 21 | #pragma endscop 22 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/runjobs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import re,os,shutil 4 | 5 | basefile = 'axpy4' 6 | sizes = [10, 100,1000,10000,50000,100000,500000,1000000,2000000,5000000] 7 | 8 | for s in sizes: 9 | f = open('axpy.bg.spec','r') 10 | contents = f.read() 11 | f.close() 12 | contents = re.sub('@THESIZE@',str(s),contents) 13 | fname = 'axpy' + str(s) + '.bg.spec' 14 | f = open(fname,'w') 15 | f.write(contents) 16 | f.close() 17 | srcfile = basefile + '_' + str(s) + '.c' 18 | shutil.copyfile(basefile + '.c',srcfile) 19 | # run the tests 20 | cmd = 'ancc -v -s ' + fname + ' ' + srcfile + ' > axpy' + str(s) + '.bg.output.txt 2>&1 &' 21 | print cmd 22 | os.system(cmd) 23 | -------------------------------------------------------------------------------- /examples/pca/Makefile: -------------------------------------------------------------------------------- 1 | SRC= pca 2 | 3 | PLCFLAGS = #--codegen-context=100 --unroll 4 | TILEFLAGS 
= #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/trmm/pluto/trmm_profiling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 700 5 | #define alpha 1 6 | double A[N][N+20]; 7 | double B[N][N+20]; 8 | 9 | void init_array() 10 | { 11 | int i,j; 12 | for (i=0; i out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/adi/pluto/cmds: -------------------------------------------------------------------------------- 1 | 2 | 3 | *** icc *** 4 | seq: 5 | export OMP_NUM_THREADS=1; icc -fast adi.base.c -DREPS=1 -DT=512 -DN=512 6 | par: 7 | export OMP_NUM_THREADS=4; icc -fast -parallel adi.base.c -DREPS=1 -DT=512 -DN=512 8 | 9 | *** pluto *** 10 | seq: 11 | export OMP_NUM_THREADS=1; icc -fast adi.pluto.seq.c -DREPS=1 -DT=512 -DN=512 12 | par: 13 | export OMP_NUM_THREADS=4; icc -fast -openmp adi.pluto.par.c -DREPS=1 -DT=512 -DN=512 14 | 15 | *** pluto+ancc *** 16 | seq: 17 | export OMP_NUM_THREADS=1; icc -fast -openmp adi.pluto_ancc.seq_par.c -DREPS=1 -DT=512 -DN=512 18 | par: 19 | export OMP_NUM_THREADS=4; 
icc -fast -openmp adi.pluto_ancc.seq_par.c -DREPS=1 -DT=512 -DN=512 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/trmm/pluto/cmds: -------------------------------------------------------------------------------- 1 | 2 | 3 | *** icc *** 4 | seq: 5 | export OMP_NUM_THREADS=1; icc -fast trmm.base.c -DREPS=1 -Dalpha=1 -DN=1024 6 | par: 7 | export OMP_NUM_THREADS=1; icc -fast -parallel trmm.base.c -DREPS=1 -Dalpha=1 -DN=1024 8 | 9 | *** mkl *** 10 | seq-par: 11 | export OMP_NUM_THREADS=1; icc -fast -I/usr/local/mkl/include trmm.mkl.c -L /usr/local/mkl/lib/em64t -lmkl_em64t -lguide -lpthread -DREPS=1 -DMYalpha=1 -DMYN=1024 12 | 13 | *** pluto+ancc *** 14 | seq: 15 | export OMP_NUM_THREADS=1; icc -fast -openmp trmm.pluto_ancc.seq_par.c -DREPS=1 -Dalpha=1 -DN=1024 16 | par: 17 | export OMP_NUM_THREADS=4; icc -fast -openmp trmm.pluto_ancc.seq_par.c -DREPS=1 -Dalpha=1 -DN=1024 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /examples/dsyrk/Makefile: -------------------------------------------------------------------------------- 1 | SRC= dsyrk 2 | 3 | PLCFLAGS = #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/strmm/Makefile: 
-------------------------------------------------------------------------------- 1 | SRC= strmm 2 | 3 | PLCFLAGS = #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/strsm/Makefile: -------------------------------------------------------------------------------- 1 | SRC= strsm 2 | 3 | PLCFLAGS = #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/trisolv/Makefile: -------------------------------------------------------------------------------- 1 | SRC= trisolv 2 | 3 | PLCFLAGS = #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | 
$(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/advect3d/test.f: -------------------------------------------------------------------------------- 1 | constant nx, ny, nz, nbdy; 2 | 3 | do j = 4-nbdy,ny+nbdy-3 4 | do i = 4-nbdy,nx+nbdy-3 5 | do k = 4-nbdy,nz+nbdy-2 6 | af(k,i,j) = (f60 * (a(k-1,i,j) + a(k,i,j)) + f61 7 | * (a(k-2,i,j) + a(k+1,i,j)) + f62 * (a(k-3,i,j) + a(k+2,i,j))) 8 | * thirddtbydz * uzf(k,i,j) 9 | end do 10 | end do 11 | end do 12 | 13 | 14 | do j = 4-nbdy,ny+nbdy-3 15 | do i = 4-nbdy,nx+nbdy-3 16 | do k = 4-nbdy,nz+nbdy-3 17 | athird(k,i,j) = a(k,i,j) + (al(k,i+1,j) - al(k,i,j)) 18 | + (ab(k,i,j+1) - ab(k,i,j)) + (af(k+1,i,j) - af(k,i,j)) 19 | end do 20 | end do 21 | end do 22 | -------------------------------------------------------------------------------- /examples/advect3d/advect3d1.f: -------------------------------------------------------------------------------- 1 | constant nx, ny, nz, nbdy; 2 | 3 | do j = 4-nbdy,ny+nbdy-3 4 | do i = 4-nbdy,nx+nbdy-3 5 | do k = 4-nbdy,nz+nbdy-2 6 | af(k,i,j) = (f60 * (a(k-1,i,j) + a(k,i,j)) + f61 7 | * (a(k-2,i,j) + a(k+1,i,j)) + f62 * (a(k-3,i,j) + a(k+2,i,j))) 8 | * thirddtbydz * uzf(k,i,j) 9 | end do 10 | end do 11 | end do 12 | 13 | 14 | do j = 4-nbdy,ny+nbdy-3 15 | do i = 4-nbdy,nx+nbdy-3 16 | do k = 4-nbdy,nz+nbdy-3 17 | athird(k,i,j) = a(k,i,j) + (al(k,i+1,j) - al(k,i,j)) 18 | + 
(ab(k,i,j+1) - ab(k,i,j)) + (af(k+1,i,j) - af(k,i,j)) 19 | end do 20 | end do 21 | end do 22 | -------------------------------------------------------------------------------- /examples/dsyr2k/Makefile: -------------------------------------------------------------------------------- 1 | SRC= dsyr2k 2 | 3 | PLCFLAGS = #--unroll --ufactor=4 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /test/tce-3index-transform.c: -------------------------------------------------------------------------------- 1 | constant N; 2 | 3 | DO a = 1, N DO q = 1, N DO r = 1, N DO s = 1, N DO p = 1, 4 | N T1[a, q, r, s] = 5 | T1[a, q, r, s] + A[p, q, r, s] * 6 | C4[p, a] END DO END DO END DO END DO END DO 7 | 8 | DO a = 1, 9 | N DO b = 1, N DO r = 1, N DO s = 1, N DO q = 1, 10 | N T2[a, b, r, s] = 11 | T2[a, b, r, s] + T1[a, q, r, s] * 12 | C3[q, b] END DO END DO END DO END DO END DO 13 | 14 | DO a = 1, 15 | N DO b = 1, N DO c = 1, N DO s = 1, N DO r = 1, 16 | N T3[a, b, c, s] = 17 | T3[a, b, c, s] + 18 | T2[a, b, r, s] * C2[r, c] END DO END DO END DO END DO END DO 19 | -------------------------------------------------------------------------------- /test/largebounds.c: -------------------------------------------------------------------------------- 1 | 2 | #pragma scop 3 | 4 | for (x = 0; x < 10000; x = x + 
1) { 5 | for (y = 0; y < 10000; y = y + 1) { 6 | green[x][y] = x + y; 7 | } 8 | } 9 | 10 | for (x = 0; x < 10000; x = x + 1) { 11 | for (y = 0; y < 10000; y = y + 1) { 12 | red[x][y] = x + y; 13 | } 14 | } 15 | for (x = 0; x < 10000; x = x + 1) { 16 | for (y = 0; y < 10000; y = y + 1) { 17 | red[x][y] = diff(red[x][y], green[x][y]); 18 | } 19 | } 20 | 21 | for (x = 1; x < 4999; x += 1) { 22 | for (y = 1; y < 4999; y += 1) { 23 | red[2 * x + 1][2 * y + 1] = 24 | RBK_3x3_1(red[2 * x][2 * y], red[2 * x][2 * y + 2], 25 | red[2 * x + 2][2 * y], red[2 * x + 2][2 * y + 2]); 26 | } 27 | } 28 | 29 | #pragma endscop 30 | -------------------------------------------------------------------------------- /examples/covcol/Makefile: -------------------------------------------------------------------------------- 1 | SRC= covcol 2 | 3 | PLCFLAGS = --codegen-context=100 #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/floyd/Makefile: -------------------------------------------------------------------------------- 1 | SRC= floyd 2 | 3 | PLCFLAGS += --forceparallel=2 #--unroll 4 | TILEFLAGS += #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 
12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/ssymm/Makefile: -------------------------------------------------------------------------------- 1 | SRC= ssymm 2 | 3 | PLCFLAGS = --codegen-context=100 #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/apop/Makefile: -------------------------------------------------------------------------------- 1 | SRC=apop 2 | 3 | PLCFLAGS = --pet #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | DISTOPT_FLAGS += 6 | 7 | include ../common.mk 8 | 9 | $(SRC).par2d.c: 10 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 11 | 12 | par2d: $(SRC).par2d.c decls.h util.h 13 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 14 | 15 | par2d_test: $(SRC).par2d.c decls.h util.h 16 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o 
par2d_test -DTEST 17 | 18 | ptest: tiled_test par2d_test par_test 19 | ./tiled_test 2> out_tiled 20 | export OMP_NUM_THREADS=4 21 | ./par_test 2> out_par4 22 | diff -q out_tiled out_par4 23 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 24 | diff -q out_tiled out_par2d 25 | -------------------------------------------------------------------------------- /examples/seidel/Makefile: -------------------------------------------------------------------------------- 1 | SRC= seidel 2 | 3 | PLCFLAGS = #--codegen-context=100 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(CFLAGS) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /examples/template/Makefile: -------------------------------------------------------------------------------- 1 | SRC= template 2 | 3 | PLCFLAGS = --codegen-context=100 #--unroll 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | 
diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /test/poisson.c: -------------------------------------------------------------------------------- 1 | constant N1, N2, N3; 2 | 3 | for (i = 1; i < N1 - 1; i++) { 4 | for (j = 1; j < N2 - 1; j++) { 5 | for (k = 1; k < N3 - 1; k++) { 6 | A[i][j][k] = 7 | (A[i][j][k] + 8 | B[0] * (A[i][j][k + 1] + A[i][j][k - 1] + A[i][j + 1][k] + 9 | A[i][j - 1][k] + A[i + 1][j][k] + A[i - 1][j][k]) + 10 | B[1] * 11 | (A[i][j + 1][k + 1] + A[i][j + 1][k - 1] + A[i + 1][j][k + 1] + 12 | A[i + 1][j][k - 1] + A[i][j - 1][k + 1] + A[i][j - 1][k - 1] + 13 | A[i - 1][j][k + 1] + A[i - 1][j][k - 1] + A[i + 1][j + 1][k] + 14 | A[i + 1][j - 1][k] + A[i - 1][j + 1][k] + A[i - 1][j - 1][k])) * 15 | 0.50; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /test/hard.f: -------------------------------------------------------------------------------- 1 | 2 | /* pluto start (nx,ny,nz,nbdy) */ 3 | 4 | do j = 4-nbdy,ny+nbdy-3 5 | do i = 4-nbdy,nx+nbdy-3 6 | do k = 4-nbdy,nz+nbdy-2 7 | af(k,i,j) = (f60 * (a(k-1,i,j) + a(k,i,j)) + f61 8 | * (a(k-2,i,j) + a(k+1,i,j)) + f62 * (a(k-3,i,j) + a(k+2,i,j))) 9 | * thirddtbydz * uzf(k,i,j) 10 | end do 11 | end do 12 | end do 13 | 14 | 15 | do j = 4-nbdy,ny+nbdy-3 16 | do i = 4-nbdy,nx+nbdy-3 17 | do k = 4-nbdy,nz+nbdy-3 18 | athird(k,i,j) = a(k,i,j) + (al(k,i+1,j) - al(k,i,j)) 19 | + (ab(k,i,j+1) - ab(k,i,j)) + (af(k+1,i,j) - af(k,i,j)) 20 | end do 21 | end do 22 | end do 23 | /* pluto end */ 24 | -------------------------------------------------------------------------------- /test/intratileopt4.c: -------------------------------------------------------------------------------- 1 | // Outer parallelism + intra-tile locality: while the (i, j) loop order is 2 | // better for coarse-grained parallelism, the 
(j, i) is better for locality. 3 | // 4 | // CHECK: T(S1): (i, j) 5 | // CHECK: [pluto] After intra-tile optimize 6 | // CHECK: T(S1): (j, i) 7 | // 8 | // When tiling, only the intra-tile loop order is changed for locality here. 9 | // TILE-PARALLEL: T(S1): (i, j) 10 | // TILE-PARALLEL: [Pluto] After tiling: 11 | // TILE-PARALLEL: T(S1): (i/32, j/32, i, j) 12 | // TILE-PARALLEL: [pluto] After intra-tile optimize 13 | // TILE-PARALLEL: T(S1): (i/32, j/32, j, i) 14 | // 15 | #pragma scop 16 | for (i = 0; i < N; i++) { 17 | for (j = 0; j < M; j++) { 18 | a[i] += b[j]*c[j][i]; 19 | } 20 | } 21 | #pragma endscop 22 | -------------------------------------------------------------------------------- /examples/corcol/Makefile: -------------------------------------------------------------------------------- 1 | SRC= corcol 2 | 3 | PLCFLAGS = --codegen-context=100 #--unroll --ufactor=8 4 | TILEFLAGS = #--second-level-tile 5 | 6 | include ../common.mk 7 | 8 | $(SRC).par2d.c: 9 | $(PLC) $(SRC).c --tile --parallel --multipipe $(TILEFLAGS) $(PLCFLAGS) 10 | 11 | par2d: $(SRC).par2d.c decls.h util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d 13 | 14 | par2d_test: $(SRC).par2d.c decls.h util.h 15 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).par2d.c -o par2d_test -DTEST 16 | 17 | ptest: tiled_test par2d_test par_test 18 | ./tiled_test 2> out_tiled 19 | export OMP_NUM_THREADS=4 20 | ./par_test 2> out_par4 21 | diff -q out_tiled out_par4 22 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 23 | diff -q out_tiled out_par2d 24 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150600.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 2.05714e-11, '[15]' : 2.07143e-11, '[8]' : 2.34286e-11, '[9]' : 2.5e-11, '[18]' : 2.47143e-11, '[10]' : 2.47143e-11, '[11]' : 2.5e-11, '[6]' : 2.75714e-11, '[7]' : 2.3e-11, '[12]' : 2.5e-11, '[4]' : 2.51429e-11, '[5]' : 
2.68571e-11, '[2]' : 2.61429e-11, '[3]' : 2.61429e-11, '[16]' : 2.51429e-11, '[17]' : 2.02857e-11, '[1]' : 2.5e-11, '[14]' : 2.5e-11, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /test/multi-stmt-periodic.c: -------------------------------------------------------------------------------- 1 | 2 | int u[2][1000]; 3 | int v[2][1000]; 4 | int w[2][1000]; 5 | 6 | // CHECK: 2i-N = 0 7 | // CHECK: [iss] Splitting S1 into 2 statements 8 | // CHECK: [iss] Splitting S2 into 2 statements 9 | int main() { 10 | int N = 1000, T = 1000; 11 | int i, t; 12 | 13 | #pragma scop 14 | for (t = 1; t <= T - 1; t++) { 15 | for (i = 0; i <= N - 1; i++) { 16 | u[t % 2][i] = v[(t - 1) % 2][i == 0 ? N - 1 : i - 1] + u[(t - 1) % 2][i] + 17 | u[(t - 1) % 2][i == N - 1 ? 0 : i + 1]; 18 | v[t % 2][i] = v[(t - 1) % 2][i == 0 ? N - 1 : i - 1] + v[(t - 1) % 2][i] + 19 | u[(t - 1) % 2][i == N - 1 ? 
0 : i + 1]; 20 | } 21 | } 22 | #pragma endscop 23 | 24 | return (int)u[T - 1][1]; 25 | } 26 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/polysyn_profiling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 500 5 | double A[N][N]; 6 | double L[N][N]; 7 | double U[N][N]; 8 | 9 | void init_array() 10 | { 11 | int i, j, k; 12 | for (i=0; i out_tiled 20 | export OMP_NUM_THREADS=4 21 | ./par_test 2> out_par4 22 | diff -q out_tiled out_par4 23 | export OMP_NUM_THREADS=4; ./par2d_test 2> out_par2d 24 | diff -q out_tiled out_par2d 25 | -------------------------------------------------------------------------------- /lib/pet_to_pluto.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Pluto: An automatic parallelizer and locality optimizer 3 | * 4 | * Copyright (C) 2007-2012 Uday Bondhugula 5 | * 6 | * This software is available under the MIT license. Please see LICENSE 7 | * in the top-level directory for details. 8 | * 9 | * This file is part of libpluto. 
10 | * 11 | */ 12 | #ifndef _PET_TO_PLUTO_H_ 13 | #define _PET_TO_PLUTO_H_ 14 | 15 | #include "pet.h" 16 | 17 | typedef struct plutoProg PlutoProg; 18 | typedef struct plutoContext PlutoContext; 19 | 20 | #if defined(__cplusplus) 21 | extern "C" { 22 | #endif 23 | 24 | PlutoProg *pet_to_pluto_prog(struct pet_scop *pscop, isl_ctx *, 25 | PlutoContext *context); 26 | 27 | #if defined(__cplusplus) 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150601.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 6.3e-11, '[15]' : 6.61429e-11, '[8]' : 1.07e-10, '[9]' : 6.48571e-11, '[18]' : 1.05286e-10, '[10]' : 1.07429e-10, '[11]' : 7.04286e-11, '[6]' : 1.06857e-10, '[7]' : 6.9e-11, '[12]' : 1.21714e-10, '[4]' : 1.05857e-10, '[5]' : 7.18571e-11, '[2]' : 1.11857e-10, '[3]' : 7.07143e-11, '[16]' : 1.06e-10, '[17]' : 6.75714e-11, '[1]' : 7.28571e-11, '[14]' : 1.05286e-10, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150606.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 1.31043e-09, '[15]' : 1.22457e-09, '[8]' : 1.85586e-09, '[9]' : 1.22557e-09, '[18]' : 1.83343e-09, '[10]' : 1.84086e-09, '[11]' : 1.22943e-09, '[6]' : 1.831e-09, '[7]' : 1.16271e-09, '[12]' : 1.85586e-09, '[4]' : 1.85886e-09, '[5]' : 1.255e-09, '[2]' : 1.25243e-09, '[3]' : 1.38086e-09, '[16]' : 1.82143e-09, '[17]' : 1.10714e-09, '[1]' : 1.34343e-09, '[14]' : 
1.95186e-09, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/jacobi/pluto/cmds: -------------------------------------------------------------------------------- 1 | 2 | 3 | *** icc *** 4 | seq: 5 | export OMP_NUM_THREADS=1; icc -fast jacobi-1d.base.c -DREPS=1 -DT=100 -DN=2500000 6 | par: 7 | export OMP_NUM_THREADS=4; icc -fast -parallel jacobi-1d.base.c -DREPS=1 -DT=100 -DN=2500000 8 | 9 | *** pluto *** 10 | seq: 11 | export OMP_NUM_THREADS=1; icc -fast jacobi-1d.pluto.seq.c -DREPS=1 -DT=100 -DN=2500000 12 | par: 13 | export OMP_NUM_THREADS=4; icc -fast -openmp jacobi-1d.pluto.par.c -DREPS=1 -DT=100 -DN=2500000 14 | 15 | *** pluto+orio *** 16 | seq: 17 | export OMP_NUM_THREADS=1; icc -fast -openmp jacobi-1d.pluto_orio.seq_par.c -DREPS=1 -DT=100 -DN=2500000 18 | par: 19 | export OMP_NUM_THREADS=4; icc -fast -openmp jacobi-1d.pluto_orio.seq_par.c -DREPS=1 -DT=100 -DN=2500000 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "isl"] 2 | path = isl 3 | url = https://github.com/bondhugula/isl-for-pluto.git 4 | [submodule "cloog-isl"] 5 | path = cloog-isl 6 | url = https://github.com/bondhugula/cloog.git 7 | [submodule "piplib"] 8 | path = piplib 9 | url = https://github.com/periscop/piplib.git 10 | [submodule "polylib"] 11 | path = polylib 12 | url = https://github.com/vincentloechner/polylib.git 13 | [submodule "candl"] 14 | path = candl 15 | url = https://github.com/periscop/candl.git 16 | 
[submodule "clan"] 17 | path = clan 18 | url = https://github.com/periscop/clan.git 19 | [submodule "openscop"] 20 | path = openscop 21 | url = https://github.com/periscop/openscop.git 22 | [submodule "pet"] 23 | path = pet 24 | url = https://github.com/bondhugula/pet-for-pluto.git 25 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150602.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 5.06795e-06, '[15]' : 5.06789e-06, '[8]' : 5.10353e-06, '[9]' : 5.07549e-06, '[18]' : 5.08327e-06, '[10]' : 5.08326e-06, '[11]' : 5.06794e-06, '[6]' : 5.09854e-06, '[7]' : 5.10514e-06, '[12]' : 5.08327e-06, '[4]' : 5.19119e-06, '[5]' : 5.09495e-06, '[2]' : 5.12875e-06, '[3]' : 5.12565e-06, '[16]' : 5.08326e-06, '[17]' : 5.06795e-06, '[1]' : 5.12571e-06, '[14]' : 5.08326e-06, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150603.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 1.20775e-06, '[15]' : 1.20746e-06, '[8]' : 1.19008e-06, '[9]' : 1.20759e-06, '[18]' : 1.19022e-06, '[10]' : 1.19008e-06, '[11]' : 1.20772e-06, '[6]' : 1.22399e-06, '[7]' : 1.18079e-06, '[12]' : 1.19021e-06, '[4]' : 1.23573e-06, '[5]' : 1.18927e-06, '[2]' : 1.23016e-06, '[3]' : 1.19698e-06, '[16]' : 1.19009e-06, '[17]' : 1.20772e-06, '[1]' : 1.19705e-06, '[14]' : 1.19014e-06, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' 
: 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150604.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 1.43005e-07, '[15]' : 1.42993e-07, '[8]' : 2.21566e-07, '[9]' : 1.42986e-07, '[18]' : 2.21958e-07, '[10]' : 2.22229e-07, '[11]' : 1.43007e-07, '[6]' : 2.21581e-07, '[7]' : 1.42986e-07, '[12]' : 2.21589e-07, '[4]' : 2.21567e-07, '[5]' : 1.42993e-07, '[2]' : 2.21581e-07, '[3]' : 1.35874e-07, '[16]' : 2.21579e-07, '[17]' : 1.43025e-07, '[1]' : 1.35863e-07, '[14]' : 2.2157e-07, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150605.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 1.43659e-08, '[15]' : 1.43464e-08, '[8]' : 2.22094e-08, '[9]' : 1.43539e-08, '[18]' : 2.22281e-08, '[10]' : 2.22059e-08, '[11]' : 1.43504e-08, '[6]' : 2.31321e-08, '[7]' : 1.43527e-08, '[12]' : 2.22464e-08, '[4]' : 2.22114e-08, '[5]' : 1.41926e-08, '[2]' : 1.34196e-08, '[3]' : 1.43539e-08, '[16]' : 2.22177e-08, '[17]' : 1.43497e-08, '[1]' : 1.43497e-08, '[14]' : 2.22089e-08, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' 
: 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/results/bgl/150607.output: -------------------------------------------------------------------------------- 1 | {'[13]' : 2.54014e-06, '[15]' : 2.54005e-06, '[8]' : 2.54864e-06, '[9]' : 2.54015e-06, '[18]' : 2.54853e-06, '[10]' : 2.54877e-06, '[11]' : 2.54011e-06, '[6]' : 2.54854e-06, '[7]' : 2.54017e-06, '[12]' : 2.54864e-06, '[4]' : 2.54855e-06, '[5]' : 2.54008e-06, '[2]' : 2.54868e-06, '[3]' : 2.5396e-06, '[16]' : 2.54865e-06, '[17]' : 2.5401e-06, '[1]' : 2.53955e-06, '[14]' : 2.54883e-06, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0, '' : 0} 2 | -------------------------------------------------------------------------------- /test/dynprog_param.c: -------------------------------------------------------------------------------- 1 | #define length 100 2 | 3 | // dynamic programming 4 | int main() { 5 | int i, j, k; 6 | int W[length][length]; //:input; 7 | int sum_c[length][length][length]; 8 | int c[length][length]; 9 | int out[1]; //:output; 10 | 11 | for (i = 0; i <= length - 2; i++) { 12 | for (j = length - 1 - i; j <= length - 1; j++) { 13 | sum_c[length - 2 - i][j][length - 2 - i] = 0; 14 | for (k = length - 2 - i + 1; k <= j - 1; k++) { 15 | sum_c[length - 2 - i][j][k] = 16 | sum_c[length - 2 - i][j][k - 1] + c[length - 2 - i][k] + c[k][j]; 17 | } 18 | c[length - 2 - i][j] = 19 | sum_c[length - 2 - i][j][j - 1] + W[length - 2 - i][j]; 20 
| } 21 | } 22 | out[0] = c[0][length - 1]; 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /test/intratileopt1.c: -------------------------------------------------------------------------------- 1 | // 2 | // While (i, j) provides spatial reuse for 'b', (j, i) provides spatial reuse 3 | // for 'a' and temporal reuse for 'c'. 4 | // CHECK: T(S1): (i, j) 5 | // CHECK: [pluto] After intra-tile optimize 6 | // CHECK: T(S1): (j, i) 7 | // CHECK: Output written 8 | // 9 | // When tiling, only the intra-tile loop 10 | // order is changed for locality here. 11 | // TILE-PARALLEL: T(S1): (i, j) 12 | // TILE-PARALLEL: [Pluto] After tiling: 13 | // TILE-PARALLEL: T(S1): (i/32, j/32, i, j) 14 | // TILE-PARALLEL: [pluto] After intra-tile optimize 15 | // TILE-PARALLEL: T(S1): (i/32, j/32, j, i) 16 | // 17 | #pragma scop 18 | for (i = 0; i < N; i++) { 19 | for (j = 0; j < M; j++) { 20 | a[j][i] = b[i][j] + c[j] + 1; 21 | } 22 | } 23 | 24 | #pragma endscop 25 | 26 | 27 | -------------------------------------------------------------------------------- /test/multi-stmt-stencil-seq.c: -------------------------------------------------------------------------------- 1 | // 2 | // Pluto will fuse these with shifts with the default heuristic (leads to a loss 3 | // of parallelism but better locality). 
4 | // 5 | // CHECK: T(S1): (i, 0) 6 | // CHECK: T(S2): (i+1, 1) 7 | // CHECK: T(S3): (i+2, 2) 8 | // CHECK: T(S4): (i+3, 3) 9 | // CHECK: T(S5): (i+4, 4) 10 | #pragma scop 11 | for (i = 1; i < n - 1; i++) { 12 | a1[i] = a0[i - 1] + a0[i] + a0[i + 1]; 13 | } 14 | for (i = 2; i < n - 2; i++) { 15 | a2[i] = a1[i - 1] + a1[i] + a1[i + 1]; 16 | } 17 | for (i = 3; i < n - 3; i++) { 18 | a3[i] = a2[i - 1] + a2[i] + a2[i + 1]; 19 | } 20 | for (i = 4; i < n - 4; i++) { 21 | a4[i] = a3[i - 1] + a3[i] + a3[i + 1]; 22 | } 23 | for (i = 5; i < n - 5; i++) { 24 | a5[i] = a4[i - 1] + a4[i] + a4[i + 1]; 25 | } 26 | #pragma endscop 27 | -------------------------------------------------------------------------------- /test/heat-2d.c: -------------------------------------------------------------------------------- 1 | // CHECK: T(S1): (t, t+i, t+j) 2 | // TILE-PARALLEL: T(S1): ((t-i)/32+(t+i)/32, (t+i)/32, (t+j)/32, t, t+i, t+j) 3 | #define N 4000L 4 | #define T 1000L 5 | 6 | /* Define our arrays */ 7 | double A[2][N + 2][N + 2]; 8 | 9 | int main(int argc, char *argv[]) { 10 | #pragma scop 11 | for (int t = 0; t < T; t++) { 12 | for (int i = 1; i < N + 1; i++) { 13 | for (int j = 1; j < N + 1; j++) { 14 | A[(t + 1) % 2][i][j] = 15 | 0.125 * (A[t % 2][i + 1][j] - 2.0 * A[t % 2][i][j] + 16 | A[t % 2][i - 1][j]) + 17 | 0.125 * (A[t % 2][i][j + 1] - 2.0 * A[t % 2][i][j] + 18 | A[t % 2][i][j - 1]) + 19 | A[t % 2][i][j]; 20 | } 21 | } 22 | } 23 | #pragma endscop 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | - Pluto now uses C99 and C++11; it originally used ANSI C / C89 and so a lot of 3 | the existing code is still in that form. The old code is typically updated 4 | when it is touched. 5 | 6 | - The LLVM coding style is used. 
Automatic formatting for this can be done 7 | using clang-format: 8 | 9 | $ clang-format -style=LLVM 10 | 11 | A .clang-format corresponding to this config exists in its top-level directory. 12 | 13 | - Functions, structure members, and non-obvious variables should have comments. 14 | 15 | - Contributions should be submitted as pull requests on the Github repo: 16 | https://github.com/bondhugula/pluto 17 | 18 | - Documentation comments for functions should be in doxygen format (C++ style 19 | for C++ sources, and either C or C++ style for C sources). 20 | -------------------------------------------------------------------------------- /test/mvt.c: -------------------------------------------------------------------------------- 1 | // CHECK: Output written 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "decls.h" 9 | 10 | #include "util.h" 11 | 12 | int main() { 13 | int i, j, k, l, t; 14 | 15 | double t_start, t_end; 16 | 17 | init_array(); 18 | 19 | IF_TIME(t_start = rtclock()); 20 | 21 | #pragma scop 22 | for (i = 0; i < N; i++) { 23 | for (j = 0; j < N; j++) { 24 | x1[i] = x1[i] + a[i][j] * y_1[j]; 25 | } 26 | } 27 | 28 | for (i = 0; i < N; i++) { 29 | for (j = 0; j < N; j++) { 30 | x2[i] = x2[i] + a[j][i] * y_2[j]; 31 | } 32 | } 33 | #pragma endscop 34 | 35 | IF_TIME(t_end = rtclock()); 36 | IF_TIME(printf("%0.6lfs\n", t_end - t_start)); 37 | 38 | #ifdef TEST 39 | print_array(); 40 | #endif 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /examples/matmul-init/Makefile: -------------------------------------------------------------------------------- 1 | SRC=matmul-init 2 | 3 | PLCFLAGS = --codegen-context=1000 4 | TILEFLAGS = --second-level-tile #--unroll 5 | 6 | include ../common.mk 7 | 8 | pouchet: $(SRC).pouchet.c decls.h papi_defs.h util.h 9 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).pouchet.c -o pouchet $(CFLAGS) 10 | 11 | pouchet_test: $(SRC).pouchet.c decls.h papi_defs.h 
util.h 12 | $(CC) $(OPT_FLAGS) -openmp -lm $(SRC).pouchet.c -o pouchet_test $(CFLAGS) -DTEST 13 | 14 | 15 | ptest: opt_test tiled_test orig_test par_test pouchet_test 16 | ./orig_test 2> out_orig 17 | ./tiled_test 2> out_tiled 18 | diff -q out_orig out_tiled 19 | export OMP_NUM_THREADS=4; ./par_test 2> out_par4 20 | diff -q out_orig out_par4 21 | export OMP_NUM_THREADS=4; ./pouchet_test 2> out_pouchet4 22 | diff -q out_orig out_pouchet4 23 | @echo Success! 24 | 25 | clean: 26 | -------------------------------------------------------------------------------- /test/durbin.c: -------------------------------------------------------------------------------- 1 | #define N 100 2 | 3 | // durbin 4 | int main(int argc, char *argv[]) { 5 | int k, i; 6 | int y[N][N]; 7 | int sum[N][N]; 8 | int beta[N]; 9 | int alpha[N]; 10 | int r[N]; // input 11 | int out[N]; // output 12 | 13 | y[0][0] = r[0]; 14 | beta[0] = 1; 15 | alpha[0] = r[0]; 16 | 17 | for (k = 1; k <= N - 1; k++) { 18 | beta[k] = beta[k - 1] - alpha[k - 1] * alpha[k - 1] * beta[k - 1]; 19 | sum[0][k] = r[k]; 20 | for (i = 0; i <= k - 1; i++) { 21 | sum[i + 1][k] = sum[i][k] + r[k - i - 1] * y[i][k - 1]; 22 | } 23 | alpha[k] = -sum[k][k] * beta[k]; 24 | for (i = 0; i <= k - 1; i++) { 25 | y[i][k] = y[i][k - 1] + alpha[k] * y[k - i - 1][k - 1]; 26 | } 27 | y[k][k] = alpha[k]; 28 | } 29 | for (i = 0; i <= N - 1; i++) { 30 | out[i] = y[i][N - 1]; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/ptune2_skeleton_code.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /*@ global @*/ 6 | 7 | double rtclock() 8 | { 9 | struct timezone tzp; 10 | struct timeval tp; 11 | int stat; 12 | gettimeofday (&tp, &tzp); 13 | return (tp.tv_sec + tp.tv_usec*1.0e-6); 14 | } 15 | 16 | int main() 17 | { 18 | /*@ prologue @*/ 19 | 20 | double orio_t_start=0, orio_t_end=0, orio_t_total=0; 21 | 
int orio_i; 22 | 23 | for (orio_i=0; orio_i 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define N 1000 8 | double f[N][N+13]; 9 | 10 | #include "util.h" 11 | 12 | double t_start, t_end; 13 | 14 | int main() 15 | { 16 | int i, j, k, t; 17 | 18 | init_array() ; 19 | 20 | #ifdef PERFCTR 21 | PERF_INIT; 22 | #endif 23 | 24 | IF_TIME(t_start = rtclock()); 25 | 26 | /* pluto start (N) */ 27 | #pragma scop 28 | for (i=1; i<=N-2; i++) { 29 | for (j=1; j<=N-2; j++) { 30 | f[i][j] = f[j][i] + f[i][j-1]; 31 | } 32 | } 33 | #pragma endscop 34 | /* pluto end */ 35 | 36 | IF_TIME(t_end = rtclock()); 37 | IF_TIME(fprintf(stderr, "%0.6lfs\n", t_end - t_start)); 38 | 39 | if (fopen(".test", "r")) { 40 | print_array(); 41 | } 42 | 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/jacobi/pluto/jacobi-1d_skeleton_code.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /*@ global @*/ 6 | 7 | double rtclock() 8 | { 9 | struct timezone tzp; 10 | struct timeval tp; 11 | int stat; 12 | gettimeofday (&tp, &tzp); 13 | return (tp.tv_sec + tp.tv_usec*1.0e-6); 14 | } 15 | 16 | int main() 17 | { 18 | /*@ prologue @*/ 19 | 20 | double orio_t_start=0, orio_t_end=0, orio_t_total=0; 21 | int orio_i; 22 | 23 | for (orio_i=0; orio_i <-l option to CLooG> 12 | # 13 | # 14 | 15 | if [ $# -ne 4 ]; then 16 | echo -n -e "Usage:" 17 | echo -e "\trecloog {-f option to cloog} {-l option to CLooG} {existing C file} {new .cloog file}" 18 | exit 19 | fi 20 | 21 | cloog -cpp 1 -esp 1 -csp 1 -f $1 -l $2 $4 > .out.c 22 | 23 | NUMLINES=`wc -l $3 | awk '{print $1}'` 24 | 25 | grep -B $NUMLINES "/* Generated from" $3 | grep -v "/* Generated" > .header 26 | grep -A $NUMLINES "/* End of CLooG code" $3 > .footer 27 | 28 | cat .header .out.c .footer > $3 29 | rm -f .header .out.c .footer 30 | 
-------------------------------------------------------------------------------- /test/durbin_param.c: -------------------------------------------------------------------------------- 1 | int N; 2 | 3 | // durbin 4 | void main() { 5 | int k, i; 6 | // int N=100;//this parameter is to be changed 7 | int **y; 8 | int **sum; 9 | int *beta; 10 | int *alpha; 11 | int *r; 12 | int *out; 13 | 14 | y[0][0] = r[0]; 15 | beta[0] = 1; 16 | alpha[0] = r[0]; 17 | 18 | for (k = 1; k <= N - 1; k++) { 19 | beta[k] = beta[k - 1] - alpha[k - 1] * alpha[k - 1] * beta[k - 1]; 20 | sum[0][k] = r[k]; 21 | for (i = 0; i <= k - 1; i++) { 22 | sum[i + 1][k] = sum[i][k] + r[k - i - 1] * y[i][k - 1]; 23 | } 24 | alpha[k] = -sum[k][k] * beta[k]; 25 | for (i = 0; i <= k - 1; i++) { 26 | y[i][k] = y[i][k - 1] + alpha[k] * y[k - i - 1][k - 1]; 27 | } 28 | y[k][k] = alpha[k]; 29 | } 30 | for (i = 0; i <= N - 1; i++) { 31 | out[i] = y[i][N - 1]; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/arrcopy.c: -------------------------------------------------------------------------------- 1 | 2 | /*@ begin Loop( 3 | transform ArrCopy(aref='C[i][j]', dimsizes=[32,32], suffix='_copy') 4 | transform ArrCopy(aref='A[i][k]', dimsizes=[32,32]) 5 | transform ArrCopy(aref='B[k][j]', dimsizes=[32,32], dtype='double') 6 | for (iii=0; iii<=cbv_1; iii=iii+128) 7 | for (jjj=0; jjj<=N-1; jjj=jjj+512) 8 | for (kkk=0; kkk<=K-1; kkk=kkk+64) 9 | for (ii=iii; ii<=min(M-1,iii+96); ii=ii+32) 10 | for (jj=jjj; jj<=min(N-1,jjj+480); jj=jj+32) 11 | for (kk=kkk; kk<=min(K-1,kkk+32); kk=kk+32) 12 | for (i=ii; i<=min(M-1,ii+31); i=i+1) 13 | for (j=jj; j<=min(N-1,jj+31); j=j+1) 14 | for (k=kk; k<=min(K-1,kk+31); k=k+1) 15 | C[i][j]=C[i][j]+A[i][k]*B[k][j]; 16 | ) @*/ 17 | 18 | /*@ end @*/ 19 | 20 | 21 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/fdtd/pluto2/fdtd-2d_profiling.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define tmax 400 5 | #define nx 800 6 | #define ny 800 7 | double ex[nx][ny+1]; 8 | double ey[nx+1][ny]; 9 | double hz[nx][ny]; 10 | 11 | void init_array() 12 | { 13 | int i, j; 14 | 15 | for (i=0; i 2 | #include 3 | 4 | #define N 5000 5 | #define alpha 1 6 | #define beta 1 7 | 8 | double A[N][N]; 9 | double B[N][N]; 10 | double x[N]; 11 | double u1[N]; 12 | double u2[N]; 13 | double v2[N]; 14 | double v1[N]; 15 | double w[N]; 16 | double y[N]; 17 | double z[N]; 18 | 19 | void init_array() 20 | { 21 | int i, j; 22 | for (i=0; i 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | 14 | for (i=0; i 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | 14 | for (i=0; i 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | 14 | for (i=0; i 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | 14 | for (i=0; i 2 | #include 3 | 4 | void k_classify_local(float *pointv, uint32_t pointc, uint32_t point_lda, 5 | float *clusterv, float *clusterv2, uint32_t *counterv, 6 | uint32_t clusterc, uint32_t dims) { 7 | uint32_t p, k, d, kmin; 8 | float dist_min, dist, tmp; 9 | 10 | // A conservative (accesses) version 11 | 12 | #pragma scop 13 | for (p = 0; p < pointc; ++p) { 14 | kmin = -1; 15 | dist_min = FLT_MAX; 16 | for (k = 0; k < clusterc; ++k) { 17 | dist = 0.0; 18 | for (d = 0; d < dims; ++d) { 19 | tmp = clusterv[0] - pointv[0]; 20 | dist += tmp * tmp; 21 | } 22 | dist_min = (dist < dist_min) ? 
dist : dist_min; 23 | kmin = (dist < dist_min) ? k : kmin; 24 | } 25 | counterv[kmin]++; 26 | for (d = 0; d < dims; ++d) { 27 | clusterv2[0] += pointv[0]; 28 | } 29 | } 30 | #pragma endscop 31 | } 32 | -------------------------------------------------------------------------------- /test/test-per-cc-obj.c: -------------------------------------------------------------------------------- 1 | // CC-OBJ-CHECK: T(S1): (0, i, k, 2) 2 | // CC-OBJ-CHECK: T(S2): (0, i, k, 3) 3 | // CC-OBJ-CHECK: T(S3): (1, i+1, 0, 0) 4 | // CC-OBJ-CHECK: T(S4): (1, i, 0, 1) 5 | // CHECK: After intra-tile optimize 6 | // CC-OBJ-CHECK: T(S1): (0, k, i, 2) 7 | // CC-OBJ-CHECK: T(S2): (0, k, i, 3) 8 | // CC-OBJ-CHECK: T(S3): (1, i+1, 0, 0) 9 | // CC-OBJ-CHECK: T(S4): (1, i, 0, 1) 10 | // CHECK: Output written 11 | 12 | /* Had per CC objective not been used, then Pluto would have found the solution 13 | * T(S3): (1, i, 0, 0), T(S4): (1, i, 0, 1), which is not parallel for 14 | * statements S3 and S4 */ 15 | #pragma scop 16 | for (k = 0; k < N; k++) { 17 | for (i = 0; i < N; i++) { 18 | A[k][i] = i; 19 | } 20 | for (i = 0; i < N; i++) { 21 | B[k][i] = A[k][i - 1] + A[N - k][i]; 22 | } 23 | } 24 | for (i = 0; i < N; i++) { 25 | C[i] = B[0][i]; 26 | } 27 | for (i = 0; i < N; i++) { 28 | D[i] = C[i - 1]; 29 | } 30 | #pragma endscop 31 | -------------------------------------------------------------------------------- /include/pluto/matrix.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * libpluto - A library version of Pluto 3 | ****************************************************************************** 4 | * 5 | * Copyright (C) 2012 Uday Bondhugula 6 | * 7 | * This software is available under the MIT license. Please see LICENSE in the 8 | * top-level directory for details. 9 | * 10 | * This file is part of libpluto. 
11 | * 12 | */ 13 | #ifndef _PLUTO_MATRIX_H 14 | #define _PLUTO_MATRIX_H 15 | 16 | #include 17 | 18 | typedef struct plutoContext PlutoContext; 19 | 20 | /* A matrix */ 21 | struct pluto_matrix { 22 | /* The values */ 23 | int64_t **val; 24 | 25 | unsigned nrows; 26 | unsigned ncols; 27 | 28 | /* Pre-allocated number of rows */ 29 | int alloc_nrows; 30 | int alloc_ncols; 31 | 32 | PlutoContext *context; 33 | }; 34 | typedef struct pluto_matrix PlutoMatrix; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy4.c: -------------------------------------------------------------------------------- 1 | /*@ begin PerfTuning ( 2 | def build { 3 | arg command = 'gcc'; 4 | arg options = '-O0'; 5 | } 6 | 7 | def performance_params { 8 | param UF[] = range(1,33); 9 | } 10 | 11 | def input_params { 12 | param N[] = [1000,10000000]; 13 | } 14 | 15 | def input_vars { 16 | decl static double y[N] = 0; 17 | decl double a1 = random; 18 | decl double a2 = random; 19 | decl double a3 = random; 20 | decl double a4 = random; 21 | decl static double x1[N] = random; 22 | decl static double x2[N] = random; 23 | decl static double x3[N] = random; 24 | decl static double x4[N] = random; 25 | } 26 | ) @*/ 27 | 28 | int i; 29 | 30 | /*@ begin Loop ( 31 | transform Unroll(ufactor=UF) 32 | for (i=0; i<=N-1; i++) 33 | y[i] = y[i] + a1*x1[i] + a2*x2[i] + a3*x3[i] + a4*x4[i]; 34 | ) @*/ 35 | for (i=0; i<=N-1; i++) 36 | y[i] = y[i] + a1*x1[i] + a2*x2[i] + a3*x3[i] + a4*x4[i]; 37 | /*@ end @*/ 38 | 39 | /*@ end @*/ 40 | 41 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/petsc/bratu.bg.spec: -------------------------------------------------------------------------------- 1 | let REGS=32; 2 | 3 | spec bratu { 4 | def build 5 | { 6 | arg command = 'mpixlc '; 7 | arg options = '-O3 -qstrict -lm'; 8 | } 9 | 10 | def performance_counter 11 | { 12 | arg method = 'basic timer'; 
13 | arg repetitions = 1000; 14 | } 15 | 16 | def performance_params 17 | { 18 | param Ui[] = range(1,32); 19 | param Uj[] = range(1,4); 20 | constraint reg_capacity = Ui * Uj <= REGS; 21 | } 22 | 23 | def input_params 24 | { 25 | param SIZE = 512; 26 | param lambda = 6; 27 | decl int jl = 0; 28 | decl int il = 0; 29 | decl int jh = SIZE; 30 | decl int ih = SIZE; 31 | decl double x[ih][ih] = random; 32 | decl double f[jh][jh] = 0; 33 | } 34 | 35 | def search 36 | { 37 | arg algorithm = 'Exhaustive'; 38 | arg time_limit = 10; 39 | arg run_command = 'cqsub -n 64 -t 10 -q short '; 40 | arg num_processes = 64; 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/petsc/bratu.regtiling.bg.spec: -------------------------------------------------------------------------------- 1 | let REGS=32; 2 | 3 | spec bratu { 4 | def build 5 | { 6 | arg command = 'mpixlc '; 7 | arg options = '-O3 -qstrict -lm'; 8 | } 9 | 10 | def performance_counter 11 | { 12 | arg method = 'basic timer'; 13 | arg repetitions = 1000; 14 | } 15 | 16 | def performance_params 17 | { 18 | param Ui[] = range(1,32); 19 | param Uj[] = range(1,4); 20 | constraint reg_capacity = Ui * Uj <= REGS; 21 | } 22 | 23 | def input_params 24 | { 25 | param SIZE = 512; 26 | param lambda = 6; 27 | decl int jl = 0; 28 | decl int il = 0; 29 | decl int jh = SIZE; 30 | decl int ih = SIZE; 31 | decl double x[ih][ih] = random; 32 | decl double f[jh][jh] = 0; 33 | } 34 | 35 | def search 36 | { 37 | arg algorithm = 'Exhaustive'; 38 | arg time_limit = 10; 39 | arg run_command = 'cqsub -n 64 -t 10 -q short '; 40 | arg num_processes = 64; 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/petsc/results/bratu.regtiling.bg.spec: -------------------------------------------------------------------------------- 1 | let REGS=32; 2 | 3 | spec bratu { 4 | def build 5 | { 6 | arg command = 
'mpixlc '; 7 | arg options = '-O3 -qstrict -lm'; 8 | } 9 | 10 | def performance_counter 11 | { 12 | arg method = 'basic timer'; 13 | arg repetitions = 1000; 14 | } 15 | 16 | def performance_params 17 | { 18 | param Ui[] = range(1,32); 19 | param Uj[] = range(1,4); 20 | constraint reg_capacity = Ui * Uj <= REGS; 21 | } 22 | 23 | def input_params 24 | { 25 | param SIZE = 512; 26 | param lambda = 6; 27 | decl int jl = 0; 28 | decl int il = 0; 29 | decl int jh = SIZE; 30 | decl int ih = SIZE; 31 | decl double x[ih][ih] = random; 32 | decl double f[jh][jh] = 0; 33 | } 34 | 35 | def search 36 | { 37 | arg algorithm = 'Exhaustive'; 38 | arg time_limit = 10; 39 | arg run_command = 'cqsub -n 64 -t 10 -q short '; 40 | arg num_processes = 64; 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /examples/matmul-init/util.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | 14 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | 6 | #include "decls.h" 7 | 8 | #ifdef PERFCTR 9 | #include 10 | #include "papi_defs.h" 11 | #endif 12 | 13 | #include "util.h" 14 | 15 | int main() 16 | { 17 | int i, j, k; 18 | register double s; 19 | double t_start, t_end; 20 | 21 | init_array(); 22 | 23 | #ifdef PERFCTR 24 | PERF_INIT; 25 | #endif 26 | 27 | IF_TIME(t_start = rtclock()); 28 | 29 | #pragma scop 30 | for(i=0; i 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include "decls.h" 8 | #include "util.h" 9 | 10 | double t_start, t_end; 11 | 12 | int main() 13 | { 14 | int i, j, k, t; 15 | 16 | 17 | init_array() ; 18 | 19 | #ifdef PERFCTR 20 | PERF_INIT; 21 | #endif 22 | 23 | IF_TIME(t_start = rtclock()); 24 | 25 | /* pluto start (T,N) */ 26 | for (t=0; t<=T-1; t++) { 27 | for (i=1; i<=N-2; i++) { 28 | for (j=1; j<=N-2; j++) { 29 | 
a[i][j] = (a[i-1][j-1] + a[i-1][j] + a[i-1][j+1] 30 | + a[i][j-1] + a[i][j] + a[i][j+1] 31 | + a[i+1][j-1] + a[i+1][j] + a[i+1][j+1])/9.0; 32 | } 33 | } 34 | } 35 | /* pluto end */ 36 | 37 | IF_TIME(t_end = rtclock()); 38 | IF_TIME(fprintf(stderr, "%0.6lfs\n", t_end - t_start)); 39 | 40 | #ifdef TEST 41 | print_array(); 42 | #endif 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/gemver/pluto/graphs/gemver-par.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | #set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | 10 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 11 | #"6x" 6, "7x" 7, "8x" 8) 12 | 13 | set xlabel 'Number of cores' #font "Helvetica,16" 14 | set ylabel "GFLOPs" #font "Helvetica,16" 15 | 16 | set title "GEMVER (Parallel) with N=10000" #font "Helvetica,20" 17 | 18 | set xtics (1, 2, 3, 4) 19 | 20 | set xrange [0.7:4.3] 21 | set yrange [0:2] 22 | 23 | set terminal postscript enhanced color eps #"Times-Roman" 22 24 | #set terminal png enhanced tiny size 450,350 25 | 26 | set output 'gemver-par.eps' 27 | #set output 'gemver-par.png' 28 | 29 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 30 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 31 | 32 | plot 'gemver-par.dat' using 1:2 title 'ICC -parallel -fast' , \ 33 | '' using 1:3 title 'PLuTo 0.0.1', \ 34 | '' using 1:4 title 'PLuTo+ancc' 35 | 36 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/fdtd/pluto2/graphs/fdtd-2d-par.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | #set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | 10 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 11 | #"6x" 6, "7x" 7, "8x" 8) 12 | 13 | set 
xlabel 'Number of cores' #font "Helvetica,16" 14 | set ylabel "GFLOPs" #font "Helvetica,16" 15 | 16 | set title "FDTD-2D (Parallel) with T=500, N=2000" #font "Helvetica,20" 17 | 18 | set xtics (1, 2, 3, 4) 19 | 20 | set xrange [0.7:4.3] 21 | set yrange [0:10] 22 | 23 | #set terminal postscript enhanced color eps #"Times-Roman" 22 24 | set terminal png enhanced tiny size 450,350 25 | 26 | #set output 'fdtd-2d-par.eps' 27 | set output 'fdtd-2d-par.png' 28 | 29 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 30 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 31 | 32 | plot 'fdtd-2d-par.dat' using 1:2 title 'ICC -parallel -fast' , \ 33 | '' using 1:3 title 'PLuTo 0.0.1', \ 34 | '' using 1:4 title 'PLuTo+ancc' 35 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/gemver/pluto/graphs/gemver.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | #set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 10 | #"6x" 6, "7x" 7, "8x" 8) 11 | 12 | set xlabel 'Matrix size (N)' #font "Helvetica,16" 13 | set ylabel "GFLOPs" #font "Helvetica,16" 14 | 15 | set title "GEMVER (Sequential)" #font "Helvetica,18" 16 | 17 | set xtics ("2k" 2000, "4k" 4000, "6k" 6000, "8k" 8000, "10k" 10000) 18 | 19 | set xrange [1000:11000] 20 | set yrange [0:2] 21 | 22 | set terminal postscript enhanced color eps #"Times-Roman" 22 23 | #set terminal png enhanced tiny size 450,350 24 | 25 | set output 'gemver.eps' 26 | #set output 'gemver.png' 27 | 28 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 29 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 30 | 31 | plot 'gemver.dat' using 1:2 title 'ICC -fast' , \ 32 | '' using 1:3 title 'PLuTo 0.0.1', \ 33 | '' using 1:4 title 'PLuTo+ancc' 34 | 35 | 36 | -------------------------------------------------------------------------------- 
/orio-0.1.0/testsuite/adi/pluto/graphs/adi-par.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | #set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | 10 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 11 | #"6x" 6, "7x" 7, "8x" 8) 12 | 13 | set xlabel 'Number of cores' #font "Helvetica,16" 14 | set ylabel "GFLOPs" #font "Helvetica,16" 15 | 16 | set title "ADI -- Alternate Direction Implicit (Parallel) with T=512, N=1024" #font "Helvetica,20" 17 | 18 | set xtics (1, 2, 3, 4) 19 | 20 | set xrange [0.7:4.3] 21 | set yrange [0:4.5] 22 | 23 | set terminal postscript enhanced color eps #"Times-Roman" 22 24 | #set terminal png enhanced tiny size 450,350 25 | 26 | set output 'adi-par.eps' 27 | #set output 'adi-par.png' 28 | 29 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 30 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 31 | 32 | plot 'adi-par.dat' using 1:2 title 'ICC -parallel -fast' , \ 33 | '' using 1:3 title 'PLuTo 0.0.1', \ 34 | '' using 1:4 title 'PLuTo+ancc' 35 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/fdtd/pluto2/graphs/fdtd-2d.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | 10 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 11 | #"6x" 6, "7x" 7, "8x" 8) 12 | 13 | set xlabel 'Matrix size (N)' #font "Helvetica,16" 14 | set ylabel "GFLOPs" #font "Helvetica,16" 15 | 16 | set title "FDTD-2D (Sequential) with T=500" #font "Helvetica,18" 17 | 18 | set xtics (125, 250, 500, "1k" 1000, "2k" 2000, "4k" 4000) 19 | 20 | set xrange [100:5000] 21 | set yrange [0:3.5] 22 | 23 | #set terminal postscript enhanced color eps #"Times-Roman" 22 24 | set terminal png enhanced tiny size 450,350 25 | 26 | #set output 
'fdtd-2d.eps' 27 | set output 'fdtd-2d.png' 28 | 29 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 30 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 31 | 32 | plot 'fdtd-2d.dat' using 1:2 title 'ICC -fast' , \ 33 | '' using 1:3 title 'PLuTo 0.0.1', \ 34 | '' using 1:4 title 'PLuTo+ancc' 35 | 36 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/petsc/bratu.spec: -------------------------------------------------------------------------------- 1 | let REGS=32; 2 | 3 | spec bratu { 4 | def build 5 | { 6 | arg command = '/Users/norris/software/openmpi-1.1.4/bin/mpicc '; 7 | arg options = '-O3 -lm'; 8 | } 9 | 10 | def performance_counter 11 | { 12 | arg method = 'basic timer'; 13 | arg repetitions = 10; 14 | } 15 | 16 | def performance_params 17 | { 18 | param Ui[] = range(1,16); 19 | param Uj[] = range(1,2); 20 | constraint reg_capacity = Ui * Uj <= REGS; 21 | } 22 | 23 | def input_params 24 | { 25 | param SIZE = 512; 26 | param lambda = 6; 27 | decl int jl = 0; 28 | decl int il = 0; 29 | decl int jh = SIZE; 30 | decl int ih = SIZE; 31 | decl double x[ih][ih] = random; 32 | decl double f[jh][jh] = 0; 33 | } 34 | 35 | def search 36 | { 37 | arg algorithm = 'Exhaustive'; 38 | arg time_limit = 10; 39 | arg run_command = '/Users/norris/software/openmpi-1.1.4/bin/mpirun -np 4'; 40 | arg num_processes = 4; 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/seidel/pluto/graphs/seidel-par.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | #set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | 10 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 11 | #"6x" 6, "7x" 7, "8x" 8) 12 | 13 | set xlabel 'Number of cores' #font "Helvetica,16" 14 | set ylabel "GFLOPs" #font "Helvetica,16" 15 | 16 | set title "3-D Gauss Seidel (Parallel) with 
T=1024, N=1024" #font "Helvetica,20" 17 | 18 | set xtics (1, 2, 3, 4) 19 | 20 | set xrange [0.7:4.3] 21 | set yrange [0:10] 22 | 23 | #set terminal postscript enhanced color eps #"Times-Roman" 22 24 | set terminal png enhanced tiny size 450,350 25 | 26 | #set output 'seidel-par.eps' 27 | set output 'seidel-par.png' 28 | 29 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 30 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 31 | 32 | plot 'seidel-par.dat' using 1:2 title 'ICC -parallel -fast' , \ 33 | '' using 1:3 title 'PLuTo 0.0.1', \ 34 | '' using 1:4 title 'PLuTo+ancc' 35 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/adi/pluto/graphs/adi.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | 10 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 11 | #"6x" 6, "7x" 7, "8x" 8) 12 | 13 | set xlabel 'Matrix size (N)' #font "Helvetica,16" 14 | set ylabel "GFLOPs" #font "Helvetica,16" 15 | 16 | set title "ADI -- Alternate Direction Implicit (Sequential) with T=512" #font "Helvetica,18" 17 | 18 | set xtics (256, 512, "1k" 1024, "2k" 2048, "4k" 4096) 19 | 20 | set xrange [100:5000] 21 | set yrange [0:1.5] 22 | 23 | set terminal postscript enhanced color eps #"Times-Roman" 22 24 | #set terminal png enhanced tiny size 450,350 25 | 26 | set output 'adi.eps' 27 | #set output 'adi.png' 28 | 29 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 30 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 31 | 32 | plot 'adi.dat' using 1:2 title 'ICC -fast' , \ 33 | '' using 1:3 title 'PLuTo 0.0.1', \ 34 | '' using 1:4 title 'PLuTo+ancc' 35 | 36 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/axpy/axpy.bg.spec: -------------------------------------------------------------------------------- 1 | 2 | spec align_unroll { 3 | def build 
4 | { 5 | arg command = 'mpixlc '; 6 | arg options = '-O3 -qstrict -lm'; 7 | } 8 | 9 | def performance_counter 10 | { 11 | arg method = 'basic timer'; 12 | arg repetitions = 10000; 13 | } 14 | 15 | def performance_params 16 | { 17 | param UF[] = range(1,20); 18 | } 19 | 20 | def input_params 21 | { 22 | param SIZE = @THESIZE@; 23 | decl int n = SIZE; 24 | decl double a1 = 4.232; 25 | decl double a2 = 134531.2145; 26 | decl double a3 = 43.24141; 27 | decl double a4 = 241.24314; 28 | decl double x1[n] = random; 29 | decl double x2[n] = random; 30 | decl double x3[n] = random; 31 | decl double x4[n] = random; 32 | decl double y[n] = 0; 33 | } 34 | 35 | def search 36 | { 37 | arg algorithm = 'Exhaustive'; 38 | arg time_limit = 20; 39 | arg run_command = 'cqsub -n 64 -t 10 -q short '; 40 | arg num_processes = 64; 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /orio-0.1.0/testsuite/seidel/pluto/graphs/seidel.plot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | 3 | # plot file for speedup 4 | set data style lp 5 | set logscale x 2 6 | #set key top left 7 | #set grid 8 | 9 | 10 | #set ytics ("1x" 1, "2x" 2, "3x" 3, "4x" 4, "5x" 5, \ 11 | #"6x" 6, "7x" 7, "8x" 8) 12 | 13 | set xlabel 'Matrix size (N)' #font "Helvetica,16" 14 | set ylabel "GFLOPs" #font "Helvetica,16" 15 | 16 | set title "3-D Gauss Seidel (Sequential) with T=1024" #font "Helvetica,18" 17 | 18 | set xtics (256, 512, "1k" 1024, "2k" 2048, "4k" 4096) 19 | 20 | set xrange [200:5000] 21 | set yrange [0:3.5] 22 | 23 | #set terminal postscript enhanced color eps #"Times-Roman" 22 24 | set terminal png enhanced tiny size 450,350 25 | 26 | #set output 'seidel.eps' 27 | set output 'seidel.png' 28 | 29 | set style line 1 lt 9 lw 3 pt 3 ps 0.5 30 | set style line 2 lt 7 lw 2 pt 3 ps 0.5 31 | 32 | plot 'seidel.dat' using 1:2 title 'ICC -fast' , \ 33 | '' using 1:3 title 'PLuTo 0.0.1', \ 34 | '' using 1:4 
title 'PLuTo+ancc' 35 | 36 | -------------------------------------------------------------------------------- /examples/adi/util.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifdef TIME 5 | #define IF_TIME(foo) foo; 6 | #else 7 | #define IF_TIME(foo) 8 | #endif 9 | 10 | void init_array() 11 | { 12 | int i, j; 13 | 14 | for (i=0; i