├── Makefile ├── README.md ├── arrdecl.hpp ├── benchmarks ├── addsgd4 │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ └── searchspacer1t │ │ ├── Makefile │ │ ├── addsg4.idsl │ │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl ├── addsgd6 │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ └── searchspacer1t │ │ ├── .out.cu.swp │ │ ├── Makefile │ │ ├── addsg6.idsl │ │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl ├── cheby │ ├── global-stream │ │ ├── Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── 
timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu1t │ │ ├── Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu2t │ │ ├── .swp │ │ ├── Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu3t │ │ ├── .swp │ │ ├── Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu4t │ │ ├── .swp │ │ ├── Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu5t │ │ ├── .swp │ │ ├── Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchSpacergpu6t │ │ ├── .swp │ │ ├── 
Makefile │ │ ├── cheby.driver.cpp │ │ ├── cheby.gold.h │ │ ├── cheby.idsl │ │ ├── common │ │ ├── common.hpp │ │ ├── common.hpp~ │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── denoise │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── denoise.baked.cpp │ │ ├── denoise.driver.cpp │ │ ├── denoise.gold.h │ │ ├── denoise.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── denoise.baked.cpp │ │ ├── denoise.driver.cpp │ │ ├── denoise.gold.h │ │ ├── denoise.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchspacer1t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── denoise.baked.cpp │ │ ├── denoise.driver.cpp │ │ ├── denoise.gold.h │ │ ├── denoise.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchspacer2t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── denoise.baked.cpp │ │ ├── denoise.driver.cpp │ │ ├── denoise.gold.h │ │ ├── denoise.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchspacer4t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── denoise.driver.cpp │ │ ├── denoise.gold.h │ │ ├── denoise.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchspacer6t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── common.hpp~ │ │ ├── 
cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── denoise.driver.cpp │ │ ├── denoise.gold.h │ │ ├── denoise.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── diffterm │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── diffterm.driver.cpp │ │ ├── diffterm.gold.h │ │ ├── diffterm.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── diffterm.driver.cpp │ │ ├── diffterm.gold.h │ │ ├── diffterm.idsl │ │ ├── global.sh │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchspacer1t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── common.hpp~ │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── diffterm-new.idsl │ │ ├── diffterm.driver.cpp │ │ ├── diffterm.gold.h │ │ ├── diffterm.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── helmholtz │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── hhz.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── hhz.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu1t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── hhz.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── 
searchSpacergpu2t │ │ ├── .out.cu.swp │ │ ├── .right.cu.swp │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hhz.2t.idsl │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu3t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hhz.3t.idsl │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu4t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hhz.4t.idsl │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu5t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hhz.5t.idsl │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchSpacergpu6t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── common.hpp~ │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── hhz.6t.idsl │ │ ├── hhz.baked.cpp │ │ ├── hhz.driver.cpp │ │ ├── hhz.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── hypterm-split │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hypterm.driver.cpp │ │ ├── hypterm.gold.h │ │ ├── hypterm.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hypterm.driver.cpp │ │ ├── hypterm.gold.h │ │ ├── 
hypterm.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchspacer1t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── hypterm.driver.cpp │ │ ├── hypterm.gold.h │ │ ├── hypterm.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── hypterm │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── hypterm.driver.cpp │ │ ├── hypterm.gold.h │ │ ├── hypterm.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── hypterm.driver.cpp │ │ ├── hypterm.gold.h │ │ ├── hypterm.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchspacer1t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── hypterm-decompose.idsl │ │ ├── hypterm.driver.cpp │ │ ├── hypterm.gold.h │ │ ├── hypterm.idsl │ │ ├── out │ │ ├── output │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── j3d27pt │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── j3d27pt.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── j3d27pt.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu1t │ │ ├── Makefile │ │ ├── 
common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── j3d27pt.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu2t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d27pt.2t.idsl │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu3t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d27pt.3t.idsl │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu4t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d27pt.4t.idsl │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── j3d7pt.4t.idsl │ │ ├── j3d7pt.notemp.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu5t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d27pt.5t.idsl │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchSpacergpu6t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── common.hpp~ │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── j3d27pt.6t.idsl │ │ ├── j3d27pt.baked.cpp │ │ ├── j3d27pt.driver.cpp │ │ ├── j3d27pt.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── 
starter.sh ├── j3d7pt │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.h │ │ ├── j3d7pt.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── .j3d7pt.driver.cpp.swp │ │ ├── .j3d7pt.gold.h.swp │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.h │ │ ├── j3d7pt.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu1t │ │ ├── .out.cu.swp │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.h │ │ ├── j3d7pt.idsl │ │ ├── j3d7pt.notemp.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu2t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d7pt.2t.idsl │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu3t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d7pt.3t.idsl │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.cpp │ │ ├── j3d7pt.gold.h │ │ ├── j3d7pt.notemp.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu4t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp 
│ │ ├── j3d7pt.4t.idsl │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.cpp │ │ ├── j3d7pt.gold.h │ │ ├── j3d7pt.notemp.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── searchSpacergpu5t │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── common.hpp~ │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── j3d7pt.5t.idsl │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.cpp │ │ ├── j3d7pt.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchSpacergpu6t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── common.hpp~ │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── j3d7pt.6t.idsl │ │ ├── j3d7pt.baked.cpp │ │ ├── j3d7pt.driver.cpp │ │ ├── j3d7pt.gold.cpp │ │ ├── j3d7pt.gold.h │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── miniflux │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── miniflux.driver.cpp │ │ ├── miniflux.gold.h │ │ ├── miniflux.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── miniflux.driver.cpp │ │ ├── miniflux.gold.h │ │ ├── miniflux.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh │ └── searchspacer1t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── miniflux.driver.cpp │ │ ├── miniflux.gold.h │ │ ├── miniflux.idsl │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ └── starter.sh ├── rhs4center │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── 
sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ ├── global │ │ ├── .abc.cu.swp │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── global.sh │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ └── searchspacer1t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl ├── rhs4sgcurv-split │ ├── global-stream │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ ├── global │ │ ├── Makefile │ │ ├── common │ │ │ ├── common.hpp │ │ │ ├── cuda_header.cu │ │ │ ├── time.awk │ │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl │ └── searchspacer1t │ │ ├── Makefile │ │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ │ ├── run.sh │ │ ├── searchSpacer.py │ │ ├── sort.sh │ │ ├── starter.sh │ │ ├── sw4.driver.cpp │ │ ├── sw4.gold.h │ │ └── sw4.idsl └── rhs4sgcurv │ ├── global-stream │ ├── Makefile │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ ├── run.sh │ ├── searchSpacer.py │ ├── sort.sh │ ├── starter.sh │ ├── sw4.driver.cpp │ ├── sw4.gold.h │ └── sw4.idsl │ ├── global │ ├── Makefile │ ├── common │ │ ├── common.hpp │ │ ├── cuda_header.cu │ │ ├── time.awk │ │ └── timer.hpp │ ├── run.sh │ ├── searchSpacer.py │ ├── sort.sh │ ├── starter.sh │ ├── sw4.driver.cpp │ ├── sw4.gold.h │ └── sw4.idsl │ └── rhs4sgcurv │ ├── Makefile │ 
├── common │ ├── common.hpp │ ├── cuda_header.cu │ ├── time.awk │ └── timer.hpp │ ├── run.sh │ ├── searchSpacer.py │ ├── sort.sh │ ├── starter.sh │ ├── sw4.driver.cpp │ ├── sw4.gold.h │ └── sw4.idsl ├── codegen.cpp ├── codegen.hpp ├── datatypes.hpp ├── exprnode.cpp ├── exprnode.hpp ├── funcdefn.cpp ├── funcdefn.hpp ├── grammar.hpp ├── grammar.y ├── main.cpp ├── python ├── calc.py └── checkCuda │ └── cuda_check.py ├── scanner.l ├── templates.hpp └── utils.hpp /benchmarks/addsgd4/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 128.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 255.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 32.log 
| grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler 
"-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/searchspacer1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/searchspacer1t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void 
stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/addsgd4/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g 
-maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 128.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 255.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 32.log 
| grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/searchspacer1t/.out.cu.swp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/addsgd6/searchspacer1t/.out.cu.swp -------------------------------------------------------------------------------- /benchmarks/addsgd6/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/searchspacer1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/searchspacer1t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/addsgd6/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/cheby/global-stream/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/cheby/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? 
-ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 128.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- 
/benchmarks/cheby/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/cheby/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 
128.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 255.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 32.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby MAXREGCOUNT=128 > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/global/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 128.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu1t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby MAXREGCOUNT=128 > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu1t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 128.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu2t/.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/cheby/searchSpacergpu2t/.swp -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu2t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=64 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 
13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu2t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu2t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu2t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 64.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu2t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu3t/.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/cheby/searchSpacergpu3t/.swp -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu3t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu3t/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu3t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu3t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 255.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu3t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu4t/.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/cheby/searchSpacergpu4t/.swp -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu4t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu4t/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu4t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu4t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 255.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu4t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu5t/.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/cheby/searchSpacergpu5t/.swp -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu5t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu5t/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu5t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu5t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 255.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu5t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu6t/.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/cheby/searchSpacergpu6t/.swp -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu6t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := cheby 9 | gold: 10 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | cheby: 15 | nvcc cheby.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu6t/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu6t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make cheby > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu6t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 255.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/cheby/searchSpacergpu6t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py cheby.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/denoise/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := denoise 9 | gold: 10 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | denoise: 15 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/denoise/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/denoise/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/denoise/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/denoise/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/denoise/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py denoise.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/denoise/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=64 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := denoise 9 | gold: 10 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | denoise: 15 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/denoise/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | 
// extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/denoise/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/denoise/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/denoise/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 64.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/denoise/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py denoise.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := denoise 9 | gold: 10 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | denoise: 15 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 
| // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer1t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py denoise.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer2t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=64 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 7 | endif 8 | .DEFAULT_GOAL := denoise 9 | gold: 10 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | denoise: 15 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer2t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | 
#include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer2t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer2t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? 
-ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer2t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 64.log | sed '/Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer2t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py denoise.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer4t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=64 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := denoise 9 | gold: 10 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | denoise: 15 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer4t/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer4t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer4t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? 
-ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer4t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 64.log | sed '/Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer4t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py denoise.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer6t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := denoise 9 | gold: 10 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | denoise: 15 | nvcc denoise.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- 
/benchmarks/denoise/searchspacer6t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer6t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer6t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > 
/dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer6t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 128.log | sed '/Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/denoise/searchspacer6t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py denoise.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 6 | endif 7 | diffterm: 8 | nvcc diffterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 9 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 
4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py diffterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 6 | endif 7 | diffterm: 8 | nvcc diffterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 9 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 
| globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 128.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 255.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 32.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/diffterm/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py diffterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/diffterm/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=128 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=128 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 6 | endif 7 | diffterm: 8 | nvcc diffterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 9 | -------------------------------------------------------------------------------- /benchmarks/diffterm/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/diffterm/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/diffterm/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py diffterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? 
-ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef 
__TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print}' 128.log | grep -v Error | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print}' 255.log | grep -v Error | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print}' 32.log | grep -v Error | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print}' 64.log | grep -v Error | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? 
-ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 
6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu1t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu2t/.out.cu.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/helmholtz/searchSpacergpu2t/.out.cu.swp -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu2t/.right.cu.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/helmholtz/searchSpacergpu2t/.right.cu.swp -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu2t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g 
-maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu2t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 
20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu2t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu2t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.2t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu3t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=255 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu3t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 
12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu3t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu3t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.3t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu4t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=128 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=128 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu4t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 
6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu4t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 128.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu4t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.4t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu5t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=64 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=64 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu5t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 
2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu5t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 64.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu5t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.5t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu6t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | ifdef DEBUG 3 | OPTFLAGS=-O0 -g -maxrregcount=64 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 4 | else 5 | OPTFLAGS=-O3 -maxrregcount=64 -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 6 | endif 7 | .DEFAULT_GOAL := hhz 8 | gold: 9 | nvcc hhz.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 10 | ./a.out 11 | hhz: 12 | nvcc hhz.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 13 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu6t/run.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make hhz > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu6t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 64.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/helmholtz/searchSpacergpu6t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py hhz.6t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := hypterm 9 | gold: 10 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | hypterm: 15 | nvcc hypterm.driver.cpp out.cu 
-arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py hypterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := hypterm 9 | gold: 10 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | hypterm: 15 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py hypterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 7 | endif 8 | .DEFAULT_GOAL := hypterm 9 | gold: 10 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | hypterm: 15 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/searchspacer1t/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/hypterm-split/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py hypterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := hypterm 9 | gold: 10 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | hypterm: 15 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py hypterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 7 | endif 8 | .DEFAULT_GOAL := hypterm 9 | gold: 10 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | hypterm: 15 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 128.log | grep -v blockdim | grep -v Error | 
grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 255.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 32.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/hypterm/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py hypterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/hypterm/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 7 | endif 8 | .DEFAULT_GOAL := hypterm 9 | gold: 10 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | hypterm: 15 | nvcc hypterm.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/hypterm/searchspacer1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/hypterm/searchspacer1t/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/hypterm/searchspacer1t/output: -------------------------------------------------------------------------------- 1 | time (ms) : 22.82 2 | time (ms) : 23.8314 3 | time (ms) : 22.8074 4 | time (ms) : 24.7428 5 | time (ms) : 23.0399 6 | time (ms) : 22.712 7 | time (ms) : 22.6106 8 | time (ms) : 45.9157 9 | time (ms) : 21.434 10 | time (ms) : 30.4632 11 | time (ms) : 22.1499 12 | -------------------------------------------------------------------------------- /benchmarks/hypterm/searchspacer1t/run.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/hypterm/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/hypterm/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py hypterm.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 7 | endif 8 | .DEFAULT_GOAL := j3d27pt 9 | gold: 10 | nvcc j3d27pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | 
./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc j3d27pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | 
-------------------------------------------------------------------------------- /benchmarks/j3d27pt/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make j3d27pt MAXREGCOUNT=128 > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 128.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py j3d27pt.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | 
.DEFAULT_GOAL := j3d27pt 9 | gold: 10 | nvcc j3d27pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc j3d27pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 
33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make j3d27pt MAXREGCOUNT=64 > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 64.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py j3d27pt.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 
| endif 8 | .DEFAULT_GOAL := j3d27pt 9 | gold: 10 | nvcc j3d27pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc j3d27pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make j3d27pt MAXREGCOUNT=255 > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu1t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 255.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py j3d27pt.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu2t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := j3d27pt 9 | gold: 10 | nvcc j3d27pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc j3d27pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu2t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu2t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d27pt MAXREGCOUNT=128 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 
20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu2t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 128.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu2t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d27pt.2t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu3t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 7 | endif 8 | .DEFAULT_GOAL := j3d27pt 9 | gold: 10 | nvcc $(OPTFLAGS) -arch=$(ARCH) j3d27pt.driver.cpp out.cu 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc $(OPTFLAGS) -arch=$(ARCH) j3d27pt.baked.cpp out.cu 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu3t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu3t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d27pt MAXREGCOUNT=128 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu3t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 128.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu3t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d27pt.3t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu4t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 
-fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := j3d27pt 9 | gold: 10 | nvcc j3d27pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc j3d27pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu4t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu4t/j3d7pt.notemp.idsl: -------------------------------------------------------------------------------- 1 | parameter L, M, N; 2 | iterator k, j, i; 3 | double in[L,M,N], out[L,M,N]; 4 | copyin in, out; 5 | 6 | stencil jacobi (out, in) { 7 | double temp[L,M,N]; 8 | temp[k][j][i] = 0.1*in[k-1][j][i] + 9 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 10 | 0.3*in[k+1][j][i]; 11 | 12 | out[k][j][i] = 0.1*temp[k-1][j][i] + 13 | 0.2*(temp[k][j-1][i] + temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 14 | 0.3*temp[k+1][j][i]; 15 | } 16 | jacobi (out, in); 17 | copyout 
out; 18 | 19 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu4t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d27pt MAXREGCOUNT=128 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu4t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 128.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu4t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d27pt.4t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu5t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := j3d27pt 9 | gold: 
10 | nvcc j3d27pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc j3d27pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu5t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu5t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d27pt MAXREGCOUNT=255 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 
20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu5t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu5t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d27pt.5t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu6t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := j3d27pt 9 | gold: 10 | nvcc j3d27pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d27pt: 15 | nvcc j3d27pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu6t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu6t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d27pt MAXREGCOUNT=255 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu6t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/j3d27pt/searchSpacergpu6t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d27pt.6t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" 
--use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | gold: 10 | nvcc j3d7pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc j3d7pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop
= HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global-stream/j3d7pt.idsl: -------------------------------------------------------------------------------- 1 | parameter L, M, N; 2 | iterator k, j, i; 3 | double in[L,M,N], out[L,M,N]; 4 | double a, b, h2inv; 5 | 6 | copyin in, out, t, h2inv, a, b; 7 | 8 | stencil jacobi (out, in, h2inv, a, b) { 9 | double c = b * h2inv; 10 | gmem out, in; 11 | out[k][j][i] = a*in[k][j][i] - c*in[k][j][i+1] 12 | + c*in[k][j][i-1] 13 | + c*in[k][j+1][i] 14 | + c*in[k][j-1][i] 15 | + c*in[k+1][j][i] 16 | + c*in[k-1][j][i] 17 | - c*in[k][j][i]*6.0; 18 | } 19 | 20 | jacobi (out, in, h2inv, a, b); 21 | jacobi (in, out, h2inv, a, b); 22 | copyout in; 23 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make j3d7pt> /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 128.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py j3d7pt.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/.j3d7pt.driver.cpp.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/j3d7pt/global/.j3d7pt.driver.cpp.swp -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/.j3d7pt.gold.h.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/j3d7pt/global/.j3d7pt.gold.h.swp -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=64 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | 
gold: 10 | nvcc j3d7pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc j3d7pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | 
-------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/j3d7pt.idsl: -------------------------------------------------------------------------------- 1 | parameter L, M, N; 2 | iterator k, j, i; 3 | double in[L,M,N], out[L,M,N]; 4 | double a, b, h2inv; 5 | 6 | copyin in, out, t, h2inv, a, b; 7 | 8 | stencil jacobi (out, in, h2inv, a, b) { 9 | double c = b * h2inv; 10 | gmem out, in; 11 | out[k][j][i] = a*in[k][j][i] - c*in[k][j][i+1] 12 | + c*in[k][j][i-1] 13 | + c*in[k][j+1][i] 14 | + c*in[k][j-1][i] 15 | + c*in[k+1][j][i] 16 | + c*in[k-1][j][i] 17 | - c*in[k][j][i]*6.0; 18 | } 19 | 20 | jacobi (out, in, h2inv, a, b); 21 | jacobi (in, out, h2inv, a, b); 22 | copyout in; 23 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make j3d7pt > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 64.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py j3d7pt.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/.out.cu.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/j3d7pt/searchSpacergpu1t/.out.cu.swp -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | gold: 10 | nvcc j3d7pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc j3d7pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/j3d7pt.idsl: -------------------------------------------------------------------------------- 1 | parameter L, M, N; 2 | iterator k, j, i; 3 | double in[L,M,N], out[L,M,N]; 4 | double a, b, h2inv; 5 | 6 | copyin in, out, t, h2inv, a, b; 7 | 8 | stencil jacobi (out, in, h2inv, a, b) { 9 | double c = b * h2inv; 10 | out[k][j][i] = a*in[k][j][i] - c*in[k][j][i+1] 11 | + c*in[k][j][i-1] 12 | + c*in[k][j+1][i] 13 | + c*in[k][j-1][i] 14 | + c*in[k+1][j][i] 15 | + c*in[k-1][j][i] 16 | - c*in[k][j][i]*6.0; 17 | } 18 | 19 | jacobi (out, in, h2inv, a, b); 20 | jacobi (in, out, h2inv, a, b); 21 | 22 | copyout in; 23 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/j3d7pt.notemp.idsl: -------------------------------------------------------------------------------- 1 | parameter L, M, N; 2 | iterator k, j, i; 3 | double in[L,M,N], out[L,M,N]; 4 | copyin in, out; 5 | 6 | stencil jacobi (out, in) { 7 | double temp[L,M,N]; 8 | temp[k][j][i] = 0.1*in[k-1][j][i] + 9 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 10 | 0.3*in[k+1][j][i]; 11 | 12 | out[k][j][i] = 0.1*temp[k-1][j][i] + 13 | 0.2*(temp[k][j-1][i] + 
temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 14 | 0.3*temp[k+1][j][i]; 15 | } 16 | jacobi (out, in); 17 | copyout out; 18 | 19 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make j3d7pt MAXREGCOUNT=255 > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v blockdim 128.log | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py j3d7pt.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu2t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 
| OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | gold: 10 | nvcc j3d7pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc j3d7pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu2t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu2t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d7pt MAXREGCOUNT=255 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 
20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu2t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 128.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu2t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d7pt.2t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu3t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | gold: 10 | nvcc $(OPTFLAGS) -arch=$(ARCH) j3d7pt.driver.cpp out.cu 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc $(OPTFLAGS) -arch=$(ARCH) j3d7pt.baked.cpp out.cu 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu3t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu3t/j3d7pt.gold.cpp: -------------------------------------------------------------------------------- 1 | template <typename T> 2 | void jacobi_gold(T *out, T *in, int L, int M, int N) { 3 | T *temp = new T[L][M][N]; 4 | for (int k = 0; k < L; ++k) { 5 | for (int j = 0; j < M; ++j) { 6 | for (int i = 0; i < N; ++i) { 7 | temp[k][j][i] = 0.1*in[k-1][j][i] + 8 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 9 | 0.3*in[k+1][j][i]; 10 | } 11 | } 12 | } 13 | for (int k = 0; k < L; ++k) { 14 | for (int j = 0; j < M; ++j) { 15 | for (int i = 0; i < N; ++i) { 16 | out[k][j][i] = 0.1*temp[k-1][j][i] + 17 | 0.2*(temp[k][j-1][i] + temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 18 | 0.3*temp[k+1][j][i]; 19 | } 20 | } 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu3t/j3d7pt.notemp.idsl: -------------------------------------------------------------------------------- 1 | parameter L, M, N; 2 | iterator k, j, i; 3 | double in[L,M,N], out[L,M,N]; 4 | copyin in, out; 5 | 6 | stencil jacobi (out, in) { 7 | double temp[L,M,N]; 8 | temp[k][j][i] = 0.1*in[k-1][j][i] + 9 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 10 | 0.3*in[k+1][j][i]; 11 | 12 | out[k][j][i] = 0.1*temp[k-1][j][i] + 13 | 0.2*(temp[k][j-1][i] + temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 14 | 0.3*temp[k+1][j][i]; 15 | } 16 | jacobi (out, in); 17 | copyout out; 18 | 19 | 
-------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu3t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d7pt MAXREGCOUNT=255 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu3t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 128.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu3t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d7pt.3t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu4t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=64 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | gold: 10 | nvcc 
j3d7pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc j3d7pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu4t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu4t/j3d7pt.gold.cpp: -------------------------------------------------------------------------------- 1 | template <typename T> 2 | void jacobi_gold(T *out, T *in, int L, int M, int N) { 3 | T *temp = new T[L][M][N]; 4 | for (int k = 0; k < L; ++k) { 5 | for (int j = 0; j < M; ++j) { 6 | for (int i = 0; i < N; ++i) { 7 | temp[k][j][i] = 0.1*in[k-1][j][i] + 8 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 9 | 0.3*in[k+1][j][i]; 10 | } 11 | } 12 | } 13 | for (int k = 0; k < L; ++k) { 14 | for (int j = 0; j < M; ++j) { 15 | for (int i = 0; i < N; ++i) { 16 | out[k][j][i] = 0.1*temp[k-1][j][i] + 17 | 0.2*(temp[k][j-1][i] + temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 18 | 0.3*temp[k+1][j][i]; 19 | } 20 | } 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- 
/benchmarks/j3d7pt/searchSpacergpu4t/j3d7pt.notemp.idsl: -------------------------------------------------------------------------------- 1 | parameter L, M, N; 2 | iterator k, j, i; 3 | double in[L,M,N], out[L,M,N]; 4 | copyin in, out; 5 | 6 | stencil jacobi (out, in) { 7 | double temp[L,M,N]; 8 | temp[k][j][i] = 0.1*in[k-1][j][i] + 9 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 10 | 0.3*in[k+1][j][i]; 11 | 12 | out[k][j][i] = 0.1*temp[k-1][j][i] + 13 | 0.2*(temp[k][j-1][i] + temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 14 | 0.3*temp[k+1][j][i]; 15 | } 16 | jacobi (out, in); 17 | copyout out; 18 | 19 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu4t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make j3d7pt MAXREGCOUNT=255 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 
20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu4t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 64.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-64 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu4t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d7pt.4t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu5t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | gold: 10 | nvcc j3d7pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc j3d7pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu5t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu5t/j3d7pt.gold.cpp: -------------------------------------------------------------------------------- 1 | template <typename T> 2 | void jacobi_gold(T *out, T *in, int L, int M, int N) { 3 | T *temp = new T[L][M][N]; 4 | for (int k = 0; k < L; ++k) { 5 | for (int j = 0; j < M; ++j) { 6 | for (int i = 0; i < N; ++i) { 7 | temp[k][j][i] = 0.1*in[k-1][j][i] + 8 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 9 | 0.3*in[k+1][j][i]; 10 | } 11 | } 12 | } 13 | for (int k = 0; k < L; ++k) { 14 | for (int j = 0; j < M; ++j) { 15 | for (int i = 0; i < N; ++i) { 16 | out[k][j][i] = 0.1*temp[k-1][j][i] + 17 | 0.2*(temp[k][j-1][i] + temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 18 | 0.3*temp[k+1][j][i]; 19 | } 20 | } 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu5t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make MAXREGCOUNT=32 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 
20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu5t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu5t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d7pt.5t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu6t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := j3d7pt 9 | gold: 10 | nvcc j3d7pt.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | j3d7pt: 15 | nvcc j3d7pt.baked.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu6t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu6t/j3d7pt.gold.cpp: -------------------------------------------------------------------------------- 1 | template <typename T> 2 | void jacobi_gold(T *out, T *in, int L, int M, int N) { 3 | T *temp = new T[L][M][N]; 4 | for (int k = 0; k < L; ++k) { 5 | for (int j = 0; j < M; ++j) { 6 | for (int i = 0; i < N; ++i) { 7 | temp[k][j][i] = 0.1*in[k-1][j][i] + 8 | 0.2*(in[k][j-1][i] + in[k][j+1][i] + in[k][j][i] + in[k][j][i-1] + in[k][j][i+1]) + 9 | 0.3*in[k+1][j][i]; 10 | } 11 | } 12 | } 13 | for (int k = 0; k < L; ++k) { 14 | for (int j = 0; j < M; ++j) { 15 | for (int i = 0; i < N; ++i) { 16 | out[k][j][i] = 0.1*temp[k-1][j][i] + 17 | 0.2*(temp[k][j-1][i] + temp[k][j+1][i] + temp[k][j][i] + temp[k][j][i-1] + temp[k][j][i+1]) + 18 | 0.3*temp[k+1][j][i]; 19 | } 20 | } 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu6t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./stencilgen $@ > /dev/null 2>&1 3 | if [ $? -ne 0 ]; 4 | then 5 | echo "Generation Error!" 6 | exit 0 7 | fi 8 | make MAXREGCOUNT=255 > /dev/null 2>&1 9 | if [ $? -ne 0 ]; 10 | then 11 | echo "Compilation Error!" 12 | exit 0 13 | fi 14 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 15 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 16 | | cut -d, -f 6 | python ~/python/calc.py; 17 | if [ ${PIPESTATUS[0]} -ne 0 ]; 18 | then 19 | echo "Run Error!" 
20 | exit 0 21 | fi 22 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu6t/sort.sh: -------------------------------------------------------------------------------- 1 | grep -v Error 255.log | awk -F " " '{print $NF}' |grep -v "\<0\.0\>" | grep -v "^$" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/j3d7pt/searchSpacergpu6t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | python -u searchSpacer.py j3d7pt.6t.idsl | tee a.log 3 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=128 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := miniflux 9 | gold: 10 | nvcc miniflux.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | miniflux: 15 | nvcc miniflux.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? 
(a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global-stream/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 2 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py miniflux.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := miniflux 9 | gold: 10 | nvcc miniflux.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | miniflux: 15 | nvcc miniflux.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 128.log | grep -v blockdim | grep -v Error | 
grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 255.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 32.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/miniflux/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py miniflux.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/miniflux/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 7 | endif 8 | .DEFAULT_GOAL := miniflux 9 | gold: 10 | nvcc miniflux.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | miniflux: 15 | nvcc miniflux.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/miniflux/searchspacer1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/miniflux/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/miniflux/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/miniflux/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py miniflux.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/.abc.cu.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pssrawat/artemis/b69621553325b66f0367bf8f9d93605c885aa19d/benchmarks/rhs4center/global/.abc.cu.swp -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/global.sh: -------------------------------------------------------------------------------- 1 | touch global; 2 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 128.log | grep -v blockdim | grep -v 
Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 3 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 255.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 4 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 32.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 5 | awk '/unroll k=1,j=1,i=1/{print; nr[NR+1]; next}; NR in nr' 64.log | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n >> global 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 32.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-32 2 | tac 64.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 3 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 4 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 5 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=cg -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | 
-------------------------------------------------------------------------------- /benchmarks/rhs4center/searchspacer1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 32.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-32 2 | tac 64.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-64 3 | tac 128.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-128 4 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 5 | -------------------------------------------------------------------------------- /benchmarks/rhs4center/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | 
-------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/searchspacer1t/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/searchspacer1t/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/searchspacer1t/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/searchspacer1t/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv-split/searchspacer1t/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global-stream/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global-stream/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global-stream/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 
21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global-stream/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global-stream/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global/common/cuda_header.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? 
a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $? -ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!"
13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/global/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/rhs4sgcurv/Makefile: -------------------------------------------------------------------------------- 1 | ARCH=sm_60 2 | MAXREGCOUNT=255 3 | ifdef DEBUG 4 | OPTFLAGS=-O0 -g -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -g -O0 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 5 | else 6 | OPTFLAGS=-O3 -maxrregcount=$(MAXREGCOUNT) -ccbin=g++ -std=c++11 -Xcompiler "-fPIC -fopenmp -O3 -fno-strict-aliasing" --use_fast_math -Xptxas "-dlcm=ca -v" 7 | endif 8 | .DEFAULT_GOAL := sw4 9 | gold: 10 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 11 | ./a.out 12 | clean: 13 | rm test.txt gold.txt 14 | sw4: 15 | nvcc sw4.driver.cpp out.cu -arch=$(ARCH) $(OPTFLAGS) 16 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/rhs4sgcurv/common/cuda_header.cu: 
-------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "stdio.h" 3 | 4 | // extern __host__ __device__ int MAX(int a, int b) { return a > b ? a : b; } 5 | // extern __host__ __device__ int MIN(int a, int b) { return a < b ? a : b; } 6 | // extern __host__ __device__ int CEIL(int a, int b) { return ( (a) % (b) == 0 ? (a) / (b) : ( (a) / (b) + 1 ) ); } 7 | 8 | void Check_CUDA_Error(const char* message){ 9 | cudaError_t error = cudaGetLastError(); 10 | if( error != cudaSuccess ){ 11 | printf("CUDA-ERROR:%s, %s\n",message,cudaGetErrorString(error) ); 12 | exit(-1); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/rhs4sgcurv/common/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_HPP__ 2 | #define __TIMER_HPP__ 3 | 4 | #include <chrono> 5 | 6 | #ifndef NUMRUNS 7 | #define NUMRUNS 2 8 | #endif 9 | 10 | typedef std::chrono::high_resolution_clock HighResolutionClock; 11 | typedef std::chrono::milliseconds milliseconds; 12 | typedef std::chrono::time_point<HighResolutionClock> TimePoint; 13 | 14 | 15 | /// Timers 16 | static TimePoint globalTimerStart; 17 | static TimePoint globalTimerStop; 18 | static void startTimer() 19 | { 20 | globalTimerStart = HighResolutionClock::now(); 21 | } 22 | static void stopTimer() 23 | { 24 | globalTimerStop = HighResolutionClock::now(); 25 | } 26 | static double getElapsedTime() 27 | { 28 | milliseconds time(0); 29 | time = 30 | std::chrono::duration_cast<milliseconds> 31 | (globalTimerStop - globalTimerStart); 32 | return (double)time.count(); 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/rhs4sgcurv/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo $@ 3 | ./stencilgen $@ > /dev/null 2>&1 4 | if [ $?
-ne 0 ]; 5 | then 6 | echo "Generation Error!" 7 | exit 0 8 | fi 9 | make > /dev/null 2>&1 10 | if [ $? -ne 0 ]; 11 | then 12 | echo "Compilation Error!" 13 | exit 0 14 | fi 15 | nvprof --csv --profile-api-trace none -u ms ./a.out 2>&1 \ 16 | | grep -v "\[CUDA memcpy " | grep -P "([[:digit:].]+,){6}" \ 17 | | cut -d, -f 6 | python ~/python/calc.py; 18 | if [ ${PIPESTATUS[0]} -ne 0 ]; 19 | then 20 | echo "Run Error!" 21 | exit 0 22 | fi 23 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/rhs4sgcurv/sort.sh: -------------------------------------------------------------------------------- 1 | tac 255.log | sed '/Run Error/I,+2 d' | tac | grep -v blockdim | grep -v Error | grep -v "^$" | grep -v "\<0\.0\>" | sort -n > out-255 2 | -------------------------------------------------------------------------------- /benchmarks/rhs4sgcurv/rhs4sgcurv/starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #PBS -l nodes=1:ppn=8:gpus=1 3 | #PBS -l walltime=06:00:00 4 | python -u searchSpacer.py sw4.idsl | tee a.log 5 | 6 | -------------------------------------------------------------------------------- /grammar.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __GRAMMAR_HPP__ 2 | #define __GRAMMAR_HPP__ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "funcdefn.hpp" 9 | 10 | class grammar { 11 | public: 12 | static startNode *start; 13 | static void set_input (FILE *); 14 | static void parse (); 15 | }; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /python/calc.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import fileinput 3 | import itertools 4 | result = 0.0 5 | numLines = 0 6 | 7 | firstFile = 1 8 | def sum(result, elem): 9 | return result + elem 10 | 11 | operation = sum 
12 | 13 | if len(sys.argv) > 1 and sys.argv[1] == "--min": 14 | result = float('inf') 15 | operation = min 16 | 17 | if len(sys.argv) > 1 and sys.argv[1] == "--max": 18 | result = 0 19 | operation = max 20 | 21 | for i in range(1, len(sys.argv)): 22 | if sys.argv[i].startswith('-'): 23 | firstFile = i + 1 24 | continue 25 | break 26 | 27 | for line in fileinput.input(itertools.chain(['-'], sys.argv[firstFile:])): 28 | result = operation(result, float(line)) 29 | numLines += 1 30 | 31 | if len(sys.argv) > 1 and sys.argv[1] == "--ave": 32 | result /= numLines 33 | 34 | print(result) 35 | --------------------------------------------------------------------------------