├── tests ├── README.md ├── 06_device │ ├── Makefile │ ├── test06_device.py │ └── test06_device.c ├── 03_release │ └── Makefile ├── 11_deadlock │ └── Makefile ├── 21_task_malloc │ ├── Makefile │ ├── kernel.cu │ └── test21_task_malloc.c ├── 01_init_finalize │ ├── Makefile │ └── test01_init_finalize.c ├── 08_multithreading │ ├── Makefile │ └── test08_multithreading.c ├── 31_isaxpy │ ├── src │ │ ├── saxpy.iris.cpp │ │ ├── kernel.cu │ │ ├── kernel.cl │ │ ├── kernel.hip │ │ ├── signature.def │ │ ├── saxpy_ref.cpp │ │ ├── benchmark_ref.h │ │ ├── saxpy.iris.h │ │ ├── kernel.xilinx.cpp │ │ └── kernel.cl.openmp.c │ ├── CMakeLists.txt │ └── saxpy.py ├── 37_opencl_icd │ ├── Makefile │ └── kernel.cl ├── 13_hooks │ └── Makefile ├── 02_task_depend │ ├── Makefile │ ├── test02_task_depend.py │ └── test02_task_depend.c ├── 15_graph │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.cl.openmp.h ├── 17_json │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl.openmp.h │ ├── Makefile │ └── test17_json.c ├── 18_record │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl.openmp.h │ └── Makefile ├── 19_replay │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl.openmp.h │ ├── Makefile │ └── test19_replay.c ├── 33_graph_cpp │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.openmp.h ├── 07_policy_register │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl.openmp.h │ ├── Makefile │ ├── policy_last.cpp │ ├── test07_policy_register.c │ └── policy_gws.cpp ├── 25_random_tasks │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.openmp.h │ └── Makefile ├── 29_data_mem │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl.openmp.h │ └── Makefile ├── 14_permanent_task │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.openmp.h ├── 20_cublas │ ├── kernel.cu │ └── Makefile ├── 24_multi_kernels │ ├── Makefile │ ├── 
kernel.cl │ ├── kernel-stupid.cl │ ├── kernel.cu │ └── kernel.hip.cpp ├── 27_deadlock2 │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.openmp.h ├── 12_task_custom │ └── Makefile ├── 16_task_host │ ├── kernel.cu │ ├── kernel.cl │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.cl.openmp.h ├── 28_json2 │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl │ ├── Makefile │ └── kernel.openmp.h ├── 30_task_info │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl │ ├── Makefile │ └── kernel.openmp.h ├── 34_set_mem │ ├── kernel.cu │ ├── kernel.cl │ ├── kernel.hip.cpp │ ├── kernel.openmp.h │ └── Makefile ├── 22_json_mixed_args │ ├── kernel.cu │ ├── kernel.cl │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.openmp.h ├── 35_json_mixed_args_record_replay │ ├── kernel.cu │ ├── kernel.cl │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.openmp.h ├── .gitignore ├── 36_double_json_mixed_args_record_replay │ ├── kernel.cu │ ├── kernel.cl │ ├── kernel.hip.cpp │ ├── Makefile │ └── kernel.openmp.h ├── 09_dataflow │ ├── Makefile │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ └── kernel.openmp.h ├── 10_multikernelexecution │ ├── Makefile │ ├── kernel.cl │ ├── kernel.cu │ └── kernel.hip.cpp ├── 04_enclosing_targets │ ├── Makefile │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ └── kernel.openmp.h ├── 23_multigraph │ ├── Makefile │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ └── kernel.cl.openmp.h ├── 26_env_set │ ├── Makefile │ ├── kernel.cl │ ├── kernel-negative.cl │ ├── kernel.cu │ ├── kernel-negative.cu │ └── kernel.hip.cpp ├── 05_snapdragon │ ├── kernel.omp.mk │ └── saxpy.mk ├── 32_json3 │ ├── timer.h │ ├── Makefile │ └── kernel.openmp.h ├── 38_offset │ ├── kernel.cl │ ├── kernel.openmp.h │ ├── Makefile │ └── kernel.hip.cpp ├── 38_offset_subbuffer │ ├── kernel.cl │ ├── kernel.openmp.h │ ├── Makefile │ └── kernel.hip.cpp ├── 39_dmem2dmem │ └── py_host.py ├── run.sh └── Makefile.tests ├── include ├── CMakeLists.txt └── iris │ ├── 
hexagon │ ├── iris_interface.h │ ├── stub.h │ └── q6cache.h │ ├── iris.h │ ├── iris_errno.h │ ├── iris_hexagon_imp.h │ ├── iris_poly_types.h │ ├── gettime.h │ ├── hip │ └── LICENSE.txt │ ├── level_zero │ └── LICENSE │ ├── iris_host2hip.h │ └── CMakeLists.txt ├── scheduling-policies ├── graph-prediction │ ├── dagger │ └── generate_baseline_heatmap.sh ├── gnn-graph-prediction │ ├── dagger │ └── generate_baseline_heatmap.sh └── aiwc │ ├── kernel.cl │ ├── Makefile │ ├── aiwc_utils.h │ └── test_aiwc_policy.c ├── docs └── sphinx │ ├── source │ ├── genindex.rst │ ├── _images │ │ ├── logo.png │ │ ├── task.png │ │ ├── overview.png │ │ └── execution.png │ ├── installation.rst │ ├── api.rst │ ├── features.rst │ └── misc.rst │ ├── requirements.txt │ ├── environment.yml │ ├── Makefile │ └── make.bat ├── apps ├── 2tasks │ ├── kernel.hip.cpp │ ├── kernel.cl │ ├── kernel.cu │ ├── Makefile │ └── kernel.openmp.h ├── custom_policy │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.openmp.h │ ├── Makefile │ ├── custom_policy.c │ └── PolicyGWS.cpp ├── aiwc_policy │ ├── kernel.cl │ ├── Makefile │ ├── aiwc_utils.h │ └── test_aiwc_policy.c ├── dagger │ ├── wamta_paper │ │ └── Makefile │ ├── fixed_dag_dynamic_policy_scaling_test │ │ └── Makefile │ ├── requirements.txt │ ├── .gitignore │ ├── timer.h │ ├── plot_local_workgroup_sizes.py │ ├── kernel.cu │ ├── kernel.hip.cpp │ └── benchmark-systems.sh ├── qiree_backend │ ├── test │ │ ├── my_library.c │ │ ├── Makefile │ │ └── test_quiree.c │ ├── Makefile │ ├── qiree_task.py │ └── qiree_task.c ├── saxpy │ ├── kernel.cu │ ├── kernel.cl │ ├── kernel.hip.cpp │ ├── kernel.openmp.h │ └── Makefile ├── auto_dag_creation │ ├── kernel.cu │ ├── kernel.cl │ ├── kernel.hip.cpp │ └── Makefile ├── helloworld │ ├── kernel.cl │ ├── kernel.cu │ ├── kernel.openmp.h │ ├── kernel.hip.cpp │ ├── helloworld.cpp │ ├── helloworld.c │ ├── Makefile │ └── kernel.ptx ├── vecadd │ ├── kernel.cu │ ├── kernel.hip.cpp │ ├── kernel.cl │ ├── kernel.openmp.h │ ├── 
build_dependencies.sh │ ├── vecadd.cpp │ └── plot_results.sh ├── benchmarking │ ├── timer.h │ ├── setup.sh │ ├── utils.h │ ├── kernel.cl │ ├── memory-performance-scripts │ │ ├── Makefile │ │ ├── run-membench-opencl.sh │ │ ├── run-membench-hip.sh │ │ └── run-membench-openmp.sh │ ├── kernel.openmp.h │ └── compute-performance-scripts │ │ └── run-dgemm-openmp.sh ├── dgemm │ ├── kernel.cl │ ├── kernel.cu │ ├── Makefile │ ├── kernel.hip.cpp │ └── kernel.openmp.h ├── sgemm │ ├── kernel.cl │ ├── kernel.cu │ ├── Makefile │ ├── kernel.hip.cpp │ └── kernel.openmp.h └── makefile_defs.mk ├── .JuliaFormatter.toml ├── src ├── runtime │ ├── Structs.h │ ├── Retainable.cpp │ ├── __init__.py │ ├── Filter.h │ ├── PolicyBlockCycle.h │ ├── Config.h.in │ ├── TGPolicy.cpp │ ├── LoaderOpenMP.h │ ├── PolicyData.h │ ├── LoaderHost2HIP.h │ ├── PolicyDevice.h │ ├── SigHandler.h │ ├── LoaderHost2CUDA.h │ ├── PolicyRandom.h │ ├── PolicyDepend.h │ ├── PolicyDefault.h │ ├── PolicyFirstToFinish.h │ ├── LoaderHost2OpenCL.h │ ├── PolicyShortestDeviceQueue.h │ ├── PolicyProfile.h │ ├── LoaderQIREE.cpp │ ├── Policy.cpp │ ├── PolicyJulia.h │ ├── FilterTaskSplit.h │ ├── LoaderQIREE.h │ ├── ProfilerEventRecord.h │ ├── PolicyDefault.cpp │ ├── MemRange.cpp │ ├── PolicyRoundRobin.h │ ├── Queue.h │ ├── PolicyDevice.cpp │ ├── PolicyFirstToFinish.cpp │ ├── PresentTable.h │ ├── Pool.h │ ├── QueueReady.h │ ├── LoaderPolicy.h │ ├── PolicyRandom.cpp │ ├── MemRange.h │ ├── Reduction.h │ ├── QueueTask.h │ ├── ProfilerGoogleCharts.h │ ├── PolicyProfile.cpp │ ├── Thread.h │ ├── LoaderPolicy.cpp │ ├── TGPolicy.h │ ├── ProfilerDOT.h │ ├── Timer.h │ ├── Policies.h │ ├── Pool.cpp │ ├── SigHandler.cpp │ ├── Worker.h │ ├── Consistency.h │ ├── HubClient.h │ ├── Policy.h │ ├── Profiler.h │ ├── Thread.cpp │ ├── Polyhedral.h │ ├── LICENSE.jsmn │ └── Polyhedral.cpp └── CMakeLists.txt ├── .gitignore ├── .readthedocs.yaml ├── utils ├── build_host.sh ├── android_deps.min ├── build_android.sh ├── build_copy_device.sh ├── 
hexagon_deps.min ├── iris.def └── Makefile.hexagon ├── .gitlab-ci-scripts ├── runner_watcher.sh └── schema_check.sh ├── pyproject.toml ├── .github └── workflows │ └── workflow.yml └── Project.toml /tests/README.md: -------------------------------------------------------------------------------- 1 | # Iris Tests 2 | 3 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(iris) 2 | 3 | -------------------------------------------------------------------------------- /scheduling-policies/graph-prediction/dagger: -------------------------------------------------------------------------------- 1 | ../../apps/dagger -------------------------------------------------------------------------------- /docs/sphinx/source/genindex.rst: -------------------------------------------------------------------------------- 1 | Keyword Index 2 | ============= 3 | -------------------------------------------------------------------------------- /scheduling-policies/gnn-graph-prediction/dagger: -------------------------------------------------------------------------------- 1 | ../../apps/dagger -------------------------------------------------------------------------------- /tests/06_device/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test06_device 2 | 3 | include ../Makefile.tests 4 | 5 | -------------------------------------------------------------------------------- /apps/2tasks/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "kernel.cu" 3 | 4 | -------------------------------------------------------------------------------- /tests/03_release/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test03_release 2 | 3 | include ../Makefile.tests 4 | 5 | 
-------------------------------------------------------------------------------- /tests/11_deadlock/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test11_deadlock 2 | 3 | include ../Makefile.tests 4 | 5 | -------------------------------------------------------------------------------- /tests/21_task_malloc/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test21_task_malloc 2 | 3 | include ../Makefile.tests 4 | 5 | -------------------------------------------------------------------------------- /tests/01_init_finalize/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test01_init_finalize 2 | 3 | include ../Makefile.tests 4 | 5 | -------------------------------------------------------------------------------- /tests/08_multithreading/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test08_multithreading 2 | 3 | include ../Makefile.tests 4 | 5 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/saxpy.iris.cpp: -------------------------------------------------------------------------------- 1 | 2 | #define IRIS_API_DEFINITION 3 | 4 | #include "saxpy.iris.h" 5 | 6 | -------------------------------------------------------------------------------- /tests/37_opencl_icd/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test37_opencl_icd 2 | all: $(TEST) 3 | 4 | include ../Makefile.tests 5 | 6 | -------------------------------------------------------------------------------- /docs/sphinx/source/_images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ORNL/iris/HEAD/docs/sphinx/source/_images/logo.png -------------------------------------------------------------------------------- 
/docs/sphinx/source/_images/task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ORNL/iris/HEAD/docs/sphinx/source/_images/task.png -------------------------------------------------------------------------------- /docs/sphinx/source/_images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ORNL/iris/HEAD/docs/sphinx/source/_images/overview.png -------------------------------------------------------------------------------- /.JuliaFormatter.toml: -------------------------------------------------------------------------------- 1 | margin = 80 2 | style = "sciml" 3 | format_docstrings = true 4 | separate_kwargs_with_semicolon = true 5 | -------------------------------------------------------------------------------- /docs/sphinx/source/_images/execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ORNL/iris/HEAD/docs/sphinx/source/_images/execution.png -------------------------------------------------------------------------------- /tests/13_hooks/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test13_hooks 2 | 3 | CPP=1 4 | CFLAGS := -std=c++11 5 | 6 | include ../Makefile.tests 7 | 8 | -------------------------------------------------------------------------------- /docs/sphinx/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | breathe 3 | numpy 4 | sphinxcontrib-contentui 5 | sphinx_rtd_theme 6 | json-schema-for-humans -------------------------------------------------------------------------------- /tests/02_task_depend/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test02_task_depend 2 | 3 | include ../Makefile.tests 4 | 5 | clean: 6 | rm -f $(TEST) *.dot 7 | 
-------------------------------------------------------------------------------- /tests/15_graph/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/17_json/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i] = i; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/18_record/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/19_replay/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/33_graph_cpp/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/custom_policy/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void setid(__global int* mem) { 2 | int id = get_global_id(0); 3 | mem[id] = id; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /src/runtime/Structs.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_STRUCTS_H 2 | 
#define IRIS_SRC_RT_STRUCTS_H 3 | 4 | 5 | #endif /* IRIS_SRC_RT_STRUCTS_H */ 6 | -------------------------------------------------------------------------------- /tests/07_policy_register/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | int i = get_global_id(0); 3 | A[i] = i; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/25_random_tasks/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void add1(__global int* restrict A) { 2 | size_t i = get_global_id(0); 3 | A[i]++; 4 | } 5 | -------------------------------------------------------------------------------- /tests/29_data_mem/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i] = i; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/aiwc_policy/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i] = A[i] + i; 4 | } 5 | -------------------------------------------------------------------------------- /tests/14_permanent_task/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* restrict A) { 2 | size_t i = get_global_id(0); 3 | A[i]++; 4 | } 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | _build/ 3 | install/ 4 | _install/ 5 | envs/ 6 | slurm-test-out.txt 7 | slurm-test-err.txt 8 | *.so 9 | *.swp 10 | -------------------------------------------------------------------------------- 
/scheduling-policies/aiwc/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* A) { 2 | size_t i = get_global_id(0); 3 | A[i] = A[i] + i; 4 | } 5 | -------------------------------------------------------------------------------- /tests/15_graph/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/17_json/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i] = i; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/18_record/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/19_replay/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/dagger/wamta_paper/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | ./benchmark.sh 3 | 4 | clean: 5 | rm -f *.pdf *.csv kernel.hip kernel.ptx kernel.openmp.so graph.json 6 | 7 | -------------------------------------------------------------------------------- /tests/20_cublas/kernel.cu: -------------------------------------------------------------------------------- 
1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i] += 1; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/25_random_tasks/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void add1(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/29_data_mem/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i] = i; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/33_graph_cpp/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | configuration: docs/sphinx/source/conf.py 5 | 6 | conda: 7 | environment: docs/sphinx/environment.yml 8 | -------------------------------------------------------------------------------- /apps/custom_policy/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void setid(int* mem) { 2 | int id = blockIdx.x * blockDim.x + threadIdx.x; 3 | mem[id] = id; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/07_policy_register/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = 
blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i] = i; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/14_permanent_task/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i]++; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/21_task_malloc/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i] += 1; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/24_multi_kernels/Makefile: -------------------------------------------------------------------------------- 1 | CPP=1 2 | 3 | TEST=test24_multi_kernels 4 | 5 | all: $(TEST) kernel.ptx kernel.hip 6 | 7 | include ../Makefile.tests 8 | 9 | -------------------------------------------------------------------------------- /tests/27_deadlock2/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void copy(__global int* dst, __global int* src) { 2 | size_t i = get_global_id(0); 3 | dst[i] = src[i]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/12_task_custom/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test12_task_custom 2 | 3 | CPP=1 4 | C_FLAGS := -I../../ -I../../include -I../../build/src/runtime 5 | 6 | include ../Makefile.tests 7 | 8 | -------------------------------------------------------------------------------- /tests/27_deadlock2/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ 2 | void copy(int* dst, int *src) { 3 | int i = blockIdx.x * blockDim.x 
+ threadIdx.x; 4 | dst[i] = src[i]; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /apps/dagger/fixed_dag_dynamic_policy_scaling_test/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | ./benchmark.sh 3 | 4 | clean: 5 | rm -f *.pdf *.csv kernel.hip kernel.ptx kernel.openmp.so graph.json 6 | 7 | -------------------------------------------------------------------------------- /tests/16_task_host/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void process(int* A, int* factor) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[i] = i * factor[0]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/28_json2/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void vecadd(int* C, int* A, int *B) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | C[i] += A[i] + B[i]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/30_task_info/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void vecadd(int* C, int* A, int* B) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | C[i] = A[i] + B[i]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/34_set_mem/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void vecadd(int* A, int* B, int* C) { 2 | int id = blockIdx.x * blockDim.x + threadIdx.x; 3 | C[id] = A[id] + B[id]; 4 | } 5 | -------------------------------------------------------------------------------- /tests/15_graph/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" 
__global__ void process(int* A) { 3 | int i = blockIdx.x * blockDim.x + threadIdx.x; 4 | A[i]++; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /tests/16_task_host/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void process(__global int* restrict A, __global int* restrict factor) { 2 | size_t id = get_global_id(0); 3 | A[id] = id * factor[0]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/17_json/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void process(int* A) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i] = i; 6 | } 7 | -------------------------------------------------------------------------------- /tests/33_graph_cpp/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" __global__ void process(int* A) { 3 | int i = blockIdx.x * blockDim.x + threadIdx.x; 4 | A[i]++; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /tests/18_record/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void process(int* A) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i]++; 6 | } 7 | 8 | -------------------------------------------------------------------------------- /tests/19_replay/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void process(int* A) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i]++; 6 | } 7 | 8 | -------------------------------------------------------------------------------- /tests/29_data_mem/kernel.hip.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void process(int* A) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i] = i; 6 | } 7 | -------------------------------------------------------------------------------- /apps/qiree_backend/test/my_library.c: -------------------------------------------------------------------------------- 1 | // my_library.c 2 | #include <stdio.h> 3 | 4 | void hello_from_library(const char *name) { 5 | printf("Hello, %s, from the shared library!\n", name); 6 | } 7 | -------------------------------------------------------------------------------- /apps/saxpy/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void saxpy(float* Z, float A, float* X, float* Y) { 2 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/07_policy_register/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" __global__ void process(int* A) { 3 | int i = blockIdx.x * blockDim.x + threadIdx.x; 4 | A[i] = i; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /tests/14_permanent_task/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void process(int* A) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i]++; 6 | } 7 | 8 | -------------------------------------------------------------------------------- /tests/25_random_tasks/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void add1(int* A) { 4 | size_t idx = threadIdx.x + blockDim.x*blockIdx.x; 5 | A[idx]++; 6 | } 7 | 8 | 
-------------------------------------------------------------------------------- /tests/31_isaxpy/src/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void saxpy(int* Z, int* X, int* Y, int A) { 2 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/34_set_mem/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void vecadd(__global int* restrict A, __global int* restrict B, __global int* restrict C) { 2 | size_t id = get_global_id(0); 3 | C[id] = A[id] + B[id]; 4 | } 5 | -------------------------------------------------------------------------------- /apps/dagger/requirements.txt: -------------------------------------------------------------------------------- 1 | bokeh 2 | matplotlib 3 | natsort 4 | networkx @ git+https://github.com/BeauJoh/networkx.git@main 5 | numpy 6 | pandas 7 | pygraphviz 8 | seaborn 9 | tqdm 10 | pyyaml 11 | -------------------------------------------------------------------------------- /apps/qiree_backend/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: qiree_task 4 | 5 | qiree_task: qiree_task.c 6 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 7 | 8 | clean: 9 | rm -f qiree_task 10 | -------------------------------------------------------------------------------- /tests/22_json_mixed_args/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void saxpy(int* Z, int* X, int* Y, int A) { 2 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/auto_dag_creation/kernel.cu: 
-------------------------------------------------------------------------------- 1 | extern "C" __global__ void saxpy(float* Z, float A, float* X, float* Y) { 2 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/custom_policy/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void setid(int* mem) { 4 | int id = blockIdx.x * blockDim.x + threadIdx.x; 5 | mem[id] = id; 6 | } 7 | 8 | 9 | -------------------------------------------------------------------------------- /include/iris/hexagon/iris_interface.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "iris/iris_hexagon.h" 4 | #include "iris/hexagon/stub.h" 5 | #define ENABLE_IRIS_HEXAGON_APIS 6 | #include "iris_app_cpu_dsp_interface.h" 7 | -------------------------------------------------------------------------------- /apps/helloworld/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void uppercase(__global char* b, __global char* a) { 2 | int i = get_global_id(0); 3 | if (a[i] >= 'a' && a[i] <= 'z') b[i] = a[i] + 'A' - 'a'; 4 | else b[i] = a[i]; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /apps/saxpy/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void saxpy(__global float* restrict Z, float A, __global float* restrict X, __global float* restrict Y) { 2 | size_t id = get_global_id(0); 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/27_deadlock2/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ 4 | 
void copy(int* dst, int *src) { 5 | int i = blockIdx.x * blockDim.x + threadIdx.x; 6 | dst[i] = src[i]; 7 | } 8 | 9 | -------------------------------------------------------------------------------- /tests/30_task_info/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" __global__ void vecadd(int* C, int* A, int* B) { 3 | int i = blockIdx.x * blockDim.x + threadIdx.x; 4 | C[i] = A[i] + B[i]; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /tests/35_json_mixed_args_record_replay/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void saxpy(int* Z, int* X, int* Y, int A) { 2 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/16_task_host/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void process(int* A, int* factor) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i] = i * factor[0]; 6 | } 7 | 8 | -------------------------------------------------------------------------------- /tests/22_json_mixed_args/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void saxpy(__global int* restrict Z, __global int* restrict X, __global int* restrict Y, int A) { 2 | size_t id = get_global_id(0); 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/28_json2/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void vecadd(int* C, int* A, int* B) { 4 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 5 | C[i] += A[i] + B[i]; 6 | } 7 | 8 | 
-------------------------------------------------------------------------------- /tests/34_set_mem/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void vecadd(int* A, int* B, int* C) { 4 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 5 | C[i] = A[i] + B[i]; 6 | } 7 | 8 | -------------------------------------------------------------------------------- /apps/auto_dag_creation/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void saxpy(__global float* restrict Z, float A, __global float* restrict X, __global float* restrict Y) { 2 | size_t id = get_global_id(0); 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .tox/ 3 | .vscode/settings.json 4 | *.egg-info 5 | output.csv 6 | output.txt 7 | *.zip 8 | .ipynb_checkpoints/ 9 | out.* 10 | /test_output/ 11 | /.vscode/ 12 | /envs/ 13 | report.xml 14 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void saxpy(__global int* restrict Z, __global int* restrict X, __global int* restrict Y, int SIZE, int A) { 2 | size_t id = get_global_id(0); 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/helloworld/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void uppercase(char* b, char* a) { 2 | int i = blockIdx.x * blockDim.x + threadIdx.x; 3 | if (a[i] >= 'a' && a[i] <= 'z') b[i] = a[i] + 'A' - 'a'; 4 | else b[i] = a[i]; 5 | } 6 | 7 | 
-------------------------------------------------------------------------------- /tests/36_double_json_mixed_args_record_replay/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void saxpy(double* Z, double* X, double* Y, double A) { 2 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/saxpy/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void saxpy(float* Z, float A, float* X, float* Y) { 4 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 5 | Z[id] = A * X[id] + Y[id]; 6 | } 7 | 8 | -------------------------------------------------------------------------------- /tests/22_json_mixed_args/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" __global__ void saxpy(int* Z, int* X, int* Y, int A) { 3 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 4 | Z[id] = A * X[id] + Y[id]; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /tests/28_json2/kernel.cl: -------------------------------------------------------------------------------- 1 | /* vecadd: each work-item prints A[id] and accumulates A[id] + B[id] into C[id]. */ 2 | __kernel void vecadd(__global int* restrict C, __global int* restrict A, __global int* restrict B) { 3 | size_t id = get_global_id(0); 4 | /* %i consumes an int; id is a size_t, so narrow it explicitly for the debug print. */ 5 | printf("A[%i] = %i \n", (int)id, A[id]); 6 | C[id] += A[id] + B[id]; 7 | } 8 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/kernel.hip: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void saxpy(int* Z, int* X, int* Y, int A) { 4 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 5 | Z[id] = A * X[id] + Y[id]; 6 | } 7 | 8 | 
-------------------------------------------------------------------------------- /tests/35_json_mixed_args_record_replay/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void saxpy(__global int* restrict Z, __global int* restrict X, __global int* restrict Y, int A) { 2 | size_t id = get_global_id(0); 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /apps/vecadd/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void vecadd(int* A, int* B, int* C) { 2 | int id = blockIdx.x * blockDim.x + threadIdx.x; 3 | C[id] = A[id] + B[id]; 4 | } 5 | 6 | extern "C" __global__ void empty(int* A, int* B, int* C) { 7 | } 8 | -------------------------------------------------------------------------------- /tests/30_task_info/kernel.cl: -------------------------------------------------------------------------------- 1 | /* vecadd: each work-item prints A[id] and stores A[id] + B[id] into C[id]. */ 2 | __kernel void vecadd(__global int* restrict A, __global int* restrict B, __global int* restrict C) { 3 | size_t id = get_global_id(0); 4 | /* %i consumes an int; id is a size_t, so narrow it explicitly for the debug print. */ 5 | printf("A[%i] = %i \n", (int)id, A[id]); 6 | C[id] = A[id] + B[id]; 7 | } 8 | -------------------------------------------------------------------------------- /tests/20_cublas/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test20_cublas 2 | 3 | CFLAGS += -I ${MATH_LIBS}/include -I ${NVIDIA_PATH}/include 4 | LDINC += -L ${MATH_LIBS}/lib64 -L ${NVIDIA_PATH}/lib64 5 | LDFLAGS += -lcublas -lcuda 6 | 7 | include ../Makefile.tests 8 | 9 | -------------------------------------------------------------------------------- /utils/build_host.sh: -------------------------------------------------------------------------------- 1 | VCMAKE="cmake3" 2 | if ! 
command -v cmake3 &> /dev/null 3 | then 4 | VCMAKE=cmake 5 | fi 6 | set -x; 7 | ${VCMAKE} ../ -DCMAKE_CXX_FLAGS="-g -fPIC" -DCMAKE_C_FLAGS="-g -fPIC" -DCMAKE_INSTALL_PREFIX=$PWD/../install_host $@ 8 | -------------------------------------------------------------------------------- /apps/auto_dag_creation/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void saxpy(float* Z, float A, float* X, float* Y) { 4 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 5 | Z[id] = A * X[id] + Y[id]; 6 | } 7 | 8 | -------------------------------------------------------------------------------- /tests/35_json_mixed_args_record_replay/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" __global__ void saxpy(int* Z, int* X, int* Y, int A) { 3 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 4 | Z[id] = A * X[id] + Y[id]; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /tests/36_double_json_mixed_args_record_replay/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void saxpy(__global double* restrict Z, __global double* restrict X, __global double* restrict Y, double A) { 2 | size_t id = get_global_id(0); 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | -------------------------------------------------------------------------------- /.gitlab-ci-scripts/runner_watcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | WATCH_ID=$1 3 | 4 | echo Watcher is watching: $WATCH_ID 5 | tail --pid $WATCH_ID -f /dev/null 6 | sleep 60 7 | echo Stopping slurm job: $(cat slurm.job) 8 | [ -s "slurm.job" ] && cat slurm.job | xargs scancel -------------------------------------------------------------------------------- /tests/01_init_finalize/test01_init_finalize.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char** argv) { 6 | iris_init(&argc, &argv, 1); 7 | iris_finalize(); 8 | return iris_error_count(); 9 | } 10 | -------------------------------------------------------------------------------- /apps/dagger/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | dag.png 3 | dagger-graphs/ 4 | dagger-payloads/ 5 | dagger-results/ 6 | dagger_runner 7 | dagger_test 8 | graph.json 9 | kernel.hip 10 | kernel.ptx 11 | src/ 12 | /daggerpy/ 13 | dag.pdf 14 | *.csv 15 | test-out.txt 16 | -------------------------------------------------------------------------------- /apps/qiree_backend/test/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: my_library test_quiree 3 | 4 | my_library: 5 | gcc -fPIC -shared -o libexample.so my_library.c 6 | 7 | test_quiree: 8 | gcc -o test_quiree test_quiree.c -ldl 9 | 10 | clean: 11 | rm libexample.so test_quiree 12 | -------------------------------------------------------------------------------- /tests/15_graph/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test15_graph 2 | 3 | all: $(TEST) kernel.openmp.so kernel.ptx kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.cl.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 9 | -------------------------------------------------------------------------------- /docs/sphinx/environment.yml: -------------------------------------------------------------------------------- 1 | name: IRIS-docs 2 | 3 | channels: 4 | - conda-forge 5 | - defaults 6 | 7 | dependencies: 8 | - python 9 | - breathe 10 | - pip 11 | - numpy 12 | - pip: 13 | - sphinxcontrib-contentui 14 | - sphinx_rtd_theme 15 | 16 | -------------------------------------------------------------------------------- /tests/09_dataflow/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test09_dataflow 2 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 3 | 4 | include ../Makefile.tests 5 | 6 | kernel.openmp.so: kernel.openmp.c 7 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 8 | 9 | -------------------------------------------------------------------------------- /tests/30_task_info/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test30_task_info 2 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 3 | 4 | include ../Makefile.tests 5 | 6 | kernel.openmp.so: kernel.openmp.c 7 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 8 | 9 | -------------------------------------------------------------------------------- /tests/36_double_json_mixed_args_record_replay/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" __global__ void saxpy(double* Z, double* X, double* Y, double A) { 3 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 4 | Z[id] = A * X[id] + Y[id]; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /include/iris/iris.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_INCLUDE_IRIS_IRIS_H 2 | #define IRIS_INCLUDE_IRIS_IRIS_H 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | #include 8 | #endif //__cplusplus 9 | 10 | #endif /* IRIS_INCLUDE_IRIS_IRIS_H */ 11 | 12 | -------------------------------------------------------------------------------- /src/runtime/Retainable.cpp: -------------------------------------------------------------------------------- 1 | /* Hands out a fresh id on every call; the counter starts at 1, so the first id returned is 2. */ 2 | unsigned long iris_create_new_uid() { 3 | static unsigned long uid = 1UL; 4 | /* A single atomic increment keeps ids unique across threads; a CAS loop that re-reads uid for its expected value can hand the same id to two racing callers. */ 5 | return __sync_add_and_fetch(&uid, 1UL); 6 | } 7 | 8 | -------------------------------------------------------------------------------- /tests/28_json2/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test28_json2 2 | 3 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 9 | 10 | -------------------------------------------------------------------------------- /tests/33_graph_cpp/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test33_graph_cpp 2 | CPP=1 3 | all: $(TEST) kernel.openmp.so kernel.ptx kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 9 | -------------------------------------------------------------------------------- /tests/14_permanent_task/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test14_permanent_task 2 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 3 | 4 | include ../Makefile.tests 5 | 6 | kernel.openmp.so: kernel.openmp.c 7 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 8 | 9 | -------------------------------------------------------------------------------- /tests/25_random_tasks/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void add1(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(A) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | A[i]++; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/14_permanent_task/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(A) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | A[i]++; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /include/iris/iris_errno.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_INCLUDE_IRIS_IRIS_ERRNO_H 2 | 
#define IRIS_INCLUDE_IRIS_IRIS_ERRNO_H 3 | 4 | #define IRIS_SUCCESS 0 5 | #define IRIS_ERROR -1 6 | #define IRIS_WARNING -2 7 | 8 | #endif /* IRIS_INCLUDE_IRIS_IRIS_ERRNO_H */ 9 | 10 | -------------------------------------------------------------------------------- /tests/10_multikernelexecution/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test10_multikernelexecution 2 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 3 | 4 | include ../Makefile.tests 5 | 6 | kernel.openmp.so: kernel.openmp.c 7 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 8 | 9 | -------------------------------------------------------------------------------- /tests/15_graph/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id]++; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/16_task_host/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test16_task_host 2 | 3 | all: $(TEST) kernel.openmp.so kernel.ptx kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.cl.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 9 | 10 | -------------------------------------------------------------------------------- /tests/18_record/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id]++; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/19_replay/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id]++; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/33_graph_cpp/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id]++; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /apps/custom_policy/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void setid(int* mem, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(mem) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | mem[i] = i; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/17_json/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, 
IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id] = _id; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/22_json_mixed_args/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test22_json_mixed_args 2 | 3 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 9 | 10 | -------------------------------------------------------------------------------- /tests/27_deadlock2/Makefile: -------------------------------------------------------------------------------- 1 | CPP=1 2 | 3 | TEST=test27_deadlock2 4 | 5 | all: $(TEST) kernel.openmp.so kernel.hip kernel.ptx 6 | 7 | include ../Makefile.tests 8 | 9 | kernel.openmp.so: kernel.openmp.c 10 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 11 | -------------------------------------------------------------------------------- /apps/vecadd/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void vecadd(int* A, int* B, int* C) { 4 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 5 | C[i] = A[i] + B[i]; 6 | } 7 | 8 | extern "C" __global__ void empty(int* A, int* B, int* C) { 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/04_enclosing_targets/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test04_enclosing_targets 2 | 3 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 9 | 10 | -------------------------------------------------------------------------------- /tests/04_enclosing_targets/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void loop0(__global int* restrict A) { 2 | size_t i = get_global_id(0); 3 | A[i] *= 2; 4 | } 5 | 6 | __kernel void loop1(__global int* restrict B, __global int* restrict A) { 7 | size_t i = get_global_id(0); 8 | B[i] += A[i]; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/29_data_mem/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id] = _id; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /apps/2tasks/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void kernel0(__global float* dst, __global float* src) { 2 | int id = get_global_id(0); 3 | dst[id] = src[id]; 4 | } 5 | 6 | __kernel void kernel1(__global float* dst, __global float* src) { 7 | int id = get_global_id(0); 8 | dst[id] += src[id]; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /apps/vecadd/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void vecadd(__global int* restrict A, __global int* restrict B, __global int* restrict C) { 2 | size_t id = get_global_id(0); 3 | C[id] = A[id] + B[id]; 4 | } 5 | 6 | __kernel void empty(__global int* restrict A, __global int* restrict B, __global int* restrict C) { 7 | } 8 | -------------------------------------------------------------------------------- /tests/04_enclosing_targets/kernel.cu: 
-------------------------------------------------------------------------------- 1 | extern "C" __global__ void loop0(int* A) { 2 | int id = blockIdx.x * blockDim.x + threadIdx.x; 3 | A[id] *= 2; 4 | } 5 | 6 | extern "C" __global__ void loop1(int* B, int* A) { 7 | int id = blockIdx.x * blockDim.x + threadIdx.x; 8 | B[id] += A[id]; 9 | } 10 | -------------------------------------------------------------------------------- /tests/07_policy_register/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id] = _id; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/27_deadlock2/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy(int* dst, int *src, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t i; 5 | #pragma omp parallel for shared(dst, src) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | dst[i] = src[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/28_json2/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void vecadd(int* C, int* A, int* B, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | C[i] += A[i] + B[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/34_set_mem/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void vecadd(int* A, int* B, int* C, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for 
shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | C[i] = A[i] + B[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /src/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | from .iris import * 2 | 3 | import sys 4 | import types 5 | 6 | module = sys.modules[__name__] 7 | 8 | for attr in dir(module): 9 | if isinstance(getattr(module, attr), types.FunctionType): 10 | globals()[attr] = getattr(module, attr) 11 | 12 | del sys, types, module 13 | -------------------------------------------------------------------------------- /tests/18_record/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test18_record 2 | 3 | all: $(TEST) kernel.openmp.so kernel.hip kernel.ptx 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.cl.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 9 | 10 | cleanjson: 11 | rm -f *.json 12 | -------------------------------------------------------------------------------- /tests/19_replay/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test19_replay 2 | 3 | all: $(TEST) kernel.openmp.so kernel.hip kernel.ptx 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.cl.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 9 | 10 | cleanjson: 11 | rm -f *.json 12 | -------------------------------------------------------------------------------- /tests/30_task_info/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void vecadd(int* A, int* B, int* C, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | C[i] = A[i] + B[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/35_json_mixed_args_record_replay/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test35_json_mixed_args_record_replay 2 | 3 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 9 | 10 | -------------------------------------------------------------------------------- /tests/16_task_host/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, int* factor, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A, factor) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id] = _id * factor[0]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /apps/2tasks/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void kernel0(float* dst, float* src) { 2 | int id = blockIdx.x * blockDim.x + threadIdx.x; 3 | dst[id] = src[id]; 4 | } 5 | 6 | extern "C" __global__ void kernel1(float* dst, float* src) { 7 | int id = blockIdx.x * blockDim.x + threadIdx.x; 8 | dst[id] += src[id]; 9 | } 10 | 11 | 
-------------------------------------------------------------------------------- /apps/saxpy/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern void saxpy(float* Z, float A, float* X, float* Y, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t i; 5 | #pragma omp parallel for shared(Z, A, X, Y) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | Z[i] = A * X[i] + Y[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/22_json_mixed_args/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* OpenMP saxpy kernel: Z[id] = A * X[id] + Y[id]. The private() clause must name the loop index declared in this function (id). */ 4 | void saxpy(int* Z, int* X, int* Y, int A, IRIS_OPENMP_KERNEL_ARGS) { 5 | size_t id; 6 | #pragma omp parallel for shared(Z, X, Y) private(id) 7 | IRIS_OPENMP_KERNEL_BEGIN(id) 8 | Z[id] = A * X[id] + Y[id]; 9 | IRIS_OPENMP_KERNEL_END 10 | } 11 | 12 | -------------------------------------------------------------------------------- /tests/36_double_json_mixed_args_record_replay/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test36_double_json_mixed_args_record_replay 2 | 3 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 9 | 10 | -------------------------------------------------------------------------------- /tests/23_multigraph/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test23_multigraph 2 | 3 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.cl.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 9 | 10 | run: $(TEST) kernel.ptx 11 | ./$(TEST) 4096 6 5 12 | -------------------------------------------------------------------------------- /tests/04_enclosing_targets/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void loop0(int* A) { 4 | int id = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[id] *= 2; 6 | } 7 | 8 | extern "C" __global__ void loop1(int* B, int* A) { 9 | int id = blockIdx.x * blockDim.x + threadIdx.x; 10 | B[id] += A[id]; 11 | } 12 | -------------------------------------------------------------------------------- /tests/26_env_set/Makefile: -------------------------------------------------------------------------------- 1 | CPP=1 2 | 3 | TEST=test26_env_set 4 | all: $(TEST) kernel.ptx kernel-negative.ptx 5 | 6 | include ../Makefile.tests 7 | 8 | ifeq ($(NVCC_TEST),) 9 | kernel-negative.ptx: kernel-negative.cu 10 | @echo "No NVCC compiler found" 11 | else 12 | kernel-negative.ptx: kernel-negative.cu 13 | $(NVCC) -ptx $^ 14 | endif 15 | -------------------------------------------------------------------------------- /tests/17_json/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test17_json 2 | 3 | all: $(TEST) kernel.openmp.so kernel.hip kernel.ptx 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.cl.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 9 | 10 | clean: 11 | rm -f kernel.ptx kernel.hip kernel.openmp.so test17_json 12 | -------------------------------------------------------------------------------- /tests/35_json_mixed_args_record_replay/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void saxpy(int* Z, int* X, int* Y, int A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t id; /* loop index; this is the variable the private() clause must name */ 5 | #pragma omp parallel for shared(Z, X, Y) private(id) 6 | IRIS_OPENMP_KERNEL_BEGIN(id) 7 | Z[id] = A * X[id] + Y[id]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/05_snapdragon/kernel.omp.mk: -------------------------------------------------------------------------------- 1 | IRIS=$(HOME)/work/iris-rts 2 | 3 | LOCAL_PATH := $(call my-dir) 4 | 5 | include $(CLEAR_VARS) 6 | LOCAL_MODULE:=kernel.omp.so 7 | LOCAL_MODULE_FILENAME:=kernel.omp 8 | LOCAL_C_INCLUDES:=$(IRIS)/include 9 | LOCAL_SRC_FILES:= $(IRIS)/apps/saxpy/kernel.omp.c 10 | LOCAL_LDFLAGS:=-fopenmp 11 | include $(BUILD_SHARED_LIBRARY) 12 | 13 | -------------------------------------------------------------------------------- /tests/25_random_tasks/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test25_random_tasks 2 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 3 | 4 | include ../Makefile.tests 5 | 6 | kernel.openmp.so: kernel.openmp.c 7 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I.
$(CFLAGS) -o $@ $^ 8 | 9 | clean: 10 | rm -f kernel.ptx kernel.hip kernel.openmp.so test25_random_tasks 11 | -------------------------------------------------------------------------------- /apps/helloworld/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void uppercase(char* b, char* a, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(b, a) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | if (a[i] >= 'a' && a[i] <= 'z') b[i] = a[i] + 'A' - 'a'; 8 | else b[i] = a[i]; 9 | IRIS_OPENMP_KERNEL_END 10 | } 11 | 12 | -------------------------------------------------------------------------------- /include/iris/iris_hexagon_imp.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_INCLUDE_IRIS_HEXAGON_IMP_H 2 | #define IRIS_INCLUDE_IRIS_HEXAGON_IMP_H 3 | 4 | #define IRIS_HEXAGON_KERNEL_ARGS int32 _off, int32 _ndr 5 | #define IRIS_HEXAGON_KERNEL_BEGIN(i) for (i = _off; i < _off + _ndr; i++) { 6 | #define IRIS_HEXAGON_KERNEL_END } 7 | 8 | #endif /* IRIS_INCLUDE_IRIS_HEXAGON_IMP_H */ 9 | 10 | -------------------------------------------------------------------------------- /tests/36_double_json_mixed_args_record_replay/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void saxpy(double* Z, double* X, double* Y, double A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t id; /* loop index; this is the variable the private() clause must name */ 5 | #pragma omp parallel for shared(Z, X, Y) private(id) 6 | IRIS_OPENMP_KERNEL_BEGIN(id) 7 | Z[id] = A * X[id] + Y[id]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | -------------------------------------------------------------------------------- /apps/2tasks/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: 2tasks kernel.openmp.so kernel.ptx 4 | 5 | 2tasks: 2tasks.c 6 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 7 | 8 |
kernel.ptx: kernel.cu 9 | $(NVCC) -ptx $^ 10 | 11 | kernel.openmp.so: kernel.openmp.c 12 | $(CC) $(CFLAGS) -O3 -fopenmp -fPIC -shared -I. -o $@ $^ 13 | 14 | clean: 15 | rm -f 2tasks kernel.openmp.so 16 | -------------------------------------------------------------------------------- /apps/vecadd/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void vecadd(int* A, int* B, int* C, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | C[i] = A[i] + B[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | void empty(int* A, int* B, int* C, IRIS_OPENMP_KERNEL_ARGS) { 12 | } 13 | 14 | -------------------------------------------------------------------------------- /apps/benchmarking/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef SC20_TIMER_H 2 | #define SC20_TIMER_H 3 | 4 | #include 5 | 6 | double now() { 7 | static double boot = 0.0; 8 | struct timespec t; 9 | clock_gettime(CLOCK_REALTIME, &t); 10 | if (boot == 0.0) boot = t.tv_sec + 1.e-9 * t.tv_nsec; 11 | return t.tv_sec + 1.e-9 * t.tv_nsec - boot; 12 | } 13 | 14 | #endif /* end of SC20_TIMER_H */ 15 | 16 | -------------------------------------------------------------------------------- /apps/dagger/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef DAGGER_TIMER_H 2 | #define DAGGER_TIMER_H 3 | 4 | #include 5 | 6 | double now() { 7 | static double boot = 0.0; 8 | struct timespec t; 9 | clock_gettime(CLOCK_REALTIME, &t); 10 | if (boot == 0.0) boot = t.tv_sec + 1.e-9 * t.tv_nsec; 11 | return t.tv_sec + 1.e-9 * t.tv_nsec - boot; 12 | } 13 | 14 | #endif /* end of DAGGER_TIMER_H */ 15 | 16 | -------------------------------------------------------------------------------- /tests/23_multigraph/kernel.cl: -------------------------------------------------------------------------------- 
1 | __kernel void ijk(__global double* C, __global double* A, __global double* B) { 2 | size_t i = get_global_id(0); 3 | size_t j = get_global_id(1); 4 | size_t SIZE = get_global_size(0); 5 | 6 | double sum = 0.0; 7 | for (size_t k = 0; k < SIZE; k++) { 8 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 9 | } 10 | C[i * SIZE + j] = sum; 11 | } 12 | -------------------------------------------------------------------------------- /include/iris/iris_poly_types.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_INCLUDE_IRIS_POLY_TYPES_H 2 | #define IRIS_INCLUDE_IRIS_POLY_TYPES_H 3 | 4 | #include 5 | 6 | typedef struct { 7 | size_t typesz; 8 | size_t s0; 9 | size_t s1; 10 | size_t r0; 11 | size_t r1; 12 | size_t w0; 13 | size_t w1; 14 | int dim; 15 | } iris_poly_mem; 16 | 17 | #endif /* IRIS_INCLUDE_IRIS_POLY_TYPES_H */ 18 | 19 | -------------------------------------------------------------------------------- /scheduling-policies/graph-prediction/generate_baseline_heatmap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DAGGER just symbolically linked here for ease of use. 
4 | #ln -s ../../apps/dagger dagger 5 | 6 | # Run the benchmark evaluation 7 | ./run-baseline-evaluation.sh 8 | 9 | # Plot the result 10 | python3 ./dagger/gantt/heatmap.py --output-file baseline-heatmap.pdf --directory ./results/ --height=5 11 | 12 | -------------------------------------------------------------------------------- /tests/32_json3/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef DAGGER_TIMER_H 2 | #define DAGGER_TIMER_H 3 | 4 | #include 5 | 6 | double now() { 7 | static double boot = 0.0; 8 | struct timespec t; 9 | clock_gettime(CLOCK_REALTIME, &t); 10 | if (boot == 0.0) boot = t.tv_sec + 1.e-9 * t.tv_nsec; 11 | return t.tv_sec + 1.e-9 * t.tv_nsec - boot; 12 | } 13 | 14 | #endif /* end of DAGGER_TIMER_H */ 15 | 16 | -------------------------------------------------------------------------------- /scheduling-policies/gnn-graph-prediction/generate_baseline_heatmap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DAGGER just symbolically linked here for ease of use. 
4 | #ln -s ../../apps/dagger dagger 5 | 6 | # Run the benchmark evaluation 7 | ./run-baseline-evaluation.sh 8 | 9 | # Plot the result 10 | python3 ./dagger/gantt/heatmap.py --output-file baseline-heatmap.pdf --directory ./results/ --height=5 11 | 12 | -------------------------------------------------------------------------------- /src/runtime/Filter.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_FILTER_H 2 | #define IRIS_SRC_RT_FILTER_H 3 | 4 | namespace iris { 5 | namespace rt { 6 | 7 | class Task; 8 | 9 | class Filter { 10 | public: 11 | Filter() {} 12 | virtual ~Filter() {} 13 | 14 | virtual int Execute(Task* task) = 0; 15 | }; 16 | 17 | } /* namespace rt */ 18 | } /* namespace iris */ 19 | 20 | #endif /* IRIS_SRC_RT_FILTER_H */ 21 | 22 | -------------------------------------------------------------------------------- /apps/dgemm/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void ijk(__global double* restrict C, __global double* restrict A, __global double* restrict B) { 2 | size_t i = get_global_id(1); 3 | size_t j = get_global_id(0); 4 | size_t SIZE = get_global_size(0); 5 | 6 | double sum = 0.0; 7 | for (size_t k = 0; k < SIZE; k++) { 8 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 9 | } 10 | C[i * SIZE + j] = sum; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /apps/sgemm/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void ijk(__global float* restrict C, __global float* restrict A, __global float* restrict B) { 2 | size_t i = get_global_id(1); 3 | size_t j = get_global_id(0); 4 | size_t SIZE = get_global_size(0); 5 | 6 | float sum = 0.0; 7 | for (size_t k = 0; k < SIZE; k++) { 8 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 9 | } 10 | C[i * SIZE + j] = sum; 11 | } 12 | 13 | 
-------------------------------------------------------------------------------- /apps/dgemm/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void ijk(double* C, double* A, double* B) { 2 | size_t i = blockIdx.y * blockDim.y + threadIdx.y; 3 | size_t j = blockIdx.x * blockDim.x + threadIdx.x; 4 | size_t SIZE = gridDim.y * blockDim.y; 5 | 6 | double sum = 0.0; 7 | for (size_t k = 0; k < SIZE; k++) { 8 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 9 | } 10 | C[i * SIZE + j] = sum; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /apps/sgemm/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void ijk(float* C, float* A, float* B) { 2 | size_t i = blockIdx.y * blockDim.y + threadIdx.y; 3 | size_t j = blockIdx.x * blockDim.x + threadIdx.x; 4 | size_t SIZE = gridDim.y * blockDim.y; 5 | 6 | float sum = 0.0; 7 | for (size_t k = 0; k < SIZE; k++) { 8 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 9 | } 10 | C[i * SIZE + j] = sum; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /tests/23_multigraph/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ void ijk(double* C, double* A, double* B) { 2 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 3 | size_t j = blockIdx.y * blockDim.y + threadIdx.y; 4 | size_t SIZE = gridDim.x * blockDim.x; 5 | 6 | double sum = 0.0; 7 | for (size_t k = 0; k < SIZE; k++) { 8 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 9 | } 10 | C[i * SIZE + j] = sum; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /src/runtime/PolicyBlockCycle.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Policy.h" 4 | 5 | namespace iris { 6 | namespace rt { 7 | 8 
| class PolicyBlockCycle : public Policy { 9 | public: 10 | PolicyBlockCycle(Scheduler* scheduler); 11 | virtual ~PolicyBlockCycle(); 12 | 13 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 14 | }; 15 | 16 | } /* namespace rt */ 17 | } /* namespace iris */ 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /utils/android_deps.min: -------------------------------------------------------------------------------- 1 | SUPPORTED_VS = $(default_VS) 2 | 3 | # must list all the dependencies of this project 4 | DEPENDENCIES = \ 5 | RPCMEM \ 6 | 7 | # each dependency needs a directory definition 8 | # the form is _DIR 9 | # for example: 10 | # DEPENDENCIES = FOO 11 | # FOO_DIR = $(HEXAGON_SDK_ROOT)/examples/common/foo 12 | # 13 | RPCMEM_DIR = $(HEXAGON_SDK_ROOT)/libs/common/rpcmem 14 | 15 | 16 | -------------------------------------------------------------------------------- /apps/vecadd/build_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | WORKING_DIRECTORY=`pwd` 3 | 4 | #IRIS 5 | cd ../.. ; ./build.sh; 6 | cd $WORKING_DIRECTORY 7 | 8 | #Charm-SYCL 9 | git clone -b irisv3 git@code.ornl.gov:fujita/charm-sycl 10 | [ $? -ne 0 ] && exit 1 # explicit status: a bare exit would return 0 (the status of the [ test) 11 | cp build_charm_sycl.sh charm-sycl 12 | cd charm-sycl 13 | ./build_charm_sycl.sh 14 | [ $? -ne 0 ] && exit 1 # explicit status: a bare exit would return 0 (the status of the [ test) 15 | 16 | cd $WORKING_DIRECTORY 17 | echo "todo dpc++ and adaptivecpp!"
18 | -------------------------------------------------------------------------------- /tests/09_dataflow/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void kernel_A(__global int* restrict AB) { 2 | size_t i = get_global_id(0); 3 | AB[i] = i; 4 | } 5 | 6 | __kernel void kernel_B(__global int* restrict AB, __global int* restrict BC) { 7 | size_t i = get_global_id(0); 8 | BC[i] = AB[i] * 10; 9 | } 10 | 11 | __kernel void kernel_C(__global int* restrict BC) { 12 | size_t i = get_global_id(0); 13 | BC[i] = BC[i] * 2; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /tests/37_opencl_icd/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void kernel_A(__global int* restrict AB) { 2 | size_t i = get_global_id(0); 3 | AB[i] = i; 4 | } 5 | 6 | __kernel void kernel_B(__global int* restrict AB, __global int* restrict BC) { 7 | size_t i = get_global_id(0); 8 | BC[i] = AB[i] * 10; 9 | } 10 | 11 | __kernel void kernel_C(__global int* restrict BC) { 12 | size_t i = get_global_id(0); 13 | BC[i] = BC[i] * 2; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/runtime/Config.h.in: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_RT_SRC_CONFIG_H 2 | #define IRIS_RT_SRC_CONFIG_H 3 | 4 | #define IRIS_VERSION_MAJOR @IRIS_VERSION_MAJOR@ 5 | #define IRIS_VERSION_MINOR @IRIS_VERSION_MINOR@ 6 | #define IRIS_VERSION_PATCH @IRIS_VERSION_PATCH@ 7 | 8 | 9 | #cmakedefine01 USE_HUB 10 | #cmakedefine01 USE_SIGHANDLER 11 | 12 | #include 13 | #include 14 | 15 | #endif /* IRIS_RT_SRC_CONFIG_H */ 16 | 17 | -------------------------------------------------------------------------------- /tests/38_offset/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void vecadd(__global int* restrict A, 
__global int* restrict B, __global int* restrict C) { 2 | size_t id = get_global_id(0); 3 | C[id] = A[id] + B[id]; 4 | } 5 | __kernel void blockadd(__global int* restrict A, __global int* restrict B, __global int* restrict C, unsigned long SIZE) { 6 | size_t x = get_global_id(0); 7 | size_t y = get_global_id(1); 8 | C[y*SIZE+x] = A[y*SIZE+x] + B[y*SIZE+x]; 9 | } 10 | -------------------------------------------------------------------------------- /apps/dgemm/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: dgemm kernel.openmp.so 4 | 5 | kernel.ptx: kernel.cu 6 | $(NVCC) -ptx $^ 7 | 8 | kernel.hip: kernel.hip.cpp 9 | $(HIPCC) --genco -o $@ $^ 10 | 11 | kernel.openmp.so: kernel.openmp.c 12 | $(CC) $(CFLAGS) -g -std=c99 -fopenmp -fPIC -shared -I. -o $@ $^ 13 | 14 | dgemm: dgemm.c 15 | $(CC) $(CFLAGS) -g -O3 -std=c99 -o $@ $^ $(LDFLAGS) 16 | 17 | clean: 18 | rm -f dgemm kernel.openmp.so 19 | -------------------------------------------------------------------------------- /apps/dgemm/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void ijk(double* C, double* A, double* B) { 4 | size_t i = blockIdx.y * blockDim.y + threadIdx.y; 5 | size_t j = blockIdx.x * blockDim.x + threadIdx.x; 6 | size_t SIZE = gridDim.y * blockDim.y; 7 | 8 | double sum = 0.0; 9 | for (size_t k = 0; k < SIZE; k++) { 10 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 11 | } 12 | C[i * SIZE + j] = sum; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /apps/sgemm/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: sgemm kernel.openmp.so 4 | 5 | kernel.ptx: kernel.cu 6 | $(NVCC) -ptx $^ 7 | 8 | kernel.hip: kernel.hip.cpp 9 | $(HIPCC) --genco -o $@ $^ 10 | 11 | kernel.openmp.so: kernel.openmp.c 12 | 
$(CC) $(CFLAGS) -g -std=c99 -fopenmp -fPIC -shared -I. -o $@ $^ 13 | 14 | sgemm: sgemm.c 15 | $(CC) $(CFLAGS) -g -O3 -std=c99 -o $@ $^ $(LDFLAGS) 16 | 17 | clean: 18 | rm -f sgemm kernel.openmp.so 19 | -------------------------------------------------------------------------------- /apps/sgemm/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void ijk(float* C, float* A, float* B) { 4 | size_t i = blockIdx.y * blockDim.y + threadIdx.y; 5 | size_t j = blockIdx.x * blockDim.x + threadIdx.x; 6 | size_t SIZE = gridDim.y * blockDim.y; 7 | 8 | float sum = 0.0; 9 | for (size_t k = 0; k < SIZE; k++) { 10 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 11 | } 12 | C[i * SIZE + j] = sum; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /tests/38_offset_subbuffer/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void vecadd(__global int* restrict A, __global int* restrict B, __global int* restrict C) { 2 | size_t id = get_global_id(0); 3 | C[id] = A[id] + B[id]; 4 | } 5 | __kernel void blockadd(__global int* restrict A, __global int* restrict B, __global int* restrict C, unsigned long SIZE) { 6 | size_t x = get_global_id(0); 7 | size_t y = get_global_id(1); 8 | C[y*SIZE+x] = A[y*SIZE+x] + B[y*SIZE+x]; 9 | } 10 | -------------------------------------------------------------------------------- /tests/23_multigraph/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | extern "C" __global__ void ijk(double* C, double* A, double* B) { 3 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 4 | size_t j = blockIdx.y * blockDim.y + threadIdx.y; 5 | size_t SIZE = gridDim.x * blockDim.x; 6 | 7 | double sum = 0.0; 8 | for (size_t k = 0; k < SIZE; k++) { 9 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 10 | } 11 | C[i * SIZE + j] = sum; 12 | } 13 
| 14 | -------------------------------------------------------------------------------- /src/runtime/TGPolicy.cpp: -------------------------------------------------------------------------------- 1 | #include "TGPolicy.h" 2 | #include "Command.h" 3 | #include "Task.h" 4 | #include "Debug.h" 5 | #include "Scheduler.h" 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | TGPolicy::TGPolicy() { 11 | } 12 | 13 | bool TGPolicy::IsKernelSupported(Task *task, Device *dev) { 14 | return task->IsKernelSupported(dev); 15 | } 16 | 17 | TGPolicy::~TGPolicy() { 18 | } 19 | 20 | 21 | } /* namespace rt */ 22 | } /* namespace iris */ 23 | 24 | 25 | -------------------------------------------------------------------------------- /apps/custom_policy/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: custom_policy kernel.openmp.so libPolicyGWS.so 4 | 5 | custom_policy: custom_policy.c 6 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 7 | 8 | kernel.openmp.so: kernel.openmp.c 9 | $(CC) $(CFLAGS) -O3 -fopenmp -fPIC -shared -I. 
-o $@ $^ 10 | 11 | libPolicyGWS.so: PolicyGWS.cpp 12 | $(CXX) $(CXXFLAGS) -std=c++11 -fPIC -shared -o $@ $^ 13 | 14 | clean: 15 | rm -f custom_policy kernel.openmp.so libPolicyGWS.so 16 | -------------------------------------------------------------------------------- /apps/qiree_backend/qiree_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import iris 4 | import numpy as np 5 | import sys 6 | 7 | iris.init() 8 | 9 | SIZE = 8 if len(sys.argv) == 1 else int(sys.argv[1]) 10 | A = 10.0 11 | 12 | n = 64 13 | tasks = [iris.task() for i in range(n)] 14 | #task = iris.task() 15 | 16 | for i in range(n): 17 | tasks[i].kernel("bell.ll", 1, [], [SIZE], [], [] , [] ) 18 | tasks[i].submit(iris.iris_default, sync=0) 19 | 20 | iris.finalize() 21 | 22 | -------------------------------------------------------------------------------- /tests/09_dataflow/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ 2 | void kernel_A(int* AB) { 3 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 4 | AB[i] = i; 5 | } 6 | 7 | extern "C" __global__ 8 | void kernel_B(int* AB, int* BC) { 9 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 10 | BC[i] = AB[i] * 10; 11 | } 12 | 13 | extern "C" __global__ 14 | void kernel_C(int* BC) { 15 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 16 | BC[i] = BC[i] * 2; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /utils/build_android.sh: -------------------------------------------------------------------------------- 1 | VCMAKE="cmake3" 2 | if ! 
command -v cmake3 &> /dev/null 3 | then 4 | VCMAKE=cmake 5 | fi 6 | set -x; 7 | echo "Extra args: $@" 8 | ${VCMAKE} -DCMAKE_C_FLAGS="-fPIC -g" -DCMAKE_CXX_FLAGS="-fPIC -g" -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DSNAPDRAGON=ON -DANDROID_PLATFORM=28 -DANDROID_ABI=arm64-v8a -DANDROID_ARM_NEON=ON -DUSE_NDK=ON -DCMAKE_INSTALL_PREFIX=$PWD/../install -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON .. $@ 9 | -------------------------------------------------------------------------------- /src/runtime/LoaderOpenMP.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_LOADER_OPENMP_H 2 | #define IRIS_SRC_RT_LOADER_OPENMP_H 3 | 4 | #include "Loader.h" 5 | #include "HostInterface.h" 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class LoaderOpenMP : public HostInterfaceClass { 11 | public: 12 | LoaderOpenMP(); 13 | ~LoaderOpenMP(); 14 | 15 | int LoadFunctions(); 16 | }; 17 | 18 | } /* namespace rt */ 19 | } /* namespace iris */ 20 | 21 | #endif /* IRIS_SRC_RT_LOADER_OPENMP_H */ 22 | 23 | -------------------------------------------------------------------------------- /apps/dgemm/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void ijk(double* C, double* A, double* B, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN (i) 7 | for (int j = 0; j < _ndr; j++) { 8 | double sum = 0.0; 9 | for (int k = 0; k < _ndr; k++) { 10 | sum += A[i * _ndr + k] * B[k * _ndr + j]; 11 | } 12 | C[i * _ndr + j] = sum; 13 | } 14 | IRIS_OPENMP_KERNEL_END 15 | } 16 | 17 | -------------------------------------------------------------------------------- /apps/sgemm/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void ijk(float* C, float* A, float* B, IRIS_OPENMP_KERNEL_ARGS) { 4 | int 
i; 5 | #pragma omp parallel for shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN (i) 7 | for (int j = 0; j < _ndr; j++) { 8 | float sum = 0.0; 9 | for (int k = 0; k < _ndr; k++) { 10 | sum += A[i * _ndr + k] * B[k * _ndr + j]; 11 | } 12 | C[i * _ndr + j] = sum; 13 | } 14 | IRIS_OPENMP_KERNEL_END 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/runtime/PolicyData.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_DATA_H 2 | #define IRIS_SRC_RT_POLICY_DATA_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class PolicyData : public Policy { 10 | public: 11 | PolicyData(Scheduler* scheduler); 12 | virtual ~PolicyData(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | 16 | }; 17 | 18 | } /* namespace rt */ 19 | } /* namespace iris */ 20 | 21 | #endif /* IRIS_SRC_RT_POLICY_DATA_H */ 22 | -------------------------------------------------------------------------------- /src/runtime/LoaderHost2HIP.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_LOADER_HOST2HIP_H 2 | #define IRIS_SRC_RT_LOADER_HOST2HIP_H 3 | 4 | #include "Loader.h" 5 | #include "HostInterface.h" 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class LoaderHost2HIP : public HostInterfaceClass { 11 | public: 12 | LoaderHost2HIP(); 13 | ~LoaderHost2HIP(); 14 | 15 | int LoadFunctions(); 16 | }; 17 | 18 | } /* namespace rt */ 19 | } /* namespace iris */ 20 | 21 | #endif /* IRIS_SRC_RT_LOADER_HOST2HIP_H */ 22 | 23 | -------------------------------------------------------------------------------- /tests/04_enclosing_targets/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void loop0(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(A) private(i) 6 | 
IRIS_OPENMP_KERNEL_BEGIN(i) 7 | A[i] *= 2; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | void loop1(int* B, int* A, IRIS_OPENMP_KERNEL_ARGS) { 12 | int i; 13 | #pragma omp parallel for shared(B,A) private(i) 14 | IRIS_OPENMP_KERNEL_BEGIN(i) 15 | B[i] += A[i]; 16 | IRIS_OPENMP_KERNEL_END 17 | } 18 | -------------------------------------------------------------------------------- /src/runtime/PolicyDevice.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_DEVICE_H 2 | #define IRIS_SRC_RT_POLICY_DEVICE_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class PolicyDevice : public Policy { 10 | public: 11 | PolicyDevice(Scheduler* scheduler); 12 | virtual ~PolicyDevice(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | }; 16 | 17 | } /* namespace rt */ 18 | } /* namespace iris */ 19 | 20 | #endif /* IRIS_SRC_RT_POLICY_DEVICE_H */ 21 | -------------------------------------------------------------------------------- /src/runtime/SigHandler.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_SIGHANDLER_H 2 | #define IRIS_SRC_RT_SIGHANDLER_H 3 | 4 | #include 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class SigHandler { 10 | public: 11 | SigHandler(); 12 | ~SigHandler(); 13 | 14 | public: 15 | static void Handle(int signum, siginfo_t* si, void* arg); 16 | 17 | public: 18 | static struct sigaction sa_; 19 | }; 20 | 21 | } /* namespace rt */ 22 | } /* namespace iris */ 23 | 24 | #endif /* IRIS_SRC_RT_SIGHANDLER_H */ 25 | -------------------------------------------------------------------------------- /apps/helloworld/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void saxpy(float* Z, float A, float* X, float* Y) { 4 | size_t id = blockIdx.x * blockDim.x + threadIdx.x; 5 | Z[id] = A * X[id] + Y[id]; 6 
| } 7 | 8 | extern "C" __global__ void saxpy_with_offsets(float* Z, float A, float* X, float* Y, size_t blockOff_x, size_t blockOff_y, size_t blockOff_z) { 9 | size_t id = (blockOff_x + blockIdx.x) * blockDim.x + threadIdx.x; 10 | Z[id] = A * X[id] + Y[id]; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /src/runtime/LoaderHost2CUDA.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_LOADER_HOST2CUDA_H 2 | #define IRIS_SRC_RT_LOADER_HOST2CUDA_H 3 | 4 | #include "Loader.h" 5 | 6 | #include "HostInterface.h" 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | class LoaderHost2CUDA : public HostInterfaceClass { 12 | public: 13 | LoaderHost2CUDA(); 14 | ~LoaderHost2CUDA(); 15 | 16 | int LoadFunctions(); 17 | }; 18 | 19 | } /* namespace rt */ 20 | } /* namespace iris */ 21 | 22 | #endif /* IRIS_SRC_RT_LOADER_HOST2CUDA_H */ 23 | 24 | -------------------------------------------------------------------------------- /src/runtime/PolicyRandom.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_RANDOM_H 2 | #define IRIS_SRC_RT_POLICY_RANDOM_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class PolicyRandom : public Policy { 10 | public: 11 | PolicyRandom(Scheduler* scheduler); 12 | virtual ~PolicyRandom(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | 16 | }; 17 | 18 | } /* namespace rt */ 19 | } /* namespace iris */ 20 | 21 | #endif /* IRIS_SRC_RT_POLICY_RANDOM_H */ 22 | -------------------------------------------------------------------------------- /src/runtime/PolicyDepend.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_DEPEND_H 2 | #define IRIS_SRC_RT_POLICY_DEPEND_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class PolicyDepend: public 
Policy { 10 | public: 11 | PolicyDepend(Scheduler* scheduler); 12 | virtual ~PolicyDepend(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | 16 | }; 17 | 18 | } /* namespace rt */ 19 | } /* namespace iris */ 20 | 21 | #endif /* IRIS_SRC_RT_POLICY_DEPEND_H */ 22 | 23 | -------------------------------------------------------------------------------- /tests/32_json3/Makefile: -------------------------------------------------------------------------------- 1 | CPP=1 2 | 3 | TEST=test32_json3 4 | all: $(TEST) kernel.openmp.so kernel.ptx kernel.hip 5 | 6 | include ../Makefile.tests 7 | 8 | kernel.openmp.so: kernel.openmp.c 9 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 10 | 11 | kernel.spv: kernel.cl 12 | clang -cc1 -finclude-default-header -triple spir $^ -O0 -emit-llvm-bc -o kernel.bc 13 | llvm-spirv kernel.bc -o $@ 14 | 15 | clean: 16 | rm -f kernel.ptx kernel.spv kernel.hip kernel.openmp.so $(TEST) 17 | -------------------------------------------------------------------------------- /apps/dagger/plot_local_workgroup_sizes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pandas as pd 4 | import seaborn as sns 5 | from matplotlib import pyplot as plt 6 | plt.rcParams["figure.figsize"] = [7.00, 3.50] 7 | plt.rcParams["figure.autolayout"] = True 8 | df = pd.read_csv("dagger-results/lws_times.csv") 9 | 10 | print("Local workgroup execution times in csv file:", df) 11 | sns.barplot(data=df,x='size',y='secs',hue='dim') 12 | plt.xticks(rotation=45) 13 | plt.show() 14 | plt.savefig('dagger-graphs/lws_times.pdf') 15 | -------------------------------------------------------------------------------- /src/runtime/PolicyDefault.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_DEFAULT_H 2 | #define IRIS_SRC_RT_POLICY_DEFAULT_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace 
iris { 7 | namespace rt { 8 | 9 | class PolicyDefault : public Policy { 10 | public: 11 | PolicyDefault(Scheduler* scheduler); 12 | virtual ~PolicyDefault(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | 16 | }; 17 | 18 | } /* namespace rt */ 19 | } /* namespace iris */ 20 | 21 | #endif /* IRIS_SRC_RT_POLICY_DEFAULT_H */ 22 | -------------------------------------------------------------------------------- /tests/09_dataflow/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ 4 | void kernel_A(int* AB) { 5 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 6 | AB[i] = i; 7 | } 8 | 9 | extern "C" __global__ 10 | void kernel_B(int* AB, int* BC) { 11 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 12 | BC[i] = AB[i] * 10; 13 | } 14 | 15 | extern "C" __global__ 16 | void kernel_C(int* BC) { 17 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 18 | BC[i] = BC[i] * 2; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/signature.def: -------------------------------------------------------------------------------- 1 | IRIS_SINGLE_TASK(task0, "saxpy", target_dev, 1, 2 | NULL_OFFSET, GWS(SIZE), NULL_LWS, 3 | OUT_TASK(Z, int32_t *, int32_t, Z, sizeof(int32_t)*SIZE), 4 | IN_TASK(X, const int32_t *, int32_t, X, sizeof(int32_t)*SIZE), 5 | IN_TASK(Y, const int32_t *, int32_t, Y, sizeof(int32_t)*SIZE), 6 | PARAM(SIZE, int32_t, iris_cpu), 7 | PARAM(A, int32_t), 8 | PARAM(cuUsecPtr, int32_t*, iris_dsp), 9 | PARAM(cuCycPtr, int32_t*, iris_dsp)); 10 | 11 | -------------------------------------------------------------------------------- /apps/benchmarking/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source /auto/software/iris/setup_system.source 3 | 4 | if [ ! 
-n "$IRIS_INSTALL_ROOT" ]; then 5 | IRIS_INSTALL_ROOT="$HOME/.iris" 6 | fi 7 | 8 | export IRIS_SRC_DIR=../.. 9 | export WORKING_DIR=`pwd` 10 | export SYSTEM=`hostname` 11 | 12 | source $IRIS_INSTALL_ROOT/setup.source 13 | #start with a clean build of iris 14 | rm -f $IRIS_INSTALL_ROOT/lib64/libiris.so ; rm -f $IRIS_INSTALL_ROOT/lib/libiris.so ; 15 | cd $IRIS_SRC_DIR ; ./build.sh; [ $? -ne 0 ] && exit ; cd $WORKING_DIR ; 16 | 17 | -------------------------------------------------------------------------------- /tests/26_env_set/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void add1(__global int* A) { 2 | int i = get_global_id(0); 3 | A[i] = A[i] + 1; 4 | } 5 | 6 | __kernel void add1_v2(__global int* A) { 7 | int i = get_global_id(0); 8 | int a = A[i]; 9 | a++; 10 | a++; 11 | A[i] = a; 12 | } 13 | 14 | __kernel void add2(__global int* A) { 15 | int i = get_global_id(0); 16 | A[i] = A[i] + 2; 17 | } 18 | 19 | __kernel void add2_v2(__global int* A) { 20 | int i = get_global_id(0); 21 | int a = A[i]; 22 | a += 2; 23 | A[i] = a; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /tests/24_multi_kernels/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void add1(__global int* A) { 2 | int i = get_global_id(0); 3 | A[i] = A[i] + 1; 4 | } 5 | 6 | __kernel void add1_v2(__global int* A) { 7 | int i = get_global_id(0); 8 | int a = A[i]; 9 | a++; 10 | a++; 11 | A[i] = a; 12 | } 13 | 14 | __kernel void add2(__global int* A) { 15 | int i = get_global_id(0); 16 | A[i] = A[i] + 2; 17 | } 18 | 19 | __kernel void add2_v2(__global int* A) { 20 | int i = get_global_id(0); 21 | int a = A[i]; 22 | a += 2; 23 | A[i] = a; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /tests/26_env_set/kernel-negative.cl: 
-------------------------------------------------------------------------------- 1 | __kernel void add1(__global int* A) { 2 | int i = get_global_id(0); 3 | A[i] = A[i] - 1; 4 | } 5 | 6 | __kernel void add1_v2(__global int* A) { 7 | int i = get_global_id(0); 8 | int a = A[i]; 9 | a--; 10 | a--; 11 | A[i] = a; 12 | } 13 | 14 | __kernel void add2(__global int* A) { 15 | int i = get_global_id(0); 16 | A[i] = A[i] - 2; 17 | } 18 | 19 | __kernel void add2_v2(__global int* A) { 20 | int i = get_global_id(0); 21 | int a = A[i]; 22 | a -= 2; 23 | A[i] = a; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /tests/06_device/test06_device.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import iris 4 | 5 | iris.init(True) 6 | 7 | nplatforms = iris.platform_count() 8 | for i in range(nplatforms): 9 | name = iris.platform_info(i, iris.iris_name) 10 | print "platform[", i, "] name[", name, "]" 11 | 12 | ndevs = iris.device_count() 13 | for i in range(ndevs): 14 | vendor = iris.device_info(i, iris.iris_vendor) 15 | name = iris.device_info(i, iris.iris_name) 16 | print "device[", i, "] vendor[", vendor, "] name[", name, "]" 17 | 18 | iris.finalize() 19 | 20 | -------------------------------------------------------------------------------- /apps/2tasks/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void kernel0(float* dst, float* src, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | #pragma omp parallel for shared(dst, src) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | dst[i] = src[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | static void kernel1(float* dst, float* src, IRIS_OPENMP_KERNEL_ARGS) { 12 | int i; 13 | #pragma omp parallel for shared(dst, src) private(i) 14 | IRIS_OPENMP_KERNEL_BEGIN(i) 15 | dst[i] += src[i]; 16 | IRIS_OPENMP_KERNEL_END 17 | } 18 | 19 | 
-------------------------------------------------------------------------------- /utils/build_copy_device.sh: -------------------------------------------------------------------------------- 1 | rm -rf ship ; rm -rf build ; mkdir -p ship ; mkdir -p build ; cd build/; sh $IRIS/utils/build_android.sh -DCMAKE_INSTALL_PREFIX=$PWD/../ship -DCDSP_FLAG=ON $@ ; make -j16; make -j16 install; cd .. ; tar -cvzf install.tar.gz ship ; scp install.tar.gz mcmurdo:~/. ; ssh mcmurdo 'source /home/nqx/setup_android.source ; adb push install.tar.gz /data/local/tmp/. ; adb shell "cd /data/local/tmp/; tar -xvzf install.tar.gz ; "' 2 | echo "Build is installed and copied successfully to mcmurdo snapdragon device in location /data/local/tmp/ship" 3 | -------------------------------------------------------------------------------- /src/runtime/PolicyFirstToFinish.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_FIRST_TO_FINISH_H 2 | #define IRIS_SRC_RT_POLICY_FIRST_TO_FINISH_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class PolicyFirstToFinish : public Policy { 10 | public: 11 | PolicyFirstToFinish(Scheduler* scheduler); 12 | virtual ~PolicyFirstToFinish(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | }; 16 | 17 | } /* namespace rt */ 18 | } /* namespace iris */ 19 | 20 | #endif /* IRIS_SRC_RT_POLICY_FIRST_TO_FINISH_H */ 21 | 22 | -------------------------------------------------------------------------------- /apps/dagger/kernel.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | extern "C" __global__ void process(int* A) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i] *= 100; 6 | } 7 | 8 | extern "C" __global__ void ijk(double* C, double* A, double* B) { 9 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 10 | size_t j = blockIdx.y * blockDim.y + threadIdx.y; 11 | size_t SIZE = gridDim.x * 
blockDim.x; 12 | 13 | double sum = 0.0; 14 | for (size_t k = 0; k < SIZE; k++) { 15 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 16 | } 17 | C[i * SIZE + j] = sum; 18 | } 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/23_multigraph/kernel.cl.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void ijk(double* C, double* A, double* B, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(C, A, B) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | size_t SIZE = _bws[0]; 8 | size_t j, k; 9 | for (size_t j = 0; j < SIZE; j++) { 10 | double sum = 0.0; 11 | for (size_t k = 0; k < SIZE; k++) { 12 | sum += A[_id * SIZE + k] * B[k * SIZE + j]; 13 | } 14 | C[_id * SIZE + j] = sum; 15 | } 16 | IRIS_OPENMP_KERNEL_END 17 | } 18 | 19 | -------------------------------------------------------------------------------- /apps/dagger/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void process(int* A) { 4 | int i = blockIdx.x * blockDim.x + threadIdx.x; 5 | A[i] *= 100; 6 | } 7 | 8 | extern "C" __global__ void ijk(double* C, double* A, double* B) { 9 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 10 | size_t j = blockIdx.y * blockDim.y + threadIdx.y; 11 | size_t SIZE = gridDim.x * blockDim.x; 12 | 13 | double sum = 0.0; 14 | for (size_t k = 0; k < SIZE; k++) { 15 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 16 | } 17 | C[i * SIZE + j] = sum; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Author: Narasinga Rao Miniskar 3 | # Date: 06/06/2024 4 | # File: pyproject.toml 5 | # Contact: miniskarnr@ornl.gov 6 | # Comment: Files for python pip package 
7 | ##################################################### 8 | [tool.project] 9 | name = "iris" 10 | version = "3.0" 11 | description = "IRIS project with CMake build" 12 | authors = ["Narasinga Rao Miniskar "] 13 | 14 | [build-system] 15 | requires = ["setuptools>=42", "wheel", "cmake"] 16 | build-backend = "setuptools.build_meta" 17 | -------------------------------------------------------------------------------- /src/runtime/LoaderHost2OpenCL.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_LOADER_HOST2OpenCL_H 2 | #define IRIS_SRC_RT_LOADER_HOST2OpenCL_H 3 | 4 | #include "Loader.h" 5 | 6 | #include "HostInterface.h" 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | class LoaderHost2OpenCL : public HostInterfaceClass{ 12 | public: 13 | LoaderHost2OpenCL(const char *suffix); 14 | ~LoaderHost2OpenCL(); 15 | 16 | const char* library(); 17 | int LoadFunctions(); 18 | private: 19 | char *suffix_; 20 | }; 21 | 22 | } /* namespace rt */ 23 | } /* namespace iris */ 24 | 25 | #endif /* IRIS_SRC_RT_LOADER_HOST2OpenCL_H */ 26 | 27 | -------------------------------------------------------------------------------- /src/runtime/PolicyShortestDeviceQueue.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_SHORTEST_DEVICE_QUEUE_H 2 | #define IRIS_SRC_RT_POLICY_SHORTEST_DEVICE_QUEUE_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class PolicyShortestDeviceQueue : public Policy { 10 | public: 11 | PolicyShortestDeviceQueue(Scheduler* scheduler); 12 | virtual ~PolicyShortestDeviceQueue(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | }; 16 | 17 | } /* namespace rt */ 18 | } /* namespace iris */ 19 | 20 | #endif /* IRIS_SRC_RT_POLICY_SHORTEST_DEVICE_QUEUE_H */ 21 | 22 | -------------------------------------------------------------------------------- /src/runtime/PolicyProfile.h: 
-------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_PROFILE_H 2 | #define IRIS_SRC_RT_POLICY_PROFILE_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class Policies; 10 | 11 | class PolicyProfile : public Policy { 12 | public: 13 | PolicyProfile(Scheduler* scheduler, Policies* policies); 14 | virtual ~PolicyProfile(); 15 | 16 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 17 | 18 | private: 19 | Policies* policies_; 20 | 21 | }; 22 | 23 | } /* namespace rt */ 24 | } /* namespace iris */ 25 | 26 | #endif /* IRIS_SRC_RT_POLICY_PROFILE_H */ 27 | -------------------------------------------------------------------------------- /src/runtime/LoaderQIREE.cpp: -------------------------------------------------------------------------------- 1 | #include "LoaderQIREE.h" 2 | #include "Debug.h" 3 | #include "Platform.h" 4 | 5 | namespace iris { 6 | namespace rt { 7 | 8 | LoaderQIREE::LoaderQIREE() : HostInterfaceClass("LIB_QIREE") { 9 | } 10 | 11 | LoaderQIREE::~LoaderQIREE() { 12 | } 13 | const char * LoaderQIREE::library() { 14 | char* path = NULL; 15 | Platform::GetPlatform()->EnvironmentGet("LIB_QIREE", &path, NULL); 16 | return path; 17 | } 18 | int LoaderQIREE::LoadFunctions() { 19 | LOADFUNC(parse_input_c); 20 | return IRIS_SUCCESS; 21 | } 22 | 23 | } /* namespace rt */ 24 | } /* namespace iris */ 25 | 26 | -------------------------------------------------------------------------------- /src/runtime/Policy.cpp: -------------------------------------------------------------------------------- 1 | #include "Policy.h" 2 | #include "Command.h" 3 | #include "Task.h" 4 | #include "Debug.h" 5 | #include "Scheduler.h" 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | Policy::Policy() { 11 | } 12 | 13 | Policy::~Policy() { 14 | } 15 | 16 | bool Policy::IsKernelSupported(Task *task, Device *dev) { 17 | return task->IsKernelSupported(dev); 18 | } 19 | 20 | void 
Policy::SetScheduler(Scheduler* scheduler) { 21 | scheduler_ = scheduler; 22 | devs_ = scheduler_->devices(); 23 | ndevs_ = scheduler_->ndevs(); 24 | } 25 | 26 | } /* namespace rt */ 27 | } /* namespace iris */ 28 | 29 | -------------------------------------------------------------------------------- /tests/38_offset/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void vecadd(int* A, int* B, int* C, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t i; 5 | #pragma omp parallel for shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | C[i] = A[i] + B[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | void blockadd(int* A, int* B, int* C, size_t SIZE, IRIS_OPENMP_KERNEL_ARGS) { 11 | size_t i, j; 12 | //size_t SIZE=16; 13 | #pragma omp parallel for shared(C, A, B) private(i, j) 14 | IRIS_OPENMP_KERNEL_BEGIN2D(i, j) 15 | C[i*SIZE+j] = A[i*SIZE+j] + B[i*SIZE+j]; 16 | IRIS_OPENMP_KERNEL_END2D 17 | } 18 | 19 | -------------------------------------------------------------------------------- /src/runtime/PolicyJulia.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_JULIA_H 2 | #define IRIS_SRC_RT_POLICY_JULIA_H 3 | 4 | #include "Policy.h" 5 | #include "iris/iris.h" 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class PolicyJulia: public Policy { 11 | public: 12 | PolicyJulia(Scheduler* scheduler); 13 | virtual ~PolicyJulia(); 14 | 15 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 16 | 17 | private: 18 | int index_; 19 | int32_t *out_devs_; 20 | iris_device *j_devs_; 21 | }; 22 | 23 | } /* namespace rt */ 24 | } /* namespace iris */ 25 | 26 | #endif /* IRIS_SRC_RT_POLICY_JULIA_H */ 27 | 28 | -------------------------------------------------------------------------------- /tests/38_offset/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test38_offset 2 | CPP=1 3 | 4 | all: $(TEST) 
kernel.ptx kernel.openmp.so kernel.hip 5 | 6 | include ../Makefile.tests 7 | 8 | kernel.openmp.so: kernel.openmp.c 9 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 10 | 11 | ifeq ($(NVCC_TEST),) 12 | kernel.nvopenmp.so: kernel.openmp.c 13 | @echo "No NVCC compiler found" 14 | else 15 | kernel.nvopenmp.so: kernel.openmp.c 16 | $(NVCC) -g -shared -I. ${INCLUDE} ${EXT_INCLUDE} --compiler-options -fPIC -o $@ $^ 17 | endif 18 | 19 | clean: 20 | rm -f vecadd vecadd-iris vecadd-m kernel.ptx kernel.openmp.so kernel.hip 21 | -------------------------------------------------------------------------------- /tests/38_offset_subbuffer/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void vecadd(int* A, int* B, int* C, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t i; 5 | #pragma omp parallel for shared(C, A, B) private(i) 6 | IRIS_OPENMP_KERNEL_BEGIN(i) 7 | C[i] = A[i] + B[i]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | void blockadd(int* A, int* B, int* C, size_t SIZE, IRIS_OPENMP_KERNEL_ARGS) { 11 | size_t i, j; 12 | //size_t SIZE=16; 13 | #pragma omp parallel for shared(C, A, B) private(i, j) 14 | IRIS_OPENMP_KERNEL_BEGIN2D(i, j) 15 | C[i*SIZE+j] = A[i*SIZE+j] + B[i*SIZE+j]; 16 | IRIS_OPENMP_KERNEL_END2D 17 | } 18 | 19 | -------------------------------------------------------------------------------- /tests/24_multi_kernels/kernel-stupid.cl: -------------------------------------------------------------------------------- 1 | __kernel void add1(__global int* A) { 2 | int i = get_global_id(0); 3 | A[i] = A[i] + 2; 4 | A[i] = A[i] - 2; 5 | A[i] = A[i] + 1; 6 | } 7 | 8 | __kernel void add1_v1(__global int* A) { 9 | int i = get_global_id(0); 10 | int a = A[i]; 11 | a++; 12 | A[i] = a; 13 | } 14 | 15 | __kernel void add2(__global int* A) { 16 | int i = get_global_id(0); 17 | A[i] = A[i] + 5; 18 | A[i] = A[i] - 5; 19 | A[i] = A[i] + 2; 20 | } 21 | 22 | __kernel void add2_v2(__global int* A) { 
23 | int i = get_global_id(0); 24 | int a = A[i]; 25 | a += 2; 26 | A[i] = a; 27 | } 28 | 29 | -------------------------------------------------------------------------------- /tests/29_data_mem/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test29_data_mem 2 | 3 | all: $(TEST) kernel.openmp.so kernel.hip kernel.ptx kernel.nvopenmp.so 4 | 5 | include ../Makefile.tests 6 | 7 | kernel.openmp.so: kernel.cl.openmp.c 8 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 9 | 10 | ifeq ($(NVCC_TEST),) 11 | kernel.nvopenmp.so: kernel.cl.openmp.c 12 | @echo "No NVCC compiler found" 13 | else 14 | kernel.nvopenmp.so: kernel.cl.openmp.c 15 | $(NVCC) -g -shared -I. ${INCLUDE} ${EXT_INCLUDE} --compiler-options -fPIC -o $@ $^ 16 | endif 17 | 18 | clean: 19 | rm -f kernel.ptx kernel.hip kernel.openmp.so test29_data_mem 20 | -------------------------------------------------------------------------------- /docs/sphinx/source/installation.rst: -------------------------------------------------------------------------------- 1 | Build IRIS 2 | ================== 3 | 4 | IRIS uses `CMake (>= 2.8) <https://cmake.org>`_ for building, testing, and installing the library. 5 | 6 | .. code-block:: bash 7 | 8 | $ git clone git@github.com:ornl/iris.git 9 | $ cd iris 10 | $ mkdir build 11 | $ cd build 12 | $ cmake .. -DCMAKE_INSTALL_PREFIX=<install_path> # $HOME/.iris is good for the install_path. 13 | $ make -j 14 | $ make install 15 | 16 | To run the tests 17 | 18 | .. code-block:: bash 19 | 20 | $ cd ../tests 21 | $ mkdir build 22 | $ cd build 23 | $ cmake ..
24 | $ make -j 25 | $ make test 26 | 27 | -------------------------------------------------------------------------------- /apps/aiwc_policy/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: libAIWCPolicy.so test_aiwc_policy 4 | 5 | libAIWCPolicy.so: aiwc_policy.cpp aiwc_utils.cpp 6 | $(CXX) $(CXXFLAGS) -fPIC -shared -o $@ $^ -lcrypto $(LDFLAGS) 7 | 8 | test_aiwc_policy: test_aiwc_policy.c 9 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 10 | 11 | kernel.ptx: kernel.cu 12 | $(NVCC) -ptx $^ 13 | 14 | kernel.hip: kernel.hip.cpp 15 | $(HIPCC) --genco -o $@ $^ 16 | 17 | kernel.openmp.so: kernel.openmp.c 18 | $(CC) $(CFLAGS) -O3 -lgomp -fPIC -shared -I. -o $@ $^ 19 | 20 | clean: 21 | rm -f libAIWCPolicy.so test_aiwc_policy kernel.ptx kernel.hip kernel.openmp.so 22 | 23 | -------------------------------------------------------------------------------- /tests/38_offset_subbuffer/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test38_offset_subbuffer 2 | CPP=1 3 | 4 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 5 | 6 | include ../Makefile.tests 7 | 8 | kernel.openmp.so: kernel.openmp.c 9 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 10 | 11 | ifeq ($(NVCC_TEST),) 12 | kernel.nvopenmp.so: kernel.openmp.c 13 | @echo "No NVCC compiler found" 14 | else 15 | kernel.nvopenmp.so: kernel.openmp.c 16 | $(NVCC) -g -shared -I. 
${INCLUDE} ${EXT_INCLUDE} --compiler-options -fPIC -o $@ $^ 17 | endif 18 | 19 | clean: 20 | rm -f vecadd vecadd-iris vecadd-m kernel.ptx kernel.openmp.so kernel.hip 21 | -------------------------------------------------------------------------------- /tests/26_env_set/kernel.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ 2 | void add1(int* A) { 3 | int i = blockIdx.x * blockDim.x + threadIdx.x; 4 | A[i] = A[i] + 1; 5 | } 6 | 7 | extern "C" __global__ 8 | void add1_v2(int* A) { 9 | int i = blockIdx.x * blockDim.x + threadIdx.x; 10 | int a = A[i]; 11 | a++; 12 | a++; 13 | A[i] = a; 14 | } 15 | 16 | extern "C" __global__ 17 | void add2(int* A) { 18 | int i = blockIdx.x * blockDim.x + threadIdx.x; 19 | A[i] = A[i] + 2; 20 | } 21 | 22 | extern "C" __global__ 23 | void add2_v2(int* A) { 24 | int i = blockIdx.x * blockDim.x + threadIdx.x; 25 | int a = A[i]; 26 | a += 2; 27 | A[i] = a; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /scheduling-policies/aiwc/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: libAIWCPolicy.so test_aiwc_policy 4 | 5 | libAIWCPolicy.so: aiwc_policy.cpp aiwc_utils.cpp 6 | $(CXX) $(CXXFLAGS) -fPIC -shared -o $@ $^ -lcrypto $(LDFLAGS) 7 | 8 | test_aiwc_policy: test_aiwc_policy.c 9 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 10 | 11 | kernel.ptx: kernel.cu 12 | $(NVCC) -ptx $^ 13 | 14 | kernel.hip: kernel.hip.cpp 15 | $(HIPCC) --genco -o $@ $^ 16 | 17 | kernel.openmp.so: kernel.openmp.c 18 | $(CC) $(CFLAGS) -O3 -lgomp -fPIC -shared -I. 
-o $@ $^ 19 | 20 | clean: 21 | rm -f libAIWCPolicy.so test_aiwc_policy kernel.ptx kernel.hip kernel.openmp.so 22 | 23 | -------------------------------------------------------------------------------- /src/runtime/FilterTaskSplit.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_FILTER_TASK_SPLIT_H 2 | #define IRIS_SRC_RT_FILTER_TASK_SPLIT_H 3 | 4 | #include "Filter.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class Polyhedral; 10 | class Platform; 11 | 12 | class FilterTaskSplit : public Filter { 13 | public: 14 | FilterTaskSplit(Polyhedral* polyhedral, Platform* platform); 15 | virtual ~FilterTaskSplit(); 16 | 17 | virtual int Execute(Task* task); 18 | 19 | private: 20 | Polyhedral* polyhedral_; 21 | Platform* platform_; 22 | }; 23 | 24 | } /* namespace rt */ 25 | } /* namespace iris */ 26 | 27 | 28 | #endif /* IRIS_SRC_RT_FILTER_TASK_SPLIT_H */ 29 | 30 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/saxpy_ref.cpp: -------------------------------------------------------------------------------- 1 | /**============================================================================= 2 | Copyright (c) 2016 QUALCOMM Technologies Incorporated. 
3 | All Rights Reserved Qualcomm Proprietary 4 | =============================================================================**/ 5 | #include 6 | #include 7 | #include 8 | 9 | void saxpy_ref(int32_t *Z, 10 | int32_t *X, 11 | int32_t *Y, 12 | int32_t A, 13 | int32_t size) 14 | 15 | { 16 | int i; 17 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int read_file(const char* path, char** string, size_t* len) { /* reads the whole file at path into a malloc'ed buffer; returns 1 on success, 0 on failure */ 8 | int fd = open((const char*) path, O_RDONLY); 9 | if (fd == -1) { 10 | *len = 0UL; 11 | return 0; 12 | } 13 | off_t s = lseek(fd, 0, SEEK_END); /* file size = offset of end-of-file */ 14 | *string = (char*) malloc(s); 15 | *len = s; 16 | lseek(fd, 0, SEEK_SET); 17 | ssize_t r = read(fd, *string, s); 18 | if (r != s) { 19 | printf("[%s:%d] read[%zd] vs [%lu]\n", __FILE__, __LINE__, r, s); 20 | close(fd); return 0; /* close the descriptor on the short-read path too; *string is left as-is for the caller */ 21 | } 22 | close(fd); 23 | return 1; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /tests/26_env_set/kernel-negative.cu: -------------------------------------------------------------------------------- 1 | extern "C" __global__ 2 | void add1(int* A) { 3 | int i = blockIdx.x * blockDim.x + threadIdx.x; 4 | A[i] = A[i] - 1; 5 | } 6 | 7 | extern "C" __global__ 8 | void add1_v2(int* A) { 9 | int i = blockIdx.x * blockDim.x + threadIdx.x; 10 | int a = A[i]; 11 | a--; 12 | a--; 13 | A[i] = a; 14 | } 15 | 16 | extern "C" __global__ 17 | void add2(int* A) { 18 | int i = blockIdx.x * blockDim.x + threadIdx.x; 19 | A[i] = A[i] - 2; 20 | } 21 | 22 | extern "C" __global__ 23 | void add2_v2(int* A) { 24 | int i = blockIdx.x * blockDim.x + threadIdx.x; 25 | int a = A[i]; 26 | a -= 2; 27 | A[i] = a; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Python Package 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build:
11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v2 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: '3.8' 22 | 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install -r requirements.txt 27 | 28 | - name: Run tests 29 | run: | 30 | pip install pytest 31 | pytest -------------------------------------------------------------------------------- /src/runtime/LoaderQIREE.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_LOADER_QIREE_H 2 | #define IRIS_SRC_RT_LOADER_QIREE_H 3 | 4 | #include "Loader.h" 5 | #include "HostInterface.h" 6 | //#include 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | class LoaderQIREE : public HostInterfaceClass { 12 | public: 13 | LoaderQIREE(); 14 | ~LoaderQIREE(); 15 | 16 | //const char* library_precheck() { return "cuInit"; } 17 | const char* library();// { return "libqiree.so"; } 18 | int LoadFunctions(); 19 | void (*parse_input_c)(int, char **); 20 | 21 | }; 22 | 23 | } /* namespace rt */ 24 | } /* namespace iris */ 25 | 26 | #endif /* IRIS_SRC_RT_LOADER_QIREE_H */ 27 | 28 | -------------------------------------------------------------------------------- /docs/sphinx/source/api.rst: -------------------------------------------------------------------------------- 1 | .. _c-api: 2 | 3 | C API 4 | ================== 5 | 6 | .. doxygenfile:: iris_runtime.h 7 | :project: C 8 | :path: ../../../include/iris/iris_runtime.h 9 | 10 | .. _cpp-api: 11 | 12 | C++ API 13 | ================== 14 | 15 | .. doxygenfile:: iris.hpp 16 | :project: Cpp 17 | :path: ../../../include/iris/iris.hpp 18 | 19 | .. _fortran-api: 20 | 21 | Fortran API 22 | ================== 23 | 24 | .. .. doxygenfile:: FortranAPI-IRIS.f90 25 | .. :project: F90 26 | .. :path: ../../../src/runtime/FortranAPI-IRIS.f90 27 | 28 | .. 
_python-api: 29 | 30 | Python API 31 | ================== 32 | 33 | .. automodule:: iris 34 | :members: 35 | 36 | -------------------------------------------------------------------------------- /include/iris/gettime.h: -------------------------------------------------------------------------------- 1 | #ifndef __IRIS_GET_TIME_H__ 2 | #define __IRIS_GET_TIME_H__ 3 | #ifdef __hexagon__ // some defs/stubs so app can build for Hexagon simulation 4 | #include "hexagon_sim_timer.h" 5 | #include "hexagon_cache.h" // for removing buffers from cache during simulation/profiling 6 | #define GetTime hexagon_sim_read_pcycles // For Hexagon sim, use PCycles for profiling 7 | #else 8 | #include 9 | #include 10 | static unsigned long long GetTime( void ) 11 | { 12 | struct timeval tv; 13 | gettimeofday(&tv, NULL); 14 | 15 | return tv.tv_sec * 1000000ULL + tv.tv_usec; 16 | } 17 | 18 | #endif 19 | #endif // __IRIS_GET_TIME_H__ 20 | -------------------------------------------------------------------------------- /src/runtime/ProfilerEventRecord.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Profiler.h" 3 | #include "pthread.h" 4 | 5 | namespace iris { 6 | namespace rt { 7 | 8 | class ProfilerEventRecord : public Profiler { 9 | public: 10 | ProfilerEventRecord(Platform* platform); 11 | virtual ~ProfilerEventRecord(); 12 | 13 | virtual int CompleteTask(Task* task); 14 | 15 | protected: 16 | virtual int Main(); 17 | virtual int Exit(); 18 | virtual const char* FileExtension() { return "events.html"; } 19 | 20 | private: 21 | pthread_mutex_t chart_lock_; 22 | double first_task_; 23 | bool kernel_profile_; 24 | }; 25 | 26 | } /* namespace rt */ 27 | } /* namespace iris */ 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /tests/05_snapdragon/saxpy.mk: -------------------------------------------------------------------------------- 1 | IRIS=$(HOME)/work/iris-rts 2 | 3 | 
LOCAL_PATH := $(call my-dir) 4 | 5 | include $(CLEAR_VARS) 6 | LOCAL_MODULE := libOpenCL 7 | LOCAL_SRC_FILES := ../libOpenCL.so 8 | include $(PREBUILT_SHARED_LIBRARY) 9 | 10 | include $(CLEAR_VARS) 11 | LOCAL_MODULE := libiris 12 | LOCAL_SRC_FILES:= ../obj/local/armeabi-v7a/libiris.a 13 | include $(PREBUILT_STATIC_LIBRARY) 14 | 15 | include $(CLEAR_VARS) 16 | LOCAL_MODULE:=saxpy 17 | LOCAL_STATIC_LIBRARIES := libiris 18 | LOCAL_SHARED_LIBRARIES := libOpenCL 19 | LOCAL_C_INCLUDES:=$(IRIS)/include 20 | LOCAL_SRC_FILES:= $(IRIS)/apps/saxpy/saxpy-iris.cpp 21 | LOCAL_LDFLAGS:=-fopenmp 22 | include $(BUILD_EXECUTABLE) 23 | 24 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/benchmark_ref.h: -------------------------------------------------------------------------------- 1 | /**============================================================================= 2 | Copyright (c) 2016, 2017 QUALCOMM Technologies Incorporated. 3 | All Rights Reserved Qualcomm Proprietary 4 | =============================================================================**/ 5 | #ifndef BENCHMARK_ASM_H 6 | #define BENCHMARK_ASM_H 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | #include 13 | 14 | void saxpy_ref(int32_t *Z, 15 | int32_t *X, 16 | int32_t *Y, 17 | int32_t A, 18 | int32_t size); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/runtime/PolicyDefault.cpp: -------------------------------------------------------------------------------- 1 | #include "PolicyDefault.h" 2 | #include "Debug.h" 3 | #include "Platform.h" 4 | 5 | namespace iris { 6 | namespace rt { 7 | 8 | PolicyDefault::PolicyDefault(Scheduler* scheduler) { 9 | SetScheduler(scheduler); 10 | } 11 | 12 | PolicyDefault::~PolicyDefault() { 13 | } 14 | 15 | void PolicyDefault::GetDevices(Task* task, Device** devs, int* ndevs) { 16 | int selected = 0; 17 | 
for(selected=0; selected 3 | extern "C" __global__ 4 | void add1(int* A) { 5 | int i = blockIdx.x * blockDim.x + threadIdx.x; 6 | A[i] = A[i] + 1; 7 | } 8 | 9 | extern "C" __global__ 10 | void add1_v2(int* A) { 11 | int i = blockIdx.x * blockDim.x + threadIdx.x; 12 | int a = A[i]; 13 | a++; 14 | a++; 15 | A[i] = a; 16 | } 17 | 18 | extern "C" __global__ 19 | void add2(int* A) { 20 | int i = blockIdx.x * blockDim.x + threadIdx.x; 21 | A[i] = A[i] + 2; 22 | } 23 | 24 | extern "C" __global__ 25 | void add2_v2(int* A) { 26 | int i = blockIdx.x * blockDim.x + threadIdx.x; 27 | int a = A[i]; 28 | a += 2; 29 | A[i] = a; 30 | } 31 | 32 | -------------------------------------------------------------------------------- /apps/makefile_defs.mk: -------------------------------------------------------------------------------- 1 | IRIS_INSTALL_ROOT ?= $(HOME)/.iris 2 | IRIS=$(IRIS_INSTALL_ROOT) 3 | 4 | CHARMSYCL_INSTALL_ROOT ?= $(HOME)/.charm-sycl 5 | CHARMSYCL=$(CHARMSYCL_INSTALL_ROOT) 6 | OPENSYCL_INSTALL_ROOT ?= $(HOME)/.opensycl 7 | OPENSYCL=$(OPENSYCL_INSTALL_ROOT) 8 | 9 | CC ?= gcc 10 | CXX ?= g++ 11 | FORTRAN ?= gfortran 12 | NVCC ?= nvcc 13 | HIPCC ?= hipcc 14 | CHARMSYCL ?= $(HOME)/.charm-sycl 15 | CHARMSYCL_LDFLAGS ?= -L$(CHARMSYCL)/lib -L$(CHARMSYCL)/lib64 -lcharm -lpthread -ldl 16 | DPCPP ?= $(HOME)/dpc++-workspace 17 | 18 | CFLAGS=-I$(IRIS)/include/ -O3 -std=c99 19 | CXXFLAGS=-I$(IRIS)/include/ -O3 20 | FFLAGS=-g -I$(IRIS)/include/iris 21 | LDFLAGS=-L$(IRIS)/lib64 -L$(IRIS)/lib -liris -lpthread -ldl 22 | 23 | -------------------------------------------------------------------------------- /src/runtime/MemRange.cpp: -------------------------------------------------------------------------------- 1 | #include "MemRange.h" 2 | #include "Debug.h" 3 | 4 | namespace iris { 5 | namespace rt { 6 | 7 | MemRange::MemRange(size_t off, size_t size, Device* dev) { 8 | off_ = off; 9 | size_ = size; 10 | dev_ = dev; 11 | } 12 | 13 | MemRange::~MemRange() { 14 | 15 | } 16 | 17 | 
bool MemRange::Distinct(size_t off, size_t size) { 18 | return (off_ > off + size - 1) || (off_ + size_ - 1 < off); 19 | } 20 | 21 | bool MemRange::Overlap(size_t off, size_t size) { 22 | return !Distinct(off, size); 23 | } 24 | 25 | bool MemRange::Contain(size_t off, size_t size) { 26 | return (off_ <= off) && (off_ + size_ >= off + size); 27 | } 28 | 29 | } /* namespace rt */ 30 | } /* namespace iris */ 31 | 32 | -------------------------------------------------------------------------------- /src/runtime/PolicyRoundRobin.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_ROUND_ROBIN_H 2 | #define IRIS_SRC_RT_POLICY_ROUND_ROBIN_H 3 | 4 | #include "Policy.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class PolicyRoundRobin: public Policy { 10 | public: 11 | PolicyRoundRobin(Scheduler* scheduler); 12 | virtual ~PolicyRoundRobin(); 13 | 14 | virtual void GetDevices(Task* task, Device** devs, int* ndevs); 15 | 16 | private: 17 | void GetDevice(Task* task, Device** devs, int* ndevs); 18 | void GetDeviceType(Task* task, Device** devs, int* ndevs); 19 | 20 | private: 21 | int index_; 22 | }; 23 | 24 | } /* namespace rt */ 25 | } /* namespace iris */ 26 | 27 | #endif /* IRIS_SRC_RT_POLICY_ROUND_ROBIN_H */ 28 | 29 | -------------------------------------------------------------------------------- /src/runtime/Queue.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_QUEUE_H 2 | #define IRIS_SRC_RT_QUEUE_H 3 | 4 | #include "Config.h" 5 | #include 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class Task; 11 | class Device; 12 | 13 | class Queue { 14 | public: 15 | virtual ~Queue() {} 16 | virtual bool Peek(Task** task, int index) = 0; 17 | virtual bool Enqueue(Task* task) = 0; 18 | virtual bool Dequeue(Task** task) = 0; 19 | virtual bool Dequeue(Task** task, Device *dev) { return Dequeue(task); } 20 | virtual size_t Size() = 0; 21 | virtual 
bool Empty() = 0; 22 | virtual void Print(int devno=-1) { } 23 | }; 24 | 25 | } /* namespace rt */ 26 | } /* namespace iris */ 27 | 28 | #endif /* IRIS_SRC_RT_QUEUE_H */ 29 | -------------------------------------------------------------------------------- /docs/sphinx/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /tests/39_dmem2dmem/py_host.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import iris 4 | import numpy as np 5 | import sys 6 | 7 | # IRIS initialize 8 | iris.init() 9 | 10 | N=16 11 | # Create and initialize src and dst memory 12 | src_data = np.arange(N, dtype=np.float32) 13 | dst_data = np.zeros(N, dtype=np.float32) 14 | 15 | # Create DMEM2DMEM command in task 16 | src = iris.dmem(src_data) 17 | dst = iris.dmem(dst_data) 18 | 19 | # Create task 20 | task = iris.task() 21 | 22 | # Add DMEM2DMEM command to task 23 | task.dmem2dmem(src, dst) 24 | 25 | # Add flush command to task 26 | task.flush(dst) 27 | 28 | # Submit task 29 | task.submit() 30 | 31 | # Compare output 32 | print(np.all(src_data == dst_data)) 33 | 34 | # IRIS finalize 35 | iris.finalize() 
-------------------------------------------------------------------------------- /apps/helloworld/helloworld.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | char a[12] = "hello world"; 5 | char b[12]; 6 | size_t size = 12; 7 | 8 | int main(int argc, char** argv) { 9 | iris::Platform platform; 10 | platform.init(&argc, &argv, true); 11 | 12 | iris::Mem mem_a(size); 13 | iris::Mem mem_b(size); 14 | 15 | iris::Task task; 16 | task.h2d(&mem_a, 0, size, a); 17 | void* params[2] = { &mem_b, &mem_a }; 18 | int params_info[2] = { iris_w, iris_r }; 19 | task.kernel("uppercase", 1, NULL, &size, NULL, 2, params, params_info); 20 | task.d2h(&mem_b, 0, size, b); 21 | task.submit(iris_roundrobin, NULL, true); 22 | 23 | printf("%s\n", b); 24 | 25 | platform.finalize(); 26 | 27 | return 0; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /src/runtime/PolicyDevice.cpp: -------------------------------------------------------------------------------- 1 | #include "PolicyDevice.h" 2 | #include "Debug.h" 3 | #include "Device.h" 4 | #include "Task.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | PolicyDevice::PolicyDevice(Scheduler* scheduler) { 10 | SetScheduler(scheduler); 11 | } 12 | 13 | PolicyDevice::~PolicyDevice() { 14 | } 15 | 16 | void PolicyDevice::GetDevices(Task* task, Device** devs, int* ndevs) { 17 | int brs_policy = task->brs_policy(); 18 | int n = 0; 19 | for (int i = 0; i < ndevs_; i++) { 20 | Device* dev = devs_[i]; 21 | if (((dev->type() & brs_policy) == dev->type()) && IsKernelSupported(task, dev)) { 22 | devs[n++] = dev; 23 | } 24 | } 25 | *ndevs = n; 26 | } 27 | 28 | } /* namespace rt */ 29 | } /* namespace iris */ 30 | -------------------------------------------------------------------------------- /src/runtime/PolicyFirstToFinish.cpp: -------------------------------------------------------------------------------- 1 | #include 
"PolicyFirstToFinish.h" 2 | #include "Debug.h" 3 | #include "Scheduler.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace iris { 10 | namespace rt { 11 | 12 | PolicyFirstToFinish::PolicyFirstToFinish(Scheduler* scheduler) { 13 | SetScheduler(scheduler); 14 | } 15 | 16 | PolicyFirstToFinish::~PolicyFirstToFinish() { 17 | } 18 | 19 | void PolicyFirstToFinish::GetDevices(Task* task, Device** devs, int* ndevs) { 20 | int n = 0; 21 | for (int i = 0; i < ndevs_; i++) 22 | if (IsKernelSupported(task, devs_[i])) 23 | devs[n++] = devs_[i]; 24 | *ndevs = n; 25 | } 26 | 27 | } /* namespace rt */ 28 | } /* namespace iris */ 29 | 30 | -------------------------------------------------------------------------------- /tests/24_multi_kernels/kernel.cu: -------------------------------------------------------------------------------- 1 | // Kernel 1: add1 2 | extern "C" __global__ void add1(int* A) { 3 | int i = blockIdx.x * blockDim.x + threadIdx.x; 4 | A[i] = A[i] + 1; 5 | } 6 | 7 | // Kernel 2: add1_v2 8 | extern "C" __global__ void add1_v2(int* A) { 9 | int i = blockIdx.x * blockDim.x + threadIdx.x; 10 | int a = A[i]; 11 | a++; 12 | a++; 13 | A[i] = a; 14 | } 15 | 16 | // Kernel 3: add2 17 | extern "C" __global__ void add2(int* A) { 18 | int i = blockIdx.x * blockDim.x + threadIdx.x; 19 | A[i] = A[i] + 2; 20 | } 21 | 22 | // Kernel 4: add2_v2 23 | extern "C" __global__ void add2_v2(int* A) { 24 | int i = blockIdx.x * blockDim.x + threadIdx.x; 25 | int a = A[i]; 26 | a += 2; 27 | A[i] = a; 28 | } 29 | -------------------------------------------------------------------------------- /src/runtime/PresentTable.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_PRESENT_TABLE_H 2 | #define IRIS_SRC_RT_PRESENT_TABLE_H 3 | 4 | #include "Config.h" 5 | #include 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class BaseMem; 11 | 12 | typedef struct _PresentTableEntity { 13 | size_t size; 14 | BaseMem* mem; 15 
| } PresentTableEntity; 16 | 17 | class PresentTable { 18 | public: 19 | PresentTable(); 20 | ~PresentTable(); 21 | 22 | int Add(void* host, size_t size, BaseMem* mem); 23 | BaseMem* Get(void* host, size_t* off); 24 | BaseMem* Remove(void* host); 25 | 26 | private: 27 | std::map entities_; 28 | }; 29 | 30 | } /* namespace rt */ 31 | } /* namespace iris */ 32 | 33 | #endif /* IRIS_SRC_RT_PRESENT_TABLE_H */ 34 | -------------------------------------------------------------------------------- /apps/aiwc_policy/aiwc_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef AIWC_H 2 | #define AIWC_H 3 | 4 | #include 5 | 6 | namespace iris { 7 | namespace rt { 8 | namespace plugin { 9 | 10 | class AIWC_Utils { 11 | public: 12 | static bool IsAIWCDevice(char* name,char* vendor); 13 | static void SetEnvironment(const char *name, const char *value); 14 | static bool HaveMetrics(char* path); 15 | static const char* MetricLocation(char* digest); 16 | static int ReadFile(char* path, char** string, size_t* len); 17 | static char* ComputeFileDigest(char* path); 18 | static char* ComputeDigest(char* src); 19 | static bool MetricsForKernelFileExist(char* path); 20 | }; 21 | 22 | } /* namespace plugin */ 23 | } /* namespace rt */ 24 | } /* namespace iris */ 25 | 26 | #endif /* AIWC_UTILS_H */ 27 | -------------------------------------------------------------------------------- /docs/sphinx/source/features.rst: -------------------------------------------------------------------------------- 1 | Tiling (no python) @imo 2 | ========= 3 | 4 | DMEM to DMEM @miniskarnr 5 | ========= 6 | .. content-tabs:: 7 | .. tab-container:: tab1 8 | :title: C 9 | 10 | .. literalinclude:: ../../../tests/39_dmem2dmem/c_host.c 11 | :language: c 12 | 13 | .. tab-container:: tab2 14 | :title: C++ 15 | 16 | .. literalinclude:: ../../../tests/39_dmem2dmem/cpp_host.cpp 17 | :language: c++ 18 | 19 | .. tab-container:: tab3 20 | :title: Python 21 | 22 | .. 
literalinclude:: ../../../tests/39_dmem2dmem/py_host.py 23 | :language: python 24 | 25 | DMEM regions @miniskarnr 26 | ========= 27 | 28 | IRIS Graph How to create the IRIS Graphs @imo 29 | ========= 30 | -------------------------------------------------------------------------------- /tests/07_policy_register/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test07_policy_register 2 | 3 | all: $(TEST) libPolicyGWS.so libPolicyGWSHook.so policy_last.so kernel.openmp.so kernel.hip kernel.ptx 4 | 5 | include ../Makefile.tests 6 | 7 | clean: 8 | rm -f $(TEST) kernel.ptx kernel.hip kernel.openmp.so policy_last.so libPolicyGWSHook.so libPolicyGWS.so 9 | 10 | policy_last.so: policy_last.cpp 11 | g++ -std=c++11 -fPIC -shared ${CXXFLAGS} -o $@ $^ 12 | 13 | libPolicyGWS.so: policy_gws.cpp 14 | g++ -std=c++11 -fPIC -shared ${CXXFLAGS} -o $@ $^ 15 | 16 | libPolicyGWSHook.so: policy_gws_hook.cpp 17 | g++ -std=c++11 -fPIC -shared ${CXXFLAGS} -o $@ $^ 18 | 19 | kernel.openmp.so: kernel.cl.openmp.c 20 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. 
$(CFLAGS) -o $@ $^ 21 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(runtime) 2 | 3 | # Julia sources shipped alongside the runtime. 4 | set(JULIA_SRC_FILES 5 | IrisHRT.jl 6 | Kernels.jl 7 | Tiling1D.jl 8 | Tiling2D.jl 9 | Tiling3D.jl 10 | ) 11 | 12 | # Install every Julia source that is present into both "src" and the iris 13 | # header directory. DESTINATION paths are relative to the install prefix so 14 | # that DESTDIR, "cmake --install --prefix" and CPack can relocate the files. 15 | # NOTE(review): assumes CMAKE_INSTALL_INCLUDEDIR is defined by a parent 16 | # CMakeLists.txt (e.g. via include(GNUInstallDirs)) - confirm. 17 | foreach(file IN LISTS JULIA_SRC_FILES) 18 | if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${file}") 19 | install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/${file}" 20 | DESTINATION "src") 21 | install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/${file}" 22 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/iris") 23 | endif() 24 | endforeach() 25 | 26 | 27 | -------------------------------------------------------------------------------- /utils/hexagon_deps.min: -------------------------------------------------------------------------------- 1 | 2 | # must list all variants supported by this project 3 | #include hexagon_unsupport.min 4 | SUPPORTED_VS = $(default_VS) 5 | #SUPPORTED_VS = $(filter-out $(Unsupported_hexagon_VS),$(default_VS)) 6 | 7 | # must list all the dependencies of this project 8 | DEPENDENCIES = \ 9 | RPCMEM \ 10 | LIBDSPCV_SKEL \ 11 | QPRINTF 12 | 13 | # each dependency needs a directory definition 14 | # the form is _DIR 15 | # for example: 16 | # DEPENDENCIES = FOO 17 | # FOO_DIR = $(HEXAGON_SDK_ROOT)/examples/common/foo 18 | # 19 | RPCMEM_DIR = $(HEXAGON_SDK_ROOT)/libs/common/rpcmem 20 | LIBDSPCV_SKEL_DIR = $(HEXAGON_SDK_ROOT)/libs/fastcv/dspCV 21 | QPRINTF_DIR = $(HEXAGON_SDK_ROOT)/libs/common/qprintf 22 | 23 | 24 | -------------------------------------------------------------------------------- /scheduling-policies/aiwc/aiwc_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef AIWC_H 2 | #define AIWC_H 3 | 4 | #include 5 | 6 | 
namespace iris { 7 | namespace rt { 8 | namespace plugin { 9 | 10 | class AIWC_Utils { 11 | public: 12 | static bool IsAIWCDevice(char* name,char* vendor); 13 | static void SetEnvironment(const char *name, const char *value); 14 | static bool HaveMetrics(char* path); 15 | static const char* MetricLocation(char* digest); 16 | static int ReadFile(char* path, char** string, size_t* len); 17 | static char* ComputeFileDigest(char* path); 18 | static char* ComputeDigest(char* src); 19 | static bool MetricsForKernelFileExist(char* path); 20 | }; 21 | 22 | } /* namespace plugin */ 23 | } /* namespace rt */ 24 | } /* namespace iris */ 25 | 26 | #endif /* AIWC_UTILS_H */ 27 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/saxpy.iris.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "iris/iris.h" 4 | #include "iris/iris_macros.h" 5 | 6 | IRIS_TASK_APIS_CPP( 7 | isaxpy_cpp, // C++ overload function API for both core and task 8 | isaxpy_core, // Function name for core API 9 | isaxpy_task, // Function name for task API 10 | "saxpy", 1, 11 | NULL_OFFSET, GWS(SIZE), NULL_LWS, 12 | OUT_TASK(Z, int32_t *, int32_t, Z, sizeof(int32_t)*SIZE), 13 | IN_TASK(X, int32_t *, int32_t, X, sizeof(int32_t)*SIZE), 14 | IN_TASK(Y, int32_t *, int32_t, Y, sizeof(int32_t)*SIZE), 15 | PARAM(SIZE, int32_t), 16 | PARAM(A, int32_t), 17 | PARAM(cuUsecPtr, int32_t*, iris_dsp), 18 | PARAM(cuCycPtr, int32_t*, iris_dsp)); 19 | -------------------------------------------------------------------------------- /utils/iris.def: -------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | #Bootstrap: localimage 3 | #From: system_only.sif 4 | #Bootstrap: docker 5 | From: ubuntu:22.04 6 | Stage: build 7 | 8 | %labels 9 | Author Narasinga Rao Miniskar 10 | Version v3.0 11 | 12 | %setup 13 | mkdir -p ${APPTAINER_ROOTFS}/software/iris 14 | 15 | %files 16 
| containers/install_julia.sh /install_julia.sh 17 | build.sh /software/iris/. 18 | CMakeLists.txt /software/iris/. 19 | src /software/iris/ 20 | apps /software/iris/ 21 | tests /software/iris/ 22 | utils /software/iris/ 23 | 24 | 25 | %environment 26 | 27 | %post 28 | pwd 29 | HOME=/ JULIA_CACHE_NAME=.julia.apptainer . /install_julia.sh 30 | cd /software/iris/ 31 | IRIS_INSTALL_ROOT=$PWD/install bash build.sh 32 | -------------------------------------------------------------------------------- /apps/benchmarking/kernel.cl: -------------------------------------------------------------------------------- 1 | __kernel void saxpy(__global float* restrict Z, float A, __global float* restrict X, __global float* restrict Y) { 2 | size_t id = get_global_id(0); 3 | Z[id] = A * X[id] + Y[id]; 4 | } 5 | 6 | __kernel void ijk(__global double* restrict C, __global double* restrict A, __global double* restrict B) { 7 | size_t i = get_global_id(1); 8 | size_t j = get_global_id(0); 9 | size_t SIZE = get_global_size(0); 10 | 11 | double sum = 0.0; 12 | for (size_t k = 0; k < SIZE; k++) { 13 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 14 | } 15 | C[i * SIZE + j] = sum; 16 | } 17 | 18 | __kernel void nothing(__global int* A) { 19 | } 20 | 21 | __kernel void add_id(__global int* A) { 22 | size_t i = get_global_id(0); 23 | A[i] = A[i] + i; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /src/runtime/Pool.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POOL_H 2 | #define IRIS_SRC_RT_POOL_H 3 | 4 | #define IRIS_POOL_ENABLED 0 5 | #define IRIS_POOL_MAX_TASK 1100 6 | #define IRIS_POOL_MAX_CMD 1100 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | class Command; 12 | class Platform; 13 | class Task; 14 | 15 | class Pool { 16 | public: 17 | Pool(Platform* platform); 18 | ~Pool(); 19 | 20 | Task* GetTask(); 21 | Command* GetCommand(Task* task, int type); 22 | 23 | private: 24 | 
Platform* platform_; 25 | #if IRIS_POOL_ENABLED 26 | Task* tasks_[IRIS_POOL_MAX_TASK]; 27 | Command* cmds_[IRIS_POOL_MAX_CMD]; 28 | int tid_; 29 | int cid_; 30 | #endif 31 | 32 | }; 33 | 34 | } /* namespace rt */ 35 | } /* namespace iris */ 36 | 37 | #endif /* IRIS_SRC_RT_POOL_H */ 38 | 39 | -------------------------------------------------------------------------------- /src/runtime/QueueReady.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_QUEUE_READY_H 2 | #define IRIS_SRC_RT_QUEUE_READY_H 3 | 4 | #include "Queue.h" 5 | 6 | #include 7 | #include 8 | namespace iris { 9 | namespace rt { 10 | using namespace std; 11 | class QueueReady : public Queue { 12 | public: 13 | QueueReady(); 14 | virtual ~QueueReady(); 15 | 16 | bool Peek(Task** task, int index); 17 | bool Enqueue(Task* task); 18 | bool Dequeue(Task** task); 19 | bool Dequeue(Task** task, Device *device); 20 | size_t Size(); 21 | bool Empty(); 22 | void Print(int devno=-1); 23 | 24 | private: 25 | std::deque pqueue_, queue_, mqueue_; 26 | mutable std::mutex mutex_; 27 | }; 28 | 29 | } /* namespace rt */ 30 | } /* namespace iris */ 31 | 32 | #endif /* IRIS_SRC_RT_QUEUE_READY_H */ 33 | -------------------------------------------------------------------------------- /tests/24_multi_kernels/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | // Kernel 1: add1 4 | extern "C" __global__ void add1(int* A) { 5 | int i = blockIdx.x * blockDim.x + threadIdx.x; 6 | A[i] = A[i] + 1; 7 | } 8 | 9 | // Kernel 2: add1_v2 10 | extern "C" __global__ void add1_v2(int* A) { 11 | int i = blockIdx.x * blockDim.x + threadIdx.x; 12 | int a = A[i]; 13 | a++; 14 | a++; 15 | A[i] = a; 16 | } 17 | 18 | // Kernel 3: add2 19 | extern "C" __global__ void add2(int* A) { 20 | int i = blockIdx.x * blockDim.x + threadIdx.x; 21 | A[i] = A[i] + 2; 22 | } 23 | 24 | // Kernel 4: add2_v2 25 | extern "C" __global__ void 
add2_v2(int* A) { 26 | int i = blockIdx.x * blockDim.x + threadIdx.x; 27 | int a = A[i]; 28 | a += 2; 29 | A[i] = a; 30 | } 31 | -------------------------------------------------------------------------------- /src/runtime/LoaderPolicy.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_LOADER_POLICY_H 2 | #define IRIS_SRC_RT_LOADER_POLICY_H 3 | 4 | #include "Loader.h" 5 | #include 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class Policy; 11 | 12 | class LoaderPolicy : public Loader { 13 | public: 14 | LoaderPolicy(const char* lib, const char* name); 15 | ~LoaderPolicy(); 16 | 17 | Policy* policy(); 18 | 19 | const char* library(); 20 | int LoadFunctions(); 21 | void Init(void* arg); 22 | const char *name() { return name_.c_str(); } 23 | const char *lib() { return lib_.c_str(); } 24 | 25 | private: 26 | std::string lib_; 27 | std::string name_; 28 | 29 | void* (*instance_)(); 30 | }; 31 | 32 | } /* namespace rt */ 33 | } /* namespace iris */ 34 | 35 | #endif /* IRIS_SRC_RT_LOADER_POLICY_H */ 36 | 37 | -------------------------------------------------------------------------------- /tests/02_task_depend/test02_task_depend.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import iris 4 | 5 | iris.init(True) 6 | 7 | task4 = iris.task_create("A") 8 | iris.task_submit(task4, iris.iris_cpu, False); 9 | 10 | task5 = iris.task_create("B") 11 | iris.task_submit(task5, iris.iris_gpu, False); 12 | 13 | task6 = iris.task_create("C") 14 | task6_dep = [ task5 ] 15 | iris.task_depend(task6, 1, task6_dep); 16 | iris.task_submit(task6, iris.iris_cpu, False); 17 | 18 | task7 = iris.task_create("D") 19 | task7_dep = [ task4, task6 ] 20 | iris.task_depend(task7, 2, task7_dep); 21 | iris.task_submit(task7, iris.iris_gpu, False); 22 | 23 | task8 = iris.task_create("E") 24 | task8_dep = [ task5 ] 25 | iris.task_depend(task8, 1, task8_dep); 26 | 
iris.task_submit(task8, iris.iris_cpu, False); 27 | 28 | iris.finalize() 29 | 30 | -------------------------------------------------------------------------------- /tests/34_set_mem/Makefile: -------------------------------------------------------------------------------- 1 | TEST=test34_set_mem 2 | CPP=1 3 | 4 | all: $(TEST) kernel.ptx kernel.openmp.so kernel.hip 5 | 6 | include ../Makefile.tests 7 | 8 | vecadd: vecadd.cpp 9 | $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) 10 | 11 | vecadd-iris: vecadd-iris.cpp 12 | $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) 13 | 14 | kernel.openmp.so: kernel.openmp.c 15 | $(CC) -g -lgomp -Wl,-rpath=$(OPENMP_PATH) -fPIC -shared -I. $(CFLAGS) -o $@ $^ 16 | 17 | ifeq ($(NVCC_TEST),) 18 | kernel.nvopenmp.so: kernel.openmp.c 19 | @echo "No NVCC compiler found" 20 | else 21 | kernel.nvopenmp.so: kernel.openmp.c 22 | $(NVCC) -g -shared -I. ${INCLUDE} ${EXT_INCLUDE} --compiler-options -fPIC -o $@ $^ 23 | endif 24 | 25 | clean: 26 | rm -f vecadd vecadd-iris vecadd-m kernel.ptx kernel.nvopenmp.so kernel.openmp.so kernel.hip test34_set_mem 27 | -------------------------------------------------------------------------------- /include/iris/hexagon/stub.h: -------------------------------------------------------------------------------- 1 | #ifndef __STUB_H__ 2 | #define __STUB_H__ 3 | 4 | #ifndef ION_HEAP_ID_SYSTEM 5 | #define ION_HEAP_ID_SYSTEM 25 6 | #endif 7 | #include 8 | 9 | #ifndef __hexagon__ // some defs/stubs so app can build for Hexagon simulation 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #endif 16 | 17 | int irishxg_default_cache_flags(); 18 | int irishxg_uncached_flags(); 19 | uint8_t *irishxg_alloc_stub(int hid, int cflags, int size); 20 | int irishxg_init_stub(int UNSIGNED_PD, int FASTRPC_QOS, int LATENCY, int DCVS_ENABLE, int hap_power_level, int use_power_level); 21 | void irishxg_deinit_stub(); 22 | void irishxg_free_stub(void *ptr); 23 | uint64_t irishxg_handle_stub(); 24 | 25 | 26 | #endif //__STUB_H__ 
27 | -------------------------------------------------------------------------------- /tests/08_multithreading/test08_multithreading.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | static int run(void* argp) { 7 | iris_init(NULL, NULL, 1); 8 | iris_finalize(); 9 | return iris_error_count(); 10 | } 11 | 12 | pthread_t t[256]; 13 | 14 | int main(int argc, char** argv) { 15 | //setenv("IRIS_ARCHS", "opencl", 1); 16 | int i; 17 | int nthreads = argc > 1 ? atoi(argv[1]) : 10; 18 | printf("nthreads[%d]\n", nthreads); 19 | for (i = 0; i < nthreads; i++) { 20 | pthread_create(t + i, NULL, run, NULL); 21 | } 22 | int returnval = 0; 23 | for (i = 0; i < nthreads; i++) { 24 | void* rv; 25 | pthread_join(t[i], &rv); 26 | returnval += (int)(off_t)rv; 27 | } 28 | printf("Errors:%d\n", returnval); 29 | return returnval; 30 | } 31 | -------------------------------------------------------------------------------- /tests/32_json3/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void process(int* A, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(A) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | A[_id] *= 100; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | void ijk(double* C, double* A, double* B, size_t *_off, size_t *_ndr) { 12 | const size_t SIZE = _ndr[0]; 13 | #pragma omp parallel for collapse(2) shared(A,B,C) private(SIZE) 14 | //IRIS_OPENMP_KERNEL_BEGIN(i) 15 | for (size_t i = _off[0]; i < _off[0] + _ndr[0]; i++) { 16 | for (size_t j = 0; j < SIZE; j++){ 17 | double sum = 0.0; 18 | for (size_t k = 0; k < SIZE; k++) { 19 | sum += A[i * SIZE + k] * B[k * SIZE + j]; 20 | } 21 | C[i * SIZE + j] = sum; 22 | } 23 | } 24 | //IRIS_OPENMP_KERNEL_END 25 | } 26 | -------------------------------------------------------------------------------- /src/runtime/PolicyRandom.cpp: 
-------------------------------------------------------------------------------- 1 | #include "PolicyRandom.h" 2 | #include "Debug.h" 3 | #include 4 | #include 5 | #include "Task.h" 6 | #include 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | PolicyRandom::PolicyRandom(Scheduler* scheduler) { 12 | SetScheduler(scheduler); 13 | srand(time(NULL)); 14 | } 15 | 16 | PolicyRandom::~PolicyRandom() { 17 | } 18 | 19 | void PolicyRandom::GetDevices(Task* task, Device** devs, int* ndevs) { 20 | int selected = 0; 21 | int supported = 0; 22 | devs[0] = devs_[0]; 23 | for(int i=0; i 0) 28 | devs[0] = devs[rand() % supported]; 29 | *ndevs = 1; 30 | } 31 | 32 | } /* namespace rt */ 33 | } /* namespace iris */ 34 | -------------------------------------------------------------------------------- /docs/sphinx/source/misc.rst: -------------------------------------------------------------------------------- 1 | .. index:: ! citation 2 | 3 | Citation 4 | ================== 5 | To cite IRIS, please use the following paper. 6 | 7 | Jungwon Kim, Seyong Lee, Beau Johnston, and Jeffrey S. Vetter. 2021. IRIS: A Portable Runtime System Exploiting Multiple Heterogeneous Programming Systems. In *Proceedings of the 25th IEEE High Performance Extreme Computing Conference (HPEC '21)*. 1--8. 8 | 9 | .. code-block:: bibtex 10 | 11 | @inproceedings{Kim:2021:IRIS, 12 | author={Jungwon Kim and Seyong Lee and Beau Johnston and Jeffrey S. 
Vetter}, 13 | title={IRIS: A Portable Runtime System Exploiting Multiple Heterogeneous Programming Systems}, 14 | booktitle={Proceedings of the 25th IEEE High Performance Extreme Computing Conference}, 15 | series={HPEC '21}, 16 | year={2021}, 17 | pages={1--8}, 18 | doi={} 19 | } 20 | -------------------------------------------------------------------------------- /tests/09_dataflow/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void kernel_A(int* AB, IRIS_OPENMP_KERNEL_ARGS) { 4 | int i; 5 | printf("Kernel_A: AB:%p\n", AB); 6 | #pragma omp parallel for shared(AB) private(i) 7 | IRIS_OPENMP_KERNEL_BEGIN(i) 8 | AB[i] = i; 9 | IRIS_OPENMP_KERNEL_END 10 | } 11 | 12 | void kernel_B(int* AB, int* BC, IRIS_OPENMP_KERNEL_ARGS) { 13 | int i; 14 | printf("Kernel_B: AB:%p BC:%p\n", AB, BC); 15 | #pragma omp parallel for shared(AB, BC) private(i) 16 | IRIS_OPENMP_KERNEL_BEGIN(i) 17 | BC[i] = AB[i] * 10; 18 | IRIS_OPENMP_KERNEL_END 19 | } 20 | 21 | void kernel_C(int* BC, IRIS_OPENMP_KERNEL_ARGS) { 22 | int i; 23 | printf("Kernel_C: BC:%p\n", BC); 24 | #pragma omp parallel for shared(BC) private(i) 25 | IRIS_OPENMP_KERNEL_BEGIN(i) 26 | BC[i] = BC[i] * 2; 27 | IRIS_OPENMP_KERNEL_END 28 | } 29 | 30 | -------------------------------------------------------------------------------- /src/runtime/MemRange.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_MEM_RANGE_H 2 | #define IRIS_SRC_RT_MEM_RANGE_H 3 | 4 | #include 5 | #include 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class Device; 11 | 12 | class MemRange { 13 | public: 14 | MemRange(size_t off, size_t size, Device* dev); 15 | ~MemRange(); 16 | 17 | bool Distinct(size_t off, size_t size); 18 | bool Overlap(size_t off, size_t size); 19 | bool Contain(size_t off, size_t size); 20 | 21 | size_t off() { return off_; } 22 | size_t size() { return size_; } 23 | Device* dev() { return dev_; } 24 | 
25 | bool operator <(const MemRange& r) const { return off_ < r.off_; } 26 | 27 | private: 28 | size_t off_; 29 | size_t size_; 30 | Device* dev_; 31 | 32 | }; 33 | 34 | } /* namespace rt */ 35 | } /* namespace iris */ 36 | 37 | #endif /* IRIS_SRC_RT_MEM_RANGE_H */ 38 | 39 | -------------------------------------------------------------------------------- /src/runtime/Reduction.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_REDUCTION_H 2 | #define IRIS_SRC_RT_REDUCTION_H 3 | 4 | #include 5 | #include 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class Mem; 11 | 12 | class Reduction { 13 | private: 14 | Reduction(); 15 | ~Reduction(); 16 | 17 | public: 18 | void Reduce(Mem* mem, void* host, size_t size); 19 | 20 | private: 21 | void Sum(Mem* mem, void* host, size_t size); 22 | void SumLong(Mem* mem, long* host, size_t size); 23 | void SumFloat(Mem* mem, float* host, size_t size); 24 | void SumDouble(Mem* mem, double* host, size_t size); 25 | 26 | public: 27 | static Reduction* GetInstance(); 28 | 29 | private: 30 | static Reduction* singleton_; 31 | pthread_mutex_t mutex_; 32 | }; 33 | 34 | } /* namespace rt */ 35 | } /* namespace iris */ 36 | 37 | #endif /* IRIS_SRC_RT_REDUCTION_H */ 38 | -------------------------------------------------------------------------------- /tests/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #NOTE: remove the following after @Narasinga adds logic to discard IRIS platforms if the kernel files can't be found or built 4 | export IRIS_ARCHS=opencl 5 | export LD_LIBRARY_PATH=$IRIS/lib:$LD_LIBRARY_PATH 6 | 7 | bash ./clean.sh 8 | rm -rf ./build 9 | mkdir build 10 | cd build 11 | cmake .. 12 | make --ignore-errors 13 | echo "Running OpenCL version of the tests..." 14 | IRIS_ARCHS=opencl make test 15 | echo "Done." 16 | echo "Running OpenMP version of the tests..." 
17 | IRIS_ARCHS=openmp make test 18 | echo "Done." 19 | echo "Running CUDA version of the tests..." 20 | IRIS_ARCHS=cuda make test 21 | echo "Done." 22 | echo "Running HIP version of the tests..." 23 | IRIS_ARCHS=hip make test 24 | echo "Done." 25 | echo "Running All version of the tests..." 26 | IRIS_ARCHS=openmp,cuda,hip,opencl make test 27 | echo "Done." 28 | 29 | -------------------------------------------------------------------------------- /Project.toml: -------------------------------------------------------------------------------- 1 | name = "IrisHRT" 2 | uuid = "24efd7ac-9fee-46ff-a976-1a4721440d7c" 3 | authors = ["Narasigna Rao Miniskar "] 4 | version = "3.0.0" # This tells the package manager to run deps/build.jl when building the package. 5 | 6 | [build] 7 | script = "build.jl" 8 | 9 | [deps] 10 | Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458" 11 | Preferences = "21216c6a-2e73-6563-6e65-726566657250" 12 | CxxWrap = "1f15a43c-97ca-5a2a-ae31-89f07a497df4" 13 | 14 | [weakdeps] 15 | AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" 16 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 17 | oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" 18 | JACC = "0979c8fe-16a4-4796-9b82-89a9f10403ea" 19 | [extensions] 20 | 21 | [compat] 22 | AMDGPU = "1.1.3" 23 | JACC = "0.0.5" 24 | Atomix = "0.1" 25 | CUDA = "5" 26 | CxxWrap="0.16.0" 27 | Preferences = "1.4.0" 28 | julia = "1.10.3" 29 | -------------------------------------------------------------------------------- /src/runtime/QueueTask.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_QUEUE_TASK_H 2 | #define IRIS_SRC_RT_QUEUE_TASK_H 3 | 4 | #include "Task.h" 5 | #include "Queue.h" 6 | #include 7 | #include 8 | 9 | namespace iris { 10 | namespace rt { 11 | 12 | class Platform; 13 | 14 | class QueueTask : public Queue { 15 | public: 16 | QueueTask(Platform* platform); 17 | ~QueueTask(); 18 | bool Peek(Task** task, int index); 19 | bool Enqueue(Task* task); 20 | 
# Build rules for the auto_dag example and its per-backend kernel artifacts.
# Compiler variables (CC, CXX, NVCC, HIPCC, CFLAGS, ...) come from makefile_defs.mk.
include ../makefile_defs.mk

# These targets do not name files; keep a stray file called "all" or "clean"
# from silently disabling them.
.PHONY: all clean

all: auto_dag kernel.openmp.so kernel.ptx

auto_dag: auto_dag.c
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

# Same source compiled with the C++ compiler.
auto_dag-cpp: auto_dag.c
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)

kernel.ptx: kernel.cu
	$(NVCC) -ptx $^

kernel.spv: kernel.cl
	clang -cc1 -finclude-default-header -triple spir $^ -O0 -flto -emit-llvm-bc -o kernel.bc
	llvm-spirv kernel.bc -o $@

kernel.hip: kernel.hip.cpp
	$(HIPCC) --genco -o $@ $^

kernel.openmp.so: kernel.openmp.c
	$(CC) $(CFLAGS) -O3 -fopenmp -fPIC -shared -I. -o $@ $^

kernel.hexagon.so: kernel.hexagon.c
	$(CC) $(CFLAGS) -g -fPIC -shared -I. -o $@ $^

kernel.poly.so: kernel.cl.poly.c
	$(CC) $(CFLAGS) -g -fPIC -shared -I. -o $@ $^

# Removes the host binaries and the OpenMP kernel library. Other kernel
# artifacts (ptx/spv/hip) are left alone, as before.
# NOTE(review): confirm whether those artifacts are checked in before adding them here.
clean:
	rm -f auto_dag auto_dag-cpp kernel.openmp.so
// Abstract base class for objects that carry a pthread handle. Subclasses
// supply the thread body by implementing Run().
class Thread {
public:
  Thread();
  virtual ~Thread();

  void Start();
  // NOTE(review): the name suggests a start path that does not spawn a
  // thread; confirm against Thread.cpp.
  void StartWithOutThread();
  virtual void Stop();
  virtual void Sleep();
  virtual void Invoke();
  // Overwrites the running_ flag (defaults to true).
  void set_running(bool flag=true) { running_ = flag; }
  // Returns the stored pthread handle.
  pthread_t thread() { return thread_; }
  // Returns the id of the calling thread, not necessarily thread_.
  pthread_t self() { return pthread_self(); }

protected:
  // Thread body; must be provided by the concrete subclass.
  virtual void Run() = 0;

protected:
  // NOTE(review): presumably the pthread entry point, with argp carrying the
  // Thread instance; confirm against Thread.cpp.
  static void* ThreadFunc(void* argp);
protected:
  pthread_t thread_;
  volatile bool running_;
  volatile bool sleeping_;
  sem_t sem_;
};
+= id; 24 | } 25 | } 26 | } 27 | 28 | __kernel void kernel3(__global int* C, int loop) { 29 | size_t id = get_global_id(0); 30 | for (int i = 0; i < loop; i++) { 31 | for (int j = 0; j < loop; j++) { 32 | C[id] += id; 33 | } 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /apps/custom_policy/custom_policy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char** argv) { 6 | iris_init(&argc, &argv, 1); 7 | 8 | size_t SIZE = argc > 1 ? atol(argv[1]) : 8; 9 | int* A = (int*) malloc(SIZE * sizeof(int)); 10 | 11 | iris_mem memA; 12 | iris_mem_create(SIZE * sizeof(int), &memA); 13 | 14 | iris_register_policy("libPolicyGWS.so", "custom_gws", (void*) 16); 15 | 16 | void* params[1] = { &memA }; 17 | int params_info[1] = { iris_w }; 18 | iris_task task; 19 | iris_task_create(&task); 20 | iris_task_kernel(task, "setid", 1, NULL, &SIZE, NULL, 1, params, params_info); 21 | iris_task_d2h_full(task, memA, A); 22 | iris_task_submit(task, iris_custom, "custom_gws", 1); 23 | 24 | printf("A["); 25 | for (int i = 0; i < SIZE; i++) printf("%3d", A[i]); 26 | printf("]\n"); 27 | 28 | iris_finalize(); 29 | 30 | return 0; 31 | } 32 | 33 | -------------------------------------------------------------------------------- /apps/helloworld/helloworld.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | char a[12] = "hello world"; 5 | char b[12]; 6 | size_t size = 12; 7 | 8 | int main(int argc, char** argv) { 9 | iris_init(&argc, &argv, 1); 10 | 11 | iris_mem mem_a; 12 | iris_mem mem_b; 13 | iris_mem_create(size, &mem_a); 14 | iris_mem_create(size, &mem_b); 15 | 16 | iris_task task; 17 | iris_task_create(&task); 18 | iris_task_h2d(task, mem_a, 0, size, a); 19 | void* params[2] = { &mem_b, &mem_a }; 20 | int params_info[2] = { iris_w, iris_r }; 21 | iris_task_kernel(task, 
"uppercase", 1, NULL, &size, NULL, 2, params, params_info); 22 | iris_task_d2h(task, mem_b, 0, size, b); 23 | iris_task_submit(task, iris_roundrobin, NULL, 1); 24 | 25 | printf("%s\n", b); 26 | 27 | iris_task_release(task); 28 | iris_mem_release(mem_a); 29 | iris_mem_release(mem_b); 30 | 31 | iris_finalize(); 32 | 33 | return 0; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /tests/19_replay/test19_replay.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char** argv) { 6 | iris_init(&argc, &argv, true); 7 | 8 | size_t SIZE; 9 | int *A, *B; 10 | 11 | SIZE = argc > 1 ? atol(argv[1]) : 8; 12 | 13 | printf("[%s:%d] SIZE[%lu]\n", __FILE__, __LINE__, SIZE); 14 | 15 | A = (int*) malloc(SIZE * sizeof(int)); 16 | B = (int*) malloc(SIZE * sizeof(int)); 17 | 18 | for (int i = 0; i < SIZE; i++) A[i] = i; 19 | 20 | iris_mem mem; 21 | iris_mem_create(SIZE * sizeof(int), &mem); 22 | 23 | void* json_inputs[3] = { A, B, &mem }; 24 | 25 | iris_graph graph; 26 | iris_graph_create_json("../18_record/output.json", json_inputs, &graph); 27 | iris_graph_submit(graph, iris_default, true); 28 | 29 | for (int i = 0; i < SIZE; i++) printf("[%3d] %3d\n", i, B[i]); 30 | 31 | iris_finalize(); 32 | 33 | return iris_error_count(); 34 | } 35 | 36 | -------------------------------------------------------------------------------- /tests/06_device/test06_device.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char** argv) { 6 | iris_init(&argc, &argv, true); 7 | 8 | char vendor[64]; 9 | char name[64]; 10 | int type; 11 | int nplatforms = 0; 12 | int ndevs = 0; 13 | iris_platform_count(&nplatforms); 14 | for (int i = 0; i < nplatforms; i++) { 15 | size_t size; 16 | iris_platform_info(i, iris_name, name, &size); 17 | printf("platform[%d] name[%s]\n", i, 
name); 18 | } 19 | 20 | iris_device_count(&ndevs); 21 | for (int i = 0; i < ndevs; i++) { 22 | size_t size; 23 | iris_device_info(i, iris_vendor, vendor, &size); 24 | iris_device_info(i, iris_name, name, &size); 25 | iris_device_info(i, iris_type, &type, &size); 26 | printf("dev[%d] vendor[%s] name[%s] type[0x%x]\n", i, vendor, name, type); 27 | } 28 | iris_finalize(); 29 | 30 | return iris_error_count(); 31 | } 32 | -------------------------------------------------------------------------------- /docs/sphinx/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /src/runtime/LoaderPolicy.cpp: -------------------------------------------------------------------------------- 1 | #include "LoaderPolicy.h" 2 | #include "Policy.h" 3 | #include "Debug.h" 4 | 5 | namespace iris { 6 | namespace rt { 7 | 8 | LoaderPolicy::LoaderPolicy(const char* lib, const char* name) : Loader() { 9 | name_ = std::string(name); 10 | lib_ = std::string(lib); 11 | } 12 | 13 | LoaderPolicy::~LoaderPolicy() { 14 | } 15 | 16 | Policy* LoaderPolicy::policy() { 17 | return (Policy*) (instance_)(); 18 | } 19 | 20 | const char* LoaderPolicy::library() { 21 | return lib(); 22 | } 23 | 24 | int LoaderPolicy::LoadFunctions() { 25 | char func[128]; 26 | sprintf(func, "%s_instance", name()); 27 | *(void**) (&instance_) = dlsym(handle_, func); 28 | if (!instance_) { 29 | _error("%s", dlerror()); 30 | return IRIS_ERROR; 31 | } 32 | return IRIS_SUCCESS; 33 | } 34 | 35 | void LoaderPolicy::Init(void* arg) { 36 | ((Policy*) (instance_)())->Init(arg); 37 | } 38 | 39 | } /* namespace rt */ 40 | } /* namespace iris */ 41 | 42 | -------------------------------------------------------------------------------- /src/runtime/TGPolicy.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_TGPOLICY_H 2 | #define IRIS_SRC_RT_TGPOLICY_H 3 | 4 | #define REGISTER_CUSTOM_TGPOLICY(class_name, name) \ 5 | iris::rt::class_name name; \ 6 | extern "C" void* name ## _instance() { return (void*) &name; } 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | class Device; 12 | class Scheduler; 13 | class Task; 14 | class Graph; 15 | 16 | class TGPolicy 
# Builds the IRIS memory-benchmark host program and, on request, its CUDA/HIP kernels.
CXX ?= clang++
#CXX := xlC
NVCC ?= nvcc
HIPCC ?= hipcc
CFLAGS := -O3
CUDA_CFLAGS ?= -I/usr/local/cuda/include
CUDA_LDFLAGS ?= -lcuda
HIP_CFLAGS ?= -I/opt/rocm/hip/include -I/opt/rocm/hsa/include
HIP_LDFLAGS ?= -L/opt/rocm/hip/lib -lamdhip64
#HIPCC_FLAGS += --targets gfx906
OPENCL_CFLAGS ?=
OPENCL_LDFLAGS ?= -lOpenCL
# NOTE(review): links against "brisbane"; confirm this is still the installed library name.
IRIS_LDFLAGS := -lbrisbane -lpthread -ldl

# None of these targets name a file; declare them phony so a stray file called
# "all", "kernels" or "clean" cannot mask them.
.PHONY: all kernels clean clean-results

all: membench-iris-profiling

kernels: kernel.ptx kernel.hip

# CXX_FLAGS and LD_FLAGS are not set in this file; they act as optional
# command-line/environment hooks.
membench-iris-profiling: membench-iris-profiling.cpp
	$(CXX) $(CFLAGS) $(CXX_FLAGS) -g -o $@ $^ $(IRIS_LDFLAGS) $(LD_FLAGS)

kernel.ptx: kernel.cu
	$(NVCC) $(NVCC_FLAGS) -ptx $^

kernel.hip: kernel.hip.cpp
	$(HIPCC) --genco $(HIPCC_FLAGS) -o $@ $^

clean:
	rm -f membench-iris-profiling kernel.ptx kernel.hip

clean-results:
	rm -f membench-*.csv
devs, int* ndevs) { 15 | _info("ndevs[%d]", ndevs_); 16 | devs[0] = devs_[0]; 17 | *ndevs = 1; 18 | } 19 | }; 20 | 21 | class PolicyLast : public Policy { 22 | public: 23 | PolicyLast() {} 24 | virtual ~PolicyLast() {} 25 | virtual void GetDevices(Task* task, Device** devs, int* ndevs) { 26 | _info("ndevs[%d]", ndevs_); 27 | devs[0] = devs_[ndevs_ - 1]; 28 | *ndevs = 1; 29 | } 30 | }; 31 | 32 | } /* namespace rt */ 33 | } /* namespace iris */ 34 | 35 | REGISTER_CUSTOM_POLICY(PolicyLast, policy_last) 36 | REGISTER_CUSTOM_POLICY(PolicyFirst, policy_first) 37 | 38 | -------------------------------------------------------------------------------- /src/runtime/ProfilerDOT.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_PROFILER_DOT_H 2 | #define IRIS_SRC_RT_PROFILER_DOT_H 3 | 4 | #include "Profiler.h" 5 | #include "pthread.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace iris { 12 | namespace rt { 13 | 14 | class ProfilerDOT : public Profiler { 15 | public: 16 | ProfilerDOT(Platform* platform); 17 | virtual ~ProfilerDOT(); 18 | 19 | virtual int CompleteTask(Task* task); 20 | 21 | protected: 22 | virtual int Main(); 23 | virtual int Exit(); 24 | virtual const char* FileExtension(); 25 | 26 | private: 27 | std::set tasks_exit_; 28 | pthread_mutex_t dot_lock_; 29 | bool no_task_; 30 | #ifdef PER_TASK_COLOR 31 | std::vector list_color; 32 | std::map map_color; 33 | int round_robin_counter; 34 | #endif 35 | 36 | }; 37 | 38 | } /* namespace rt */ 39 | } /* namespace iris */ 40 | 41 | 42 | #endif /*IRIS_SRC_RT_PROFILER_DOT_H */ 43 | 44 | -------------------------------------------------------------------------------- /src/runtime/Timer.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_TIMER_H 2 | #define IRIS_SRC_RT_TIMER_H 3 | 4 | #define IRIS_TIMER_MAX 128 5 | #define IRIS_TIMER_APP 1 6 | #define IRIS_TIMER_PLATFORM 2 7 | #define 
// Shared body of the four test kernels: each thread adds its global index to
// its own element of C, loop * loop times. There is no bounds check, so the
// launch is assumed to cover exactly the buffer (TODO confirm with the host test).
static __device__ __forceinline__ void add_id_repeatedly(int* C, int loop) {
  const int id = blockIdx.x * blockDim.x + threadIdx.x;
  for (int outer = 0; outer < loop; ++outer) {
    for (int inner = 0; inner < loop; ++inner) {
      C[id] += id;
    }
  }
}

// Four identically-behaving entry points, kept under distinct names.
extern "C" __global__ void kernel0(int* C, int loop) {
  add_id_repeatedly(C, loop);
}

extern "C" __global__ void kernel1(int* C, int loop) {
  add_id_repeatedly(C, loop);
}

extern "C" __global__ void kernel2(int* C, int loop) {
  add_id_repeatedly(C, loop);
}

extern "C" __global__ void kernel3(int* C, int loop) {
  add_id_repeatedly(C, loop);
}
cmake_minimum_required(VERSION 3.5)

project(ISaxpy)

# All build logic lives in the shared IRIS helper included at the bottom of
# this file, located through the IRIS environment variable. Fail early with a
# clear message instead of a confusing include("/utils/CMakeLists.txt") error.
if("$ENV{IRIS}" STREQUAL "")
  message(FATAL_ERROR
    "The IRIS environment variable is not set; point it at the IRIS installation prefix.")
endif()
if(NOT EXISTS "$ENV{IRIS}/utils/CMakeLists.txt")
  message(FATAL_ERROR
    "Cannot find $ENV{IRIS}/utils/CMakeLists.txt; check the IRIS environment variable.")
endif()

# The variables below are the inputs read by the shared helper.
set(XILINX_VERSION 1)
#set(XILINX_TARGET "$ENV{XCL_EMULATION_MODE}") # sw_emu, hw_emu, hw
set(XILINX_LANGUAGE "xilinx") # opencl, xilinx

set(XILINX_OPENCL_SOURCES
  )
set(XILINX_KERNEL_SOURCES
  saxpy kernel_stage src/kernel.xilinx.cpp
  )

#set(EXTRACT_IRIS_KERNEL_SIGNATURES
#  src/signature.def)

set(OPENMP_KERNEL_SOURCES
  src/kernel.cl.openmp.c
  )

set(HIP_KERNEL_SOURCES
  src/kernel.hip
  )

set(CUDA_KERNEL_SOURCES
  src/kernel.cu
  )

# NOTE(review): src/saxpy.cpp and src/kernel.cl.hexagon.c do not appear in the
# repository listing; presumably they are generated or optional. Confirm.
set(APP_SOURCES
  src/saxpy.cpp
  src/saxpy.iris.cpp
  src/saxpy_ref.cpp
  )

set(HEXAGON_KERNEL_SOURCES
  src/kernel.cl.hexagon.c
  )

set(APP_EXECUTABLE isaxpy)
set(OPENMP_FLAGS "-I$ENV{IRIS}/include/iris/hexagon")
# Quoted so a prefix containing spaces or semicolons stays a single argument.
include("$ENV{IRIS}/utils/CMakeLists.txt")
{ 20 | #pragma HLS PIPELINE 21 | z[i] = alpha * x[i] + y[i]; 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /include/iris/hexagon/q6cache.h: -------------------------------------------------------------------------------- 1 | /**============================================================================= 2 | 3 | @file 4 | q6cache.h 5 | 6 | @brief 7 | definitions for L2 prefetch from C. 8 | 9 | Copyright (c) 2016 QUALCOMM Technologies Incorporated. 10 | All Rights Reserved Qualcomm Proprietary 11 | =============================================================================**/ 12 | #ifndef Q6CACHE_H 13 | #define Q6CACHE_H 14 | 15 | #include "hexagon_types.h" 16 | 17 | #define CreateL2pfParam(stride, w, h, dir) (unsigned long long)HEXAGON_V64_CREATE_H((dir), (stride), (w), (h)) 18 | 19 | 20 | static void L2fetch(unsigned int addr, unsigned long long param) 21 | { 22 | __asm__ __volatile__ ("l2fetch(%0,%1)" : : "r"(addr), "r"(param)); 23 | } 24 | 25 | static inline void WaitForL2fetch() 26 | { 27 | int usr; 28 | do 29 | { 30 | __asm__ __volatile__ ( 31 | " %0 = usr " :"=r"(usr) 32 | ); 33 | } while (usr < 0); 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /tests/31_isaxpy/src/kernel.cl.openmp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "iris/iris_openmp.h" 5 | #include "iris/gettime.h" 6 | #include "iris_app_cpu_dsp_interface.h" 7 | 8 | int saxpy(int32_t* Z, const int32_t* X, const int32_t* Y, int32_t SIZE, int32_t A, IRIS_OPENMP_KERNEL_ARGS) 9 | //int saxpy(int32_t* Z, int32_t A, const int32_t* X, const int32_t* Y, int32_t SIZE, int32_t *dspUsec, int32_t *dspCyc, IRIS_OPENMP_KERNEL_ARGS) 10 | { 11 | size_t i; 12 | //printf("Kernel Launch parameters: X:%p Y:%p Z:%p A:%d xSize:%d dspUsec:%p dspCyc:%p\n", X, Y, Z, A, SIZE, dspUsec, dspCyc); 13 | 
//IRIS_OPENMP_KERNEL_BEGIN(i) 14 | printf("A:%d SIZE:%d\n", A, SIZE); 15 | #pragma omp parallel for shared(Z, A, X, Y) private(i) 16 | for(i=0; i 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | iris_init(&argc, &argv, true); 8 | 9 | size_t SIZE = argc > 1 ? atol(argv[1]) : 4; 10 | 11 | int* A = (int*) malloc(SIZE * sizeof(int)); 12 | 13 | iris_register_policy("./libAIWCPolicy.so", "aiwc_policy", (void*) 8); 14 | 15 | iris_mem memA; 16 | iris_mem_create(SIZE * sizeof(int), &memA); 17 | 18 | void* params[1] = { &memA }; 19 | int params_info[1] = { iris_w }; 20 | iris_task task; 21 | iris_task_create(&task); 22 | iris_task_kernel(task, "process", 1, NULL, &SIZE, NULL, 1, params, params_info); 23 | iris_task_d2h_full(task, memA, A); 24 | iris_task_submit(task, iris_custom, "aiwc_policy", 1); 25 | 26 | for (int i = 0; i < SIZE; i++) printf("[%3d] %8d\n", i, A[i]); 27 | 28 | iris_finalize(); 29 | 30 | return iris_error_count(); 31 | } 32 | 33 | // Built in Edinburgh 2023 34 | -------------------------------------------------------------------------------- /scheduling-policies/aiwc/test_aiwc_policy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | iris_init(&argc, &argv, true); 8 | 9 | size_t SIZE = argc > 1 ? 
# Build rules for the saxpy example (C, C++, Fortran and dmem variants) and
# its per-backend kernel artifacts. Compiler variables come from makefile_defs.mk.
include ../makefile_defs.mk

# These targets do not name files; keep a stray file called "all" or "clean"
# from silently disabling them.
.PHONY: all clean

all: saxpy-c saxpy-cpp kernel.openmp.so

saxpy-dmem: saxpy-dmem.c
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

saxpy-c: saxpy.c
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

saxpy-cpp: saxpy.cpp
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)

saxpy-f90: saxpy.f90
	$(FORTRAN) $(FFLAGS) -o $@ $^ $(LDFLAGS)

kernel.ptx: kernel.cu
	$(NVCC) -ptx $^

kernel.spv: kernel.cl
	clang -cc1 -finclude-default-header -triple spir $^ -O0 -flto -emit-llvm-bc -o kernel.bc
	llvm-spirv kernel.bc -o $@

kernel.hip: kernel.hip.cpp
	$(HIPCC) --genco -o $@ $^

kernel.openmp.so: kernel.openmp.c
	$(CC) $(CFLAGS) -O3 -fopenmp -fPIC -shared -I. -o $@ $^

kernel.hexagon.so: kernel.hexagon.c
	$(CC) $(CFLAGS) -g -fPIC -shared -I. -o $@ $^

kernel.poly.so: kernel.cl.poly.c
	$(CC) $(CFLAGS) -g -fPIC -shared -I. -o $@ $^

# Removes every host binary this file can build, plus the OpenMP kernel
# library. Other kernel artifacts (ptx/spv/hip) are left alone, as before.
# NOTE(review): confirm whether those artifacts are checked in before adding them here.
clean:
	rm -f saxpy-c saxpy-cpp saxpy-f90 saxpy-dmem kernel.openmp.so
iris_gpu : iris_cpu; 22 | int devid = 0; 23 | for (int i = 0; i < ndevices(); i++) 24 | if (device(i)->type() & target_dev) devs[devid++] = device(i); 25 | *ndevs = devid; 26 | } 27 | 28 | size_t threshold_; 29 | }; 30 | 31 | } /* namespace runtime */ 32 | } /* namespace iris */ 33 | 34 | REGISTER_CUSTOM_POLICY(PolicyGWS, custom_gws) 35 | 36 | -------------------------------------------------------------------------------- /tests/10_multikernelexecution/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void kernel0(int* C, int loop) { 4 | int id = threadIdx.x + blockIdx.x * blockDim.x; 5 | for (int i = 0; i < loop; i++) { 6 | for (int j = 0; j < loop; j++) { 7 | C[id] += id; 8 | } 9 | } 10 | } 11 | 12 | extern "C" __global__ void kernel1(int* C, int loop) { 13 | int id = threadIdx.x + blockIdx.x * blockDim.x; 14 | for (int i = 0; i < loop; i++) { 15 | for (int j = 0; j < loop; j++) { 16 | C[id] += id; 17 | } 18 | } 19 | } 20 | 21 | extern "C" __global__ void kernel2(int* C, int loop) { 22 | int id = threadIdx.x + blockIdx.x * blockDim.x; 23 | for (int i = 0; i < loop; i++) { 24 | for (int j = 0; j < loop; j++) { 25 | C[id] += id; 26 | } 27 | } 28 | } 29 | 30 | extern "C" __global__ void kernel3(int* C, int loop) { 31 | int id = threadIdx.x + blockIdx.x * blockDim.x; 32 | for (int i = 0; i < loop; i++) { 33 | for (int j = 0; j < loop; j++) { 34 | C[id] += id; 35 | } 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /tests/07_policy_register/policy_gws.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class PolicyGWS: public Policy { 11 | public: 12 | PolicyGWS() {} 13 | virtual ~PolicyGWS() {} 14 | virtual void Init(void* params) { 15 | threshold_ = (size_t) params; 16 | } 
# Check if SDK_SETUP_ENV is set or not for checking whether environment is set or not.
# Exit compilation if SDK_SETUP_ENV is not set.

ifndef SDK_SETUP_ENV
$(error Error! SDK Environment not set up -> please run setup_sdk_env script from SDK root directory.)
endif


# include the variant specific .min files
# V = hexagon --> hexagon.min
# V = android --> android.min

ifndef V
$(error Variant must be provided, pass a variant by adding 'V=' to your build command)
endif

# The target is the first '_'-separated word of the variant string.
V_TARGET = $(word 1,$(subst _, ,$(V)))

# Both files are required, as the error text says. The previous single
# $(wildcard ...) test only fired when neither file existed.
V_REQUIRED_MIN = $(V_TARGET)_deps.min $(V_TARGET)_rules.min
V_MISSING_MIN = $(filter-out $(wildcard $(V_REQUIRED_MIN)),$(V_REQUIRED_MIN))
ifneq ($(strip $(V_MISSING_MIN)),)
$(error Unsupported target '$(V_TARGET)' in variant '$(V)', check that both $(V_TARGET)_deps.min and $(V_TARGET)_rules.min exist (missing: $(V_MISSING_MIN)))
endif

include $(V_TARGET)_deps.min
include $(HEXAGON_SDK_ROOT)/build/make.d/$(V_TARGET)_vs.min
include $(HEXAGON_SDK_ROOT)/build/defines.min
include $(V_TARGET).min

# always last
include $(RULES_MIN)
/src/runtime/Pool.cpp: -------------------------------------------------------------------------------- 1 | #include "Pool.h" 2 | #include "Debug.h" 3 | #include "Command.h" 4 | #include "Task.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | Pool::Pool(Platform* platform) { 10 | platform_ = platform; 11 | #if IRIS_POOL_ENABLED 12 | for (int i = 0; i < IRIS_POOL_MAX_TASK; i++) { 13 | tasks_[i] = new Task(platform, IRIS_TASK, NULL); 14 | } 15 | for (int i = 0; i < IRIS_POOL_MAX_CMD; i++) { 16 | cmds_[i] = new Command(); 17 | } 18 | tid_ = 0; 19 | cid_ = 0; 20 | #endif 21 | } 22 | 23 | Pool::~Pool() { 24 | } 25 | 26 | Task* Pool::GetTask() { 27 | #if IRIS_POOL_ENABLED 28 | return tasks_[tid_++]; 29 | #else 30 | const char *pool_tn = "Pool"; 31 | return new Task(platform_, IRIS_TASK, pool_tn); 32 | #endif 33 | } 34 | 35 | Command* Pool::GetCommand(Task* task, int type) { 36 | #if IRIS_POOL_ENABLED 37 | Command* cmd = cmds_[cid_++]; 38 | cmd->Set(task, type); 39 | return cmd; 40 | #else 41 | return new Command(task, type); 42 | #endif 43 | } 44 | 45 | } /* namespace rt */ 46 | } /* namespace iris */ 47 | 48 | -------------------------------------------------------------------------------- /src/runtime/SigHandler.cpp: -------------------------------------------------------------------------------- 1 | #include "SigHandler.h" 2 | #include "Config.h" 3 | #include "Debug.h" 4 | #if USE_SIGHANDLER 5 | #include 6 | #include 7 | #include 8 | #include 9 | #endif 10 | 11 | namespace iris { 12 | namespace rt { 13 | 14 | struct sigaction SigHandler::sa_; 15 | 16 | SigHandler::SigHandler() { 17 | #if USE_SIGHANDLER 18 | struct sigaction sa; 19 | memset(&sa, 0, sizeof(sa)); 20 | sigemptyset(&sa.sa_mask); 21 | sa.sa_sigaction = Handle; 22 | sa.sa_flags = SA_SIGINFO; 23 | sigaction(SIGSEGV, &sa, &sa_); 24 | #endif 25 | } 26 | 27 | SigHandler::~SigHandler() { 28 | } 29 | 30 | void SigHandler::Handle(int signum, siginfo_t* si, void* arg) { 31 | #if USE_SIGHANDLER 32 | void* 
buf[128]; 33 | size_t size = backtrace(buf, sizeof(buf) / sizeof(void*)); 34 | _error("signum[%d][%s]", signum, strsignal(signum)); 35 | backtrace_symbols_fd(buf, size, STDERR_FILENO); 36 | sigaction(SIGSEGV, &sa_, NULL); 37 | #endif 38 | } 39 | 40 | } /* namespace rt */ 41 | } /* namespace iris */ 42 | 43 | -------------------------------------------------------------------------------- /src/runtime/Worker.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_WORKER_H 2 | #define IRIS_SRC_RT_WORKER_H 3 | 4 | #include "Thread.h" 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class Consistency; 10 | class Device; 11 | class Platform; 12 | class Queue; 13 | class ReadyQueue; 14 | class Scheduler; 15 | class Task; 16 | 17 | class Worker : public Thread { 18 | public: 19 | Worker(Device* dev, Platform* platform, bool single = false); 20 | virtual ~Worker(); 21 | 22 | void Enqueue(Task* task); 23 | void TaskComplete(Task* task); 24 | 25 | bool busy() { return busy_; } 26 | unsigned long ntasks(); 27 | Device* device() { return dev_; } 28 | Platform* platform() { return platform_; } 29 | private: 30 | void Execute(Task* task); 31 | virtual void Run(); 32 | 33 | private: 34 | Platform* platform_; 35 | Queue* queue_; 36 | Consistency* consistency_; 37 | Device* dev_; 38 | Scheduler* scheduler_; 39 | bool single_; 40 | bool busy_; 41 | }; 42 | 43 | } /* namespace rt */ 44 | } /* namespace iris */ 45 | 46 | #endif /* IRIS_SRC_RT_WORKER_H */ 47 | -------------------------------------------------------------------------------- /apps/helloworld/Makefile: -------------------------------------------------------------------------------- 1 | include ../makefile_defs.mk 2 | 3 | all: helloworld-c helloworld-cpp 4 | 5 | helloworld-c: helloworld.c 6 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 7 | 8 | helloworld-cpp: helloworld.cpp 9 | $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) 10 | 11 | kernel.ptx: kernel.cu 12 | $(NVCC) -ptx $^ 13 | 14 
| kernel.spv: kernel.cl 15 | #clang -cc1 -finclude-default-header -triple spir $^ -O0 -emit-llvm-bc -o kernel.bc 16 | clang -cc1 -finclude-default-header -triple spir $^ -O0 -flto -emit-llvm-bc -o kernel.bc 17 | llvm-spirv kernel.bc -o $@ 18 | 19 | kernel.hip: kernel.hip.cpp 20 | #hipcc --genco --targets gfx906 -o $@ $^ 21 | $(HIPCC) --genco -o $@ $^ 22 | 23 | kernel.openmp.so: kernel.openmp.c 24 | #$(CC) -O3 -qsmp -fPIC -shared -I. -o $@ $^ 25 | $(CC) $(CFLAGS) -O3 -fopenmp -fPIC -shared -I. -o $@ $^ 26 | 27 | kernel.hexagon.so: kernel.hexagon.c 28 | $(CC) $(CFLAGS) -g -fPIC -shared -I. -o $@ $^ 29 | 30 | kernel.poly.so: kernel.cl.poly.c 31 | $(CC) $(CFLAGS) -g -fPIC -shared -I. -o $@ $^ 32 | 33 | clean: 34 | rm -f helloworld-c helloworld-cpp 35 | -------------------------------------------------------------------------------- /src/runtime/Consistency.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_CONSISTENCY_H 2 | #define IRIS_SRC_RT_CONSISTENCY_H 3 | 4 | #include 5 | #include "Kernel.h" 6 | 7 | namespace iris { 8 | namespace rt { 9 | 10 | class Command; 11 | class Mem; 12 | class Scheduler; 13 | class Task; 14 | class Worker; 15 | 16 | class Consistency { 17 | public: 18 | Consistency(Scheduler* scheduler); 19 | ~Consistency(); 20 | 21 | void Resolve(Task* task); 22 | void Disable() { disable_ = true; } 23 | void Enable() { disable_ = false; } 24 | 25 | private: 26 | void ResolveKernel(Task* task, Command* cmd); 27 | void ResolveKernelWithPolymem(Task* task, Command* cmd, Mem* mem, KernelArg* arg, iris_poly_mem* polymem); 28 | void ResolveKernelWithoutPolymem(Task* task, Command* cmd, Mem* mem, KernelArg* arg); 29 | void ResolveD2H(Task* task, Command* cmd); 30 | private: 31 | Scheduler* scheduler_; 32 | pthread_mutex_t mutex_; 33 | bool disable_; 34 | }; 35 | 36 | } /* namespace rt */ 37 | } /* namespace iris */ 38 | 39 | #endif /* IRIS_SRC_RT_CONSISTENCY_H */ 40 | 
-------------------------------------------------------------------------------- /src/runtime/HubClient.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_HUB_CLIENT_H 2 | #define IRIS_SRC_RT_HUB_CLIENT_H 3 | 4 | #include 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | class Message; 10 | class Scheduler; 11 | 12 | class HubClient { 13 | public: 14 | HubClient(Scheduler* scheduler); 15 | ~HubClient(); 16 | 17 | int Init(); 18 | int StopHub(); 19 | int Status(); 20 | 21 | int TaskInc(int dev, int i); 22 | int TaskDec(int dev, int i); 23 | int TaskAll(size_t* ntasks, int ndevs); 24 | 25 | bool available() { return available_; } 26 | 27 | private: 28 | int OpenMQ(); 29 | int CloseMQ(); 30 | int SendMQ(Message& msg); 31 | 32 | int OpenFIFO(); 33 | int CloseFIFO(); 34 | int RecvFIFO(Message& msg); 35 | 36 | int Register(); 37 | int Deregister(); 38 | 39 | private: 40 | Scheduler* scheduler_; 41 | pid_t pid_; 42 | int mq_; 43 | int fifo_; 44 | int ndevs_; 45 | bool available_; 46 | bool stop_hub_; 47 | }; 48 | 49 | } /* namespace rt */ 50 | } /* namespace iris */ 51 | 52 | #endif /* IRIS_SRC_RT_HUB_CLIENT_H */ 53 | 54 | -------------------------------------------------------------------------------- /src/runtime/Policy.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLICY_H 2 | #define IRIS_SRC_RT_POLICY_H 3 | 4 | #define REGISTER_CUSTOM_POLICY(class_name, name) \ 5 | iris::rt::class_name name; \ 6 | extern "C" void* name ## _instance() { return (void*) &name; } 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | class Device; 12 | class Scheduler; 13 | class Task; 14 | 15 | class Policy { 16 | public: 17 | Policy(); 18 | virtual ~Policy(); 19 | 20 | virtual void Init(void* arg) {} 21 | virtual bool IsKernelSupported(Task *task, Device *dev); 22 | virtual void GetDevices(Task* task, Device** devs, int* ndevs) = 0; 23 | void SetScheduler(Scheduler* 
scheduler); 24 | 25 | protected: 26 | Device** devices() const { return devs_; } 27 | Device* device(int i) const { return devs_[i]; } 28 | int ndevices() const { return ndevs_; } 29 | 30 | protected: 31 | Scheduler* scheduler_; 32 | Device** devs_; 33 | int ndevs_; 34 | }; 35 | 36 | } /* namespace rt */ 37 | } /* namespace iris */ 38 | 39 | #endif /* IRIS_SRC_RT_POLICY_H */ 40 | 41 | -------------------------------------------------------------------------------- /tests/31_isaxpy/saxpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import iris 4 | import numpy as np 5 | import sys 6 | import pdb 7 | 8 | iris.init() 9 | 10 | SIZE = 8 if len(sys.argv) == 1 else int(sys.argv[1]) 11 | A = 10 12 | SIZEK=np.int32(8) 13 | 14 | x = np.arange(SIZE, dtype=np.int32) 15 | y = np.arange(SIZE, dtype=np.int32) 16 | s = np.arange(SIZE, dtype=np.int32) 17 | 18 | print('X', x) 19 | print('Y', y) 20 | print('A', A) 21 | print('SIZE', SIZE) 22 | 23 | mem_x = iris.mem(x.nbytes) 24 | mem_y = iris.mem(y.nbytes) 25 | mem_s = iris.mem(s.nbytes) 26 | 27 | task = iris.task() 28 | task.h2d(mem_x, 0, x.nbytes, x) 29 | task.h2d(mem_y, 0, y.nbytes, y) 30 | #pdb.set_trace() 31 | task.kernel("saxpy", 1, [0], [SIZE], [1], 32 | [mem_s, mem_x, mem_y, SIZE, A] , 33 | [iris.iris_w, iris.iris_r, iris.iris_r, 4, 4] ) 34 | task.params_map([iris.iris_ftf, iris.iris_ftf, iris.iris_ftf, iris.iris_cpu, iris.iris_ftf]) 35 | task.d2h(mem_s, 0, s.nbytes, s) 36 | task.submit(iris.iris_fpga) 37 | 38 | print('S =', A, '* X + Y', s) 39 | 40 | iris.finalize() 41 | 42 | -------------------------------------------------------------------------------- /src/runtime/Profiler.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_PROFILER_H 2 | #define IRIS_SRC_RT_PROFILER_H 3 | 4 | #include 5 | #include 6 | using namespace std; 7 | namespace iris { 8 | namespace rt { 9 | 10 | class Message; 11 | 
class Platform; 12 | class Task; 13 | 14 | class Profiler { 15 | public: 16 | Profiler(Platform* platform, const char *profiler_name); 17 | virtual ~Profiler(); 18 | 19 | virtual int CompleteTask(Task* task) = 0; 20 | 21 | protected: 22 | virtual int Main(); 23 | virtual int Exit() = 0; 24 | virtual const char* FileExtension() = 0; 25 | 26 | int OpenFD(const char *path=NULL); 27 | int CloseFD(); 28 | int Write(const char* s, int tab = 0); 29 | int Write(string s, int tab = 0); 30 | 31 | const char* policy_str(int policy); 32 | 33 | private: 34 | int Flush(); 35 | 36 | protected: 37 | Platform* platform_; 38 | 39 | private: 40 | int fd_; 41 | char path_[1024]; 42 | char profiler_name_[64]; 43 | Message* msg_; 44 | }; 45 | 46 | } /* namespace rt */ 47 | } /* namespace iris */ 48 | 49 | 50 | #endif /*IRIS_SRC_RT_PROFILER_H */ 51 | -------------------------------------------------------------------------------- /src/runtime/Thread.cpp: -------------------------------------------------------------------------------- 1 | #include "Thread.h" 2 | #include "Debug.h" 3 | 4 | namespace iris { 5 | namespace rt { 6 | 7 | Thread::Thread() { 8 | thread_ = (pthread_t) NULL; 9 | running_ = false; 10 | sem_init(&sem_, 0, 0); 11 | } 12 | 13 | Thread::~Thread() { 14 | Stop(); 15 | sem_destroy(&sem_); 16 | } 17 | 18 | void Thread::Start() { 19 | if (thread_) return; 20 | running_ = true; 21 | pthread_create(&thread_, NULL, &Thread::ThreadFunc, this); 22 | } 23 | 24 | void Thread::StartWithOutThread() { 25 | set_running(true); 26 | Run(); 27 | } 28 | 29 | void Thread::Stop() { 30 | if (!thread_) return; 31 | running_ = false; 32 | Invoke(); 33 | pthread_join(thread_, NULL); 34 | thread_ = (pthread_t) NULL; 35 | } 36 | 37 | void Thread::Sleep() { 38 | sleeping_ = true; 39 | sem_wait(&sem_); 40 | sleeping_ = false; 41 | } 42 | 43 | void Thread::Invoke() { 44 | sem_post(&sem_); 45 | } 46 | 47 | void* Thread::ThreadFunc(void* argp) { 48 | ((Thread*) argp)->Run(); 49 | return NULL; 50 | } 51 
| 52 | } /* namespace rt */ 53 | } /* namespace iris */ 54 | 55 | -------------------------------------------------------------------------------- /apps/vecadd/vecadd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char** argv) { 6 | size_t SIZE; 7 | int *A, *B, *C; 8 | int ERROR = 0; 9 | 10 | SIZE = argc > 1 ? atol(argv[1]) : 16; 11 | printf("SIZE[%d]\n", SIZE); 12 | 13 | A = (int*) malloc(SIZE * sizeof(int)); 14 | B = (int*) malloc(SIZE * sizeof(int)); 15 | C = (int*) malloc(SIZE * sizeof(int)); 16 | 17 | for (int i = 0; i < SIZE; i++) { 18 | A[i] = i; 19 | B[i] = i; 20 | C[i] = 0; 21 | } 22 | 23 | #pragma acc parallel loop copyin(A[0:SIZE], B[0:SIZE]) device(gpu) 24 | #pragma omp target teams distribute parallel for map(to:A[0:SIZE], B[0:SIZE]) device(gpu) 25 | #pragma iris kernel h2d(A[0:SIZE], B[0:SIZE]) alloc(C[0:SIZE]) device(gpu) 26 | for (int i = 0; i < SIZE; i++) { 27 | C[i] = A[i] + B[i]; 28 | } 29 | 30 | for (int i = 0; i < SIZE; i++) { 31 | printf("C[%d] = %d\n", i, C[i]); 32 | if (C[i] != (A[i] + B[i])) ERROR++; 33 | } 34 | printf("ERROR[%d]\n", ERROR); 35 | 36 | free(A); 37 | free(B); 38 | free(C); 39 | 40 | return ERROR; 41 | } 42 | -------------------------------------------------------------------------------- /apps/benchmarking/memory-performance-scripts/run-membench-opencl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./setup.sh 4 | #ensure libiris.so is in the shared library path 5 | if [ ! -n "$IRIS_INSTALL_ROOT" ]; then 6 | IRIS_INSTALL_ROOT="$HOME/.iris" 7 | fi 8 | echo "ADDING $IRIS_INSTALL_ROOT/lib64 to LD_LIBRARY_PATH" 9 | export LD_LIBRARY_PATH=$IRIS_INSTALL_ROOT/lib64:$IRIS_INSTALL_ROOT/lib:$LD_LIBRARY_PATH 10 | export WORKING_DIR=`pwd` 11 | 12 | make clean 13 | make memory-performance-iris 14 | 15 | #exit if the last program run wasn't successful 16 | [ $? 
-ne 0 ] && exit 17 | 18 | #don't proceed if the target failed to build 19 | if ! [ -f memory-performance-iris ] ; then 20 | exit 21 | fi 22 | 23 | export RUNTIME=opencl 24 | export REPEATS=10 25 | # Final experiment: Lock the number of transfers and increase the buffer-size---starting from 1KiB onwards 26 | for SIZE in {1..25} 27 | do 28 | ((ELEMENTS=2**${SIZE})) 29 | echo ${ELEMENTS} 30 | echo ${KIB} 31 | IRIS_ARCHS=opencl ./memory-performance-iris ${ELEMENTS} ${REPEATS} 1000 membench-${RUNTIME}-${HOST}-${ELEMENTS}.csv 32 | done 33 | 34 | source ./setup.sh 35 | 36 | -------------------------------------------------------------------------------- /src/runtime/Polyhedral.h: -------------------------------------------------------------------------------- 1 | #ifndef IRIS_SRC_RT_POLYHEDRAL_H 2 | #define IRIS_SRC_RT_POLYHEDRAL_H 3 | 4 | #include 5 | #include 6 | #include "Loader.h" 7 | 8 | namespace iris { 9 | namespace rt { 10 | 11 | class Polyhedral : public Loader { 12 | public: 13 | Polyhedral(); 14 | ~Polyhedral(); 15 | 16 | const char* library() { return "kernel.poly.so"; } 17 | 18 | int LoadFunctions(); 19 | 20 | int Kernel(const char* name); 21 | int SetArg(int idx, size_t size, void* value); 22 | int Launch(int dim, size_t* wgo, size_t* wgs, size_t* gws, size_t* lws); 23 | int GetMem(int idx, iris_poly_mem* plmem); 24 | 25 | private: 26 | int (*iris_poly_init)(); 27 | int (*iris_poly_finalize)(); 28 | int (*iris_poly_kernel)(const char* name); 29 | int (*iris_poly_setarg)(int idx, size_t size, void* value); 30 | int (*iris_poly_launch)(int dim, size_t* wgo, size_t* wgs, size_t* gws, size_t* lws); 31 | int (*iris_poly_getmem)(int idx, iris_poly_mem* plmem); 32 | }; 33 | 34 | } /* namespace rt */ 35 | } /* namespace iris */ 36 | 37 | #endif /* IRIS_SRC_RT_POLYHEDRAL_H */ 38 | -------------------------------------------------------------------------------- /.gitlab-ci-scripts/schema_check.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | errors=0 4 | total=0 5 | 6 | # Validate against record schema. 7 | echo "Tests directory: tests-${IRIS_ARCHS}-${IRIS_ASYNC}-${IRIS_TAG}-${IRIS_MACHINE}${IRIS_TESTNAME}" 8 | for f in tests-${IRIS_ARCHS}-${IRIS_ASYNC}-${IRIS_TAG}-${IRIS_MACHINE}${IRIS_TESTNAME}/**/output.json 9 | do 10 | echo python utils/validate_schema.py -i $f -s schema/record.schema.json 11 | python utils/validate_schema.py -i $f -s schema/record.schema.json 12 | es=$? 13 | if (( es > 0 )) 14 | then 15 | errors=$((errors+1)) 16 | fi 17 | total=$((total+1)) 18 | done 19 | 20 | # Validate against dagger schema. 21 | for f in tests-${IRIS_ARCHS}-${IRIS_ASYNC}-${IRIS_TAG}-${IRIS_MACHINE}${IRIS_TESTNAME}/**/*.json 22 | do 23 | echo python utils/validate_schema.py -i $f -s schema/dagger.schema.json 24 | python utils/validate_schema.py -i $f -s schema/dagger.schema.json 25 | es=$? 26 | if (( es > 0 )) 27 | then 28 | errors=$((errors+1)) 29 | fi 30 | total=$((total+1)) 31 | done 32 | 33 | echo Schema check: $((total-errors)) of $total tests passed. 
34 | exit $errors 35 | -------------------------------------------------------------------------------- /apps/benchmarking/kernel.openmp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void saxpy(float* Z, float A, float* X, float* Y, IRIS_OPENMP_KERNEL_ARGS) { 4 | size_t _id; 5 | #pragma omp parallel for shared(Z, A, X, Y) private(_id) 6 | IRIS_OPENMP_KERNEL_BEGIN(_id) 7 | Z[_id] = A * X[_id] + Y[_id]; 8 | IRIS_OPENMP_KERNEL_END 9 | } 10 | 11 | static void ijk(double* C, double* A, double* B, IRIS_OPENMP_KERNEL_ARGS) { 12 | size_t _id; 13 | #pragma omp parallel for shared(C, A, B) private(_id) 14 | IRIS_OPENMP_KERNEL_BEGIN(_id) 15 | size_t SIZE = _bws[0]; 16 | size_t j, k; 17 | for (size_t j = 0; j < SIZE; j++) { 18 | double sum = 0.0; 19 | for (size_t k = 0; k < SIZE; k++) { 20 | sum += A[_id * SIZE + k] * B[k * SIZE + j]; 21 | } 22 | C[_id * SIZE + j] = sum; 23 | } 24 | IRIS_OPENMP_KERNEL_END 25 | } 26 | 27 | static void nothing(int* A, IRIS_OPENMP_KERNEL_ARGS) { 28 | } 29 | 30 | static void add_id(int* A, IRIS_OPENMP_KERNEL_ARGS) { 31 | size_t i; 32 | #pragma omp parallel for shared(A) private(i) 33 | IRIS_OPENMP_KERNEL_BEGIN(i) 34 | A[i] = A[i] + i; 35 | IRIS_OPENMP_KERNEL_END 36 | } 37 | 38 | -------------------------------------------------------------------------------- /apps/benchmarking/memory-performance-scripts/run-membench-hip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./setup.sh 4 | #ensure libiris.so is in the shared library path 5 | if [ ! 
-n "$IRIS_INSTALL_ROOT" ]; then
6 |   IRIS_INSTALL_ROOT="$HOME/.iris"
7 | fi
8 | echo "ADDING $IRIS_INSTALL_ROOT/lib64 to LD_LIBRARY_PATH"
9 | export LD_LIBRARY_PATH=$IRIS_INSTALL_ROOT/lib64:$IRIS_INSTALL_ROOT/lib:$LD_LIBRARY_PATH
10 | export WORKING_DIR=`pwd`
11 |
12 | make clean
13 | make memory-performance-iris kernel.hip
14 |
15 | #exit if the last program run wasn't successful
16 | [ $? -ne 0 ] && exit 1
17 | # exit codes are explicit: a bare "exit" returns the status of the test just evaluated, which is 0 here
18 | #don't proceed if the target failed to build
19 | if ! [ -f memory-performance-iris ] || ! [ -f kernel.hip ] ; then
20 |   exit 1
21 | fi
22 |
23 | export RUNTIME=hip
24 | export REPEATS=25
25 | # Final experiment: Lock the number of transfers and increase the buffer-size---starting from 1KiB onwards
26 | for SIZE in {1..26}
27 | do
28 |   ((ELEMENTS=2**${SIZE}))
29 |   echo ${ELEMENTS}
30 |   echo ${KIB}
31 |   IRIS_ARCHS=hip ./memory-performance-iris ${ELEMENTS} ${REPEATS} 1000 membench-${RUNTIME}-${HOST}-${ELEMENTS}.csv
32 | done
33 |
34 | source ./setup.sh
35 |
36 |
--------------------------------------------------------------------------------
/apps/dagger/benchmark-systems.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | WD=`pwd`
3 | #reset the experiment (including regenerate the dagger payloads)
4 | rm -fr dagger-figures dagger-payloads dagger-results dagger-graphs
5 | # Run the policy evaluation on each host in turn over ssh; stop at the first host whose run exits non-zero.
6 | #echo "Running on explorer..."
7 | ssh -l 9bj explorer "cd $WD && ./run-policy-evaluation.sh"
8 | [ $? -ne 0 ] && echo "FAILED on explorer!" && exit 1
9 |
10 | echo "Running on radeon..."
11 | ssh -l 9bj radeon "cd $WD && ./run-policy-evaluation.sh"
12 | [ $? -ne 0 ] && echo "FAILED on radeon!" && exit 1
13 |
14 | echo "Running on equinox..."
15 | ssh -l 9bj equinox "cd $WD && ./run-policy-evaluation.sh"
16 | [ $? -ne 0 ] && echo "FAILED on equinox!" && exit 1
17 |
18 | echo "Running on leconte..."
19 | ssh -l 9bj leconte "cd $WD && ./run-policy-evaluation.sh"
20 | [ $? -ne 0 ] && echo "FAILED on leconte!"
&& exit 1 21 | 22 | echo "Running on oswald00..." 23 | ssh -l 9bj oswald00 "cd $WD && ./run-policy-evaluation.sh" 24 | [ $? -ne 0 ] && echo "FAILED on oswald00!" && exit 1 25 | 26 | echo "Running on zenith..." 27 | ssh -l 9bj zenith "cd $WD && ./run-policy-evaluation.sh" 28 | [ $? -ne 0 ] && echo "FAILED on zenith!" && exit 1 29 | 30 | -------------------------------------------------------------------------------- /src/runtime/LICENSE.jsmn: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Serge A. Zaitsev 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | 21 | -------------------------------------------------------------------------------- /tests/21_task_malloc/test21_task_malloc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char** argv) { 6 | iris_init(&argc, &argv, 1); 7 | 8 | size_t SIZE, nbytes; 9 | double *A, *B, *C; 10 | 11 | SIZE = argc > 1 ? atol(argv[1]) : 8; 12 | 13 | nbytes = SIZE * sizeof(double); 14 | 15 | printf("[%s:%d] SIZE[%lu]\n", __FILE__, __LINE__, SIZE); 16 | 17 | iris_mem memA, memB, memC; 18 | iris_mem_create(nbytes, &memA); 19 | iris_mem_create(nbytes, &memB); 20 | iris_mem_create(nbytes, &memC); 21 | 22 | iris_task task; 23 | iris_task_create(&task); 24 | iris_task_malloc(task, memA); 25 | iris_task_malloc(task, memB); 26 | iris_task_malloc(task, memC); 27 | iris_task_submit(task, 0, NULL, 1); 28 | 29 | iris_mem_arch(memA, 0, (void**) &A); 30 | iris_mem_arch(memB, 0, (void**) &B); 31 | iris_mem_arch(memC, 0, (void**) &C); 32 | 33 | printf("[%s:%d] A[%p] B[%p] C[%p]\n", __FILE__, __LINE__, A, B, C); 34 | 35 | iris_mem_release(memA); 36 | iris_mem_release(memB); 37 | iris_mem_release(memC); 38 | 39 | iris_finalize(); 40 | 41 | return iris_error_count(); 42 | } 43 | -------------------------------------------------------------------------------- /apps/benchmarking/memory-performance-scripts/run-membench-openmp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./setup.sh 4 | #ensure libiris.so is in the shared library path 5 | if [ ! 
-n "$IRIS_INSTALL_ROOT" ]; then
6 |   IRIS_INSTALL_ROOT="$HOME/.iris"
7 | fi
8 | echo "ADDING $IRIS_INSTALL_ROOT/lib64 to LD_LIBRARY_PATH"
9 | export LD_LIBRARY_PATH=$IRIS_INSTALL_ROOT/lib64:$IRIS_INSTALL_ROOT/lib:$LD_LIBRARY_PATH
10 | export WORKING_DIR=`pwd`
11 |
12 | make clean
13 | make memory-performance-iris kernel.openmp.so
14 |
15 | #exit if the last program run wasn't successful
16 | [ $? -ne 0 ] && exit 1
17 | # exit codes are explicit: a bare "exit" returns the status of the test just evaluated, which is 0 here
18 | #don't proceed if the target failed to build
19 | if ! [ -f memory-performance-iris ] || ! [ -f kernel.openmp.so ] ; then
20 |   exit 1
21 | fi
22 |
23 | export RUNTIME=openmp
24 | export REPEATS=10
25 | # Final experiment: Lock the number of transfers and increase the buffer-size---starting from 1KiB onwards
26 | for SIZE in {1..25}
27 | do
28 |   ((ELEMENTS=2**${SIZE}))
29 |   echo ${ELEMENTS}
30 |   echo ${KIB}
31 |   IRIS_ARCHS=openmp ./memory-performance-iris ${ELEMENTS} ${REPEATS} 1000 membench-${RUNTIME}-${HOST}-${ELEMENTS}.csv
32 | done
33 |
34 | source ./setup.sh
35 |
36 |
--------------------------------------------------------------------------------
/include/iris/hip/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2008-2020 Advanced Micro Devices, Inc.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /include/iris/level_zero/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Intel Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/runtime/Polyhedral.cpp: -------------------------------------------------------------------------------- 1 | #include "Polyhedral.h" 2 | #include "Debug.h" 3 | #include "Loader.h" 4 | #include 5 | 6 | namespace iris { 7 | namespace rt { 8 | 9 | Polyhedral::Polyhedral() { 10 | } 11 | 12 | Polyhedral::~Polyhedral() { 13 | if (handle_) iris_poly_finalize(); 14 | } 15 | 16 | int Polyhedral::LoadFunctions() { 17 | LOADFUNC(iris_poly_init); 18 | LOADFUNC(iris_poly_finalize); 19 | LOADFUNC(iris_poly_kernel); 20 | LOADFUNC(iris_poly_setarg); 21 | LOADFUNC(iris_poly_launch); 22 | LOADFUNC(iris_poly_getmem); 23 | 24 | iris_poly_init(); 25 | 26 | return IRIS_SUCCESS; 27 | } 28 | 29 | int Polyhedral::Kernel(const char* name) { 30 | return iris_poly_kernel(name); 31 | } 32 | 33 | int Polyhedral::SetArg(int idx, size_t size, void* value) { 34 | return iris_poly_setarg(idx, size, value); 35 | } 36 | 37 | int Polyhedral::Launch(int dim, size_t* wgo, size_t* wgs, size_t* gws, size_t* lws) { 38 | return iris_poly_launch(dim, wgo, wgs, gws, lws); 39 | } 40 | 41 | int Polyhedral::GetMem(int idx, iris_poly_mem* plmem) { 42 | return iris_poly_getmem(idx, plmem); 43 | } 44 | 45 | } /* namespace rt */ 46 | } /* namespace iris */ 47 | 48 | -------------------------------------------------------------------------------- /tests/Makefile.tests: -------------------------------------------------------------------------------- 1 | SHELL := bash 2 | IRIS ?= $(HOME)/.iris 3 | CC := gcc 4 | CXX := g++ 5 | INCLUDE += -I$(IRIS)/include 6 | CFLAGS += ${INCLUDE} -g -std=gnu99 ${EXT_INCLUDE} 7 | CXXFLAGS += ${INCLUDE} -g -std=c++11 ${EXT_INCLUDE} 8 | LDFLAGS += -liris -lpthread -ldl ${EXT_LDFLAGS} 9 | LDINC += -L${IRIS}/lib -L${IRIS}/lib64 10 | NVCC ?= $(CUDA_PATH)/bin/nvcc 11 | HIPCC ?= $(ROCM_PATH)/bin/hipcc 12 | NVCC_TEST := $(shell which $(NVCC)) 13 | HIPCC_TEST := $(shell which $(HIPCC)) 14 | ifeq (1, ${CPP}) 
15 | $(TEST):$(TEST).cpp 16 | rm -f $(TEST) 17 | $(CXX) $(CXXFLAGS) -o $@ $^ $(LDINC) $(LDFLAGS) 18 | else 19 | $(TEST):$(TEST).c 20 | rm -f $(TEST) 21 | $(CC) $(CFLAGS) -o $@ $^ $(LDINC) $(LDFLAGS) -Dfalse=0 -Dtrue=1 22 | endif 23 | 24 | ifeq ($(NVCC_TEST),) 25 | kernel.ptx: kernel.cu 26 | @echo "No NVCC compiler found" 27 | else 28 | kernel.ptx: kernel.cu 29 | $(NVCC) -ptx $^ 30 | endif 31 | 32 | ifeq ($(HIPCC_TEST),) 33 | kernel.hip: kernel.hip.cpp 34 | @echo "No HIPCC compiler found" 35 | else 36 | kernel.hip: kernel.hip.cpp 37 | $(HIPCC) --genco -o $@ $^ 38 | endif 39 | 40 | clean: 41 | rm -f $(TEST) kernel.ptx kernel.hip kernel.openmp.so 42 | -------------------------------------------------------------------------------- /apps/qiree_backend/qiree_task.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | iris_init(&argc, &argv, 1); 8 | 9 | size_t SIZE = 0; 10 | int TARGET; 11 | int VERBOSE; 12 | 13 | SIZE = argc > 1 ? atol(argv[1]) : 8; 14 | TARGET = argc > 2 ? atol(argv[2]) : 0; 15 | VERBOSE = argc > 3 ? 
atol(argv[3]) : 1; 16 | 17 | printf("[%s:%d] SIZE[%zu] TARGET[%d] VERBOSE[%d]\n", __FILE__, __LINE__, SIZE, TARGET, VERBOSE); 18 | 19 | //iris_graph graph; 20 | //iris_graph_create(&graph); 21 | 22 | char tn[128]; 23 | sprintf(tn, "qiree_task", NULL); 24 | 25 | iris_task task0; 26 | iris_task_create_name(tn, &task0); 27 | //void* params0[] = {}; 28 | //int pinfo0[] = {}; 29 | iris_task_kernel(task0, "bell.ll", 1, NULL, &SIZE, NULL, 0, NULL, NULL); 30 | //iris_task_kernel(task0, "saxpy", 1, NULL, &SIZE, NULL, 4, params0, pinfo0); 31 | //iris_task_dmem_flush_out(task0, mem_Z); 32 | //iris_graph_task(graph, task0, iris_any, NULL); 33 | iris_task_submit(task0, iris_cpu, NULL, 1); 34 | 35 | iris_synchronize(); 36 | 37 | iris_finalize(); 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /apps/vecadd/plot_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm -rf figures ; mkdir figures 3 | python plot_results.py iris_openmp "IRIS OpenMP" 4 | python plot_results.py iris_cuda "IRIS CUDA" 5 | python plot_results.py iris_hip "IRIS HIP" 6 | python plot_results.py dpc++_cuda "DPC++ CUDA" 7 | python plot_results.py charmsycl_hip "Charm-SYCL IRIS HIP" 8 | python plot_results.py charmsycl_hip_directly "Charm-SYCL (Internal) HIP" 9 | python plot_results.py charmsycl_cuda "Charm-SYCL IRIS CUDA" 10 | python plot_results.py charmsycl_cuda_directly "Charm-SYCL (Internal) CUDA" 11 | python plot_results.py charmsycl_openmp "Charm-SYCL IRIS OpenMP" 12 | python plot_results.py charmsycl_openmp_directly "Charm-SYCL (Internal) OpenMP" 13 | python plot_results.py opensycl_openmp "OpenSYCL OpenMP" 14 | pdfunite figures/iris_cuda.pdf figures/charmsycl_cuda.pdf figures/charmsycl_cuda_directly.pdf figures/dpc++_cuda.pdf figures/cuda_comparison.pdf 15 | pdfunite figures/iris_openmp.pdf figures/charmsycl_openmp.pdf figures/charmsycl_openmp_directly.pdf 
figures/opensycl_openmp.pdf figures/openmp_comparison.pdf 16 | pdfunite figures/iris_hip.pdf figures/charmsycl_hip.pdf figures/charmsycl_hip_directly.pdf figures/hip_comparison.pdf 17 | -------------------------------------------------------------------------------- /tests/38_offset/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void vecadd(int* A, int* B, int* C) { 4 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 5 | C[i] = A[i] + B[i]; 6 | } 7 | extern "C" __global__ void vecadd_with_offsets(int* A, int* B, int* C, size_t blockOff_x, size_t blockOff_y, size_t blockOff_z) { 8 | size_t id = blockOff_x + blockIdx.x * blockDim.x + threadIdx.x; 9 | //size_t id = (blockOff_x + blockIdx.x) * blockDim.x + threadIdx.x; 10 | //int id = blockIdx.x * blockDim.x + threadIdx.x; 11 | C[id] = A[id] + B[id]; 12 | } 13 | extern "C" __global__ void blockadd(int* A, int* B, int* C, size_t SIZE) { 14 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 15 | size_t j = blockIdx.y * blockDim.y + threadIdx.y; 16 | C[j * SIZE + i] = A[j * SIZE + i] + B[j * SIZE + i]; 17 | } 18 | extern "C" __global__ void blockadd_with_offsets(int* A, int* B, int* C, size_t SIZE, size_t blockOff_x, size_t blockOff_y, size_t blockOff_z) { 19 | size_t i = blockOff_x + blockIdx.x * blockDim.x + threadIdx.x; 20 | size_t j = blockOff_y + blockIdx.y * blockDim.y + threadIdx.y; 21 | C[j * SIZE + i] = A[j * SIZE + i] + B[j * SIZE + i]; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /tests/38_offset_subbuffer/kernel.hip.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void vecadd(int* A, int* B, int* C) { 4 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 5 | C[i] = A[i] + B[i]; 6 | } 7 | extern "C" __global__ void vecadd_with_offsets(int* A, int* B, int* C, size_t blockOff_x, size_t 
blockOff_y, size_t blockOff_z) { 8 | size_t id = blockOff_x + blockIdx.x * blockDim.x + threadIdx.x; 9 | //size_t id = (blockOff_x + blockIdx.x) * blockDim.x + threadIdx.x; 10 | //int id = blockIdx.x * blockDim.x + threadIdx.x; 11 | C[id] = A[id] + B[id]; 12 | } 13 | extern "C" __global__ void blockadd(int* A, int* B, int* C, size_t SIZE) { 14 | size_t i = blockIdx.x * blockDim.x + threadIdx.x; 15 | size_t j = blockIdx.y * blockDim.y + threadIdx.y; 16 | C[j * SIZE + i] = A[j * SIZE + i] + B[j * SIZE + i]; 17 | } 18 | extern "C" __global__ void blockadd_with_offsets(int* A, int* B, int* C, size_t SIZE, size_t blockOff_x, size_t blockOff_y, size_t blockOff_z) { 19 | size_t i = blockOff_x + blockIdx.x * blockDim.x + threadIdx.x; 20 | size_t j = blockOff_y + blockIdx.y * blockDim.y + threadIdx.y; 21 | C[j * SIZE + i] = A[j * SIZE + i] + B[j * SIZE + i]; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /apps/qiree_backend/test/test_quiree.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include // For dlopen, dlsym, dlclose 3 | 4 | int main() { 5 | // Path to the shared library 6 | const char *lib_path = "libqir.xacc.lib.so"; 7 | 8 | // Open the shared library 9 | void *handle = dlopen(lib_path, RTLD_LAZY); 10 | if (!handle) { 11 | fprintf(stderr, "Failed to open library: %s\n", dlerror()); 12 | return 1; 13 | } 14 | 15 | // Clear any existing errors 16 | dlerror(); 17 | 18 | // Get a pointer to the function 19 | void (*parse_input_c)(int, char **); 20 | *(void **)(&parse_input_c) = dlsym(handle, "parse_input_c"); 21 | 22 | // Check for errors 23 | const char *error = dlerror(); 24 | if (error != NULL) { 25 | fprintf(stderr, "Failed to find symbol: %s\n", error); 26 | dlclose(handle); 27 | return 1; 28 | } 29 | char* argv[4]; 30 | argv[0] = "null"; 31 | argv[1] = "bell.ll"; 32 | argv[2] = "-a"; 33 | argv[3] = "qpp"; 34 | // Call the function with an argument 35 | 
#!/bin/bash
# apps/benchmarking/compute-performance-scripts/run-dgemm-openmp.sh
#
# Rebuilds compute-performance-iris and kernel.openmp.so, then runs the
# benchmark with IRIS_ARCHS=openmp once for every device count from 1 to 13,
# writing one CSV file per run.
#
# Exits with status 1 when the build fails or an expected artifact is missing.
# (Previously both checks used a bare `exit`, which reports the status of the
# preceding test command, i.e. 0, so a failed build looked like success.)

source ./setup.sh

# Ensure libiris.so is in the shared library path; fall back to the default
# install prefix when IRIS_INSTALL_ROOT is unset or empty.
IRIS_INSTALL_ROOT="${IRIS_INSTALL_ROOT:-$HOME/.iris}"
echo "ADDING $IRIS_INSTALL_ROOT/lib64 to LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="$IRIS_INSTALL_ROOT/lib64:$IRIS_INSTALL_ROOT/lib:$LD_LIBRARY_PATH"
export WORKING_DIR="$(pwd)"

# The result of `make clean` is deliberately not checked (as before).
make clean

# Stop with a failure status if the build does not succeed.
if ! make compute-performance-iris kernel.openmp.so; then
  echo "ERROR: building compute-performance-iris / kernel.openmp.so failed" >&2
  exit 1
fi

# Do not proceed if an expected build artifact is missing.
for artifact in compute-performance-iris kernel.openmp.so; do
  if [ ! -f "$artifact" ]; then
    echo "ERROR: missing build artifact: $artifact" >&2
    exit 1
  fi
done

# Run the OpenMP baseline to see FLOP scaling over increasing device count.
# NOTE(review): HOST is not set in this script; presumably setup.sh or the
# environment provides it -- confirm, otherwise the CSV names contain "--".
REPEATS=1
for num_devices in {1..13}
do
  IRIS_ARCHS=openmp ./compute-performance-iris 4096 0 "${num_devices}" "${REPEATS}" "dgemm-iris-openmp-${HOST}-${num_devices}.csv"
done

# Re-sourced at the end exactly as the original script did.
source ./setup.sh
/*
 * include/iris/iris_host2hip.h
 *
 * Helper macros and a small mutex wrapper for kernel code that is compiled
 * as ordinary host C/C++.
 */
#ifndef IRIS_INCLUDE_IRIS_HOST2HIP_H
#define IRIS_INCLUDE_IRIS_HOST2HIP_H

/*
 * NOTE(review): the header names of the five include directives below are
 * missing in this text. The code in this file needs at least the
 * declarations of pthread_mutex_t / pthread_mutex_* and of size_t; the full
 * original list cannot be determined from here -- restore it from upstream.
 */
#include
#include
#include
#include
#include

/* Trailing parameters of a kernel function: first index and index count. */
#define IRIS_HOST2HIP_KERNEL_ARGS size_t _off, size_t _ndr
/* Opens a loop that runs i over [_off, _off + _ndr); i must already be
 * declared by the caller. Must be closed with IRIS_HOST2HIP_KERNEL_END. */
#define IRIS_HOST2HIP_KERNEL_BEGIN(i) for (i = _off; i < _off + _ndr; i++) {
/* Closes the loop opened by IRIS_HOST2HIP_KERNEL_BEGIN. */
#define IRIS_HOST2HIP_KERNEL_END }

/* These qualifiers are defined as empty, so they vanish from any code that
 * includes this header (presumably so that OpenCL-style kernel source
 * compiles unchanged as host code -- confirm). */
#define __kernel
#define __global
#define __constant
#define __local
#define __restrict


#ifdef __cplusplus
extern "C" {
#endif

/* Both objects are `static`: every translation unit that includes this
 * header gets its own private copy. */
static pthread_mutex_t iris_host2hip_mutex;
/* Not referenced anywhere in this header. */
static int iris_host2hip_kernel_idx;

/*
 * Initialises the mutex with default attributes.
 *
 * NOTE(review): init and finalize are defined here with external linkage;
 * including this header from more than one translation unit of the same
 * binary would produce duplicate definitions. Presumably it is included
 * exactly once per kernel library -- confirm.
 */
void iris_host2hip_init() {
  pthread_mutex_init(&iris_host2hip_mutex, NULL);
}

/* Destroys the mutex created by iris_host2hip_init(). */
void iris_host2hip_finalize() {
  pthread_mutex_destroy(&iris_host2hip_mutex);
}

/* Acquires the mutex; the return value of pthread_mutex_lock is ignored. */
static void iris_host2hip_lock() {
  pthread_mutex_lock(&iris_host2hip_mutex);
}

/* Releases the mutex; the return value of pthread_mutex_unlock is ignored. */
static void iris_host2hip_unlock() {
  pthread_mutex_unlock(&iris_host2hip_mutex);
}

#ifdef __cplusplus
} /* end of extern "C" */
#endif

#endif /* IRIS_INCLUDE_IRIS_HOST2HIP_H */
# include/iris/CMakeLists.txt
#
# Install rules for the IRIS headers. All file paths are relative to this
# directory; everything is installed below ${CMAKE_INSTALL_INCLUDEDIR}/iris.
# CMAKE_INSTALL_INCLUDEDIR is not set in this file and is expected to be
# provided by an enclosing CMakeLists.txt.

# Files that live directly in this directory (sorted by name).
install(
  FILES
    default_cpu_gpu_kernels.cpp
    gettime.h
    iris.h
    iris.hpp
    iris_errno.h
    iris_hexagon.h
    iris_hexagon_imp.h
    iris_host2cuda.h
    iris_host2hip.h
    iris_host2opencl.h
    iris_llvm.h
    iris_macros.h
    iris_openmp.h
    iris_poly.h
    iris_poly_types.h
    iris_runtime.h
    Tiling1D.h
    Tiling2D.h
    Tiling3D.h
    verify.h
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/iris"
)

# Headers kept in subdirectories; each subdirectory name is preserved under
# the install destination.
install(
  FILES
    CL/cl.h
    CL/cl_platform.h
    CL/cl_version.h
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/iris/CL"
)

install(
  FILES
    cuda/cuda.h
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/iris/cuda"
)

install(
  FILES
    hexagon/AEEStdDef.h
    hexagon/hvx_util.h
    hexagon/iris_interface.h
    hexagon/q6cache.h
    hexagon/rpcmem.h
    hexagon/std_kernels.h
    hexagon/stub.h
    hexagon/stub_imp.h
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/iris/hexagon"
)

install(
  FILES
    hip/hip_runtime.h
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/iris/hip"
)

install(
  FILES
    level_zero/ze_api.h
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/iris/level_zero"
)